From 6967d1e15ec98e08e13b6c3ee74a536f50904325 Mon Sep 17 00:00:00 2001
From: Erich Eckner
Date: Thu, 27 Oct 2016 11:02:12 +0200
Subject: backupStatistics now partitions inode-lists for faster search

---
 backupStatistics.in | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/backupStatistics.in b/backupStatistics.in
index f910db4..595beb8 100644
--- a/backupStatistics.in
+++ b/backupStatistics.in
@@ -36,13 +36,21 @@ do_stage()
     2)
       if [ "$2" == '##DESCRIBE##' ]
       then
-        echo 'sort previous lists by $inode'
+        echo 'sort and partition previous lists by $inode'
         return 0
       fi
       tmpDirA="$(mktemp -d)"
       tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
-      sort -T "${tmpDirA}" -T "${tmpDirB}" -u "${cacheDir}/${backupID}.inodes" > \
-        "${cacheDir}/${backupID}.inodes.sorted"
+      rm -rf "${cacheDir}/${backupID}.inodes.sorted"
+      mkdir "${cacheDir}/${backupID}.inodes.sorted"
+      sort -T "${tmpDirA}" -T "${tmpDirB}" -u "${cacheDir}/${backupID}.inodes" | \
+        while read -r line
+        do
+          part="${line:0:4}"
+          part="${part%% *}"
+          echo "${line}" >> \
+            "${cacheDir}/${backupID}.inodes.sorted/part.${part}"
+        done
       rmdir "${tmpDirA}" "${tmpDirB}"
       ;;
     3)
@@ -51,7 +59,8 @@ do_stage()
         echo 'generate lists $inode -> $contentHash'
         return 0
       fi
-      uniq -m1 "${cacheDir}/${backupID}.inodes.sorted" | \
+      cat "${cacheDir}/${backupID}.inodes.sorted/"part.* | \
+        uniq -m1 | \
         parallel \
           sha512sum {=s/^[[:digit:]]\+ //=} \| \
           sed "\"s|^\([0-9a-f]\{128\}\) .*\$|\1 "{=s/^\([[:digit:]]\+\) .*/\\1/=}"|\"" \
@@ -119,8 +128,9 @@ do_stage()
       fi | \
         while read line
         do
+          originalInode="${line%% *}"
           original="$(
-            grep -m1 "^${line%% *} " "${cacheDir}/${backupID}.inodes.sorted" | \
+            grep -m1 "^${originalInode} " "${cacheDir}/${backupID}.inodes.sorted/part.${originalInode:0:4}" | \
               sed 's|^\S\+ ||'
           )"
           for kopieInode in ${line#* }
@@ -130,7 +140,7 @@ do_stage()
             OIFS="${IFS}"
             IFS="$(printf '\n\t')"
             for kopie in $(
-              grep "^${kopieInode} " "${cacheDir}/${backupID}.inodes.sorted" | \
+              grep "^${kopieInode} " "${cacheDir}/${backupID}.inodes.sorted/part.${kopieInode:0:4}" | \
                 sed 's|^\S\+ ||'
             )
             do
-- 
cgit v1.2.3-54-g00ecf