From c9784fb76a27f0a2a4c0bfb303ab25da49ed8846 Mon Sep 17 00:00:00 2001
From: Erich Eckner
Date: Tue, 25 Oct 2016 13:34:04 +0200
Subject: split stage 3 into two (generate; sort) in backupStatistics

---
 backupStatistics.in | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'backupStatistics.in')

diff --git a/backupStatistics.in b/backupStatistics.in
index 1f3c8f7..f910db4 100644
--- a/backupStatistics.in
+++ b/backupStatistics.in
@@ -51,25 +51,33 @@ do_stage()
  echo 'generate lists $inode -> $contentHash'
  return 0
  fi
- tmpDirA="$(mktemp -d)"
- tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
  uniq -m1 "${cacheDir}/${backupID}.inodes.sorted" | \
  parallel \
  sha512sum {=s/^[[:digit:]]\+ //=} \| \
  sed "\"s|^\([0-9a-f]\{128\}\) .*\$|\1 "{=s/^\([[:digit:]]\+\) .*/\\1/=}"|\"" \
- \; | \
- sort -T "${tmpDirA}" -T "${tmpDirB}" > \
+ \; > \
  "${cacheDir}/${backupID}.content"
- rmdir "${tmpDirA}" "${tmpDirB}"
  ;;
  4)
+ if [ "$2" == '##DESCRIBE##' ]
+ then
+ echo 'sort previous lists by $contentHash'
+ return 0
+ fi
+ tmpDirA="$(mktemp -d)"
+ tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
+ sort -T "${tmpDirA}" -T "${tmpDirB}" "${cacheDir}/${backupID}.content" > \
+ "${cacheDir}/${backupID}.content.sorted"
+ rmdir "${tmpDirA}" "${tmpDirB}"
+ ;;
+ 5)
  if [ "$2" == '##DESCRIBE##' ]
  then
  echo 'find duplicate hashes'
  return 0
  fi
  (
- uniq -m1 --all-repeated=separate "${cacheDir}/${backupID}.content"
+ uniq -m1 --all-repeated=separate "${cacheDir}/${backupID}.content.sorted"
  echo ""
  ) | \
  sed 's|^\S\+ ||' | \
@@ -85,7 +93,7 @@ do_stage()
  sed 's| $||' > \
  "${cacheDir}/${backupID}.duplicates"
  ;;
- 5)
+ 6)
  if [ "$2" == '##DESCRIBE##' ]
  then
  echo 'remove inodes with duplicate hashes'
--
cgit v1.2.3-70-g09d2