From 61cfb0d0b963abbde781ddf5af8e3866d6f1b730 Mon Sep 17 00:00:00 2001 From: Erich Eckner Date: Mon, 14 Nov 2016 13:03:22 +0100 Subject: backupStatistics sollte jetzt nur Inodes mit <=64000 Hardlinks beachten und die (davon) am haeufigsten hard gelinkte Inode als Original verwenden --- backupStatistics.in | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/backupStatistics.in b/backupStatistics.in index 8865af5..aa9b92b 100644 --- a/backupStatistics.in +++ b/backupStatistics.in @@ -28,8 +28,7 @@ do_stage() for dat in $(ls "${dest}") do echo "${dat}:" - find "${dest}/${dat}" -type f -exec \ - stat -c'%i %n' {} \; >> \ + find "${dest}/${dat}" -type f -links -64001 -printf '%i %p\n' >> \ "${cacheDir}/${backupID}.inodes" done ;; @@ -56,14 +55,14 @@ do_stage() 3) if [ "$2" == '##DESCRIBE##' ] then - echo 'generate lists $inode -> $contentHash' + echo 'generate lists $inode -> $count, $contentHash' return 0 fi cat "${cacheDir}/${backupID}.inodes.sorted/"part.* | \ - uniq -m1 | \ + uniq -cm1 | \ parallel \ - sha512sum {=s/^[[:digit:]]\+ //=} \| \ - sed "\"s|^\([0-9a-f]\{128\}\) .*\$|\1 "{=s/^\([[:digit:]]\+\) .*/\\1/=}"|\"" \ + sha512sum {=s/^ *\([[:digit:]]\+ \)\{2\}//=} \| \ + sed '"s|^\([0-9a-f]\{128\}\) .*\$|\1 '{=s/^ *\([[:digit:]]\+ [[:digit:]]\+\) .*/\\1/=}'|"' \ \; > \ "${cacheDir}/${backupID}.content" ;; @@ -75,7 +74,7 @@ do_stage() fi tmpDirA="$(mktemp -d)" tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")" - sort -T "${tmpDirA}" -T "${tmpDirB}" "${cacheDir}/${backupID}.content" > \ + sort -T "${tmpDirA}" -T "${tmpDirB}" -k1,1 -k2nr,2 "${cacheDir}/${backupID}.content" > \ "${cacheDir}/${backupID}.content.sorted" rmdir "${tmpDirA}" "${tmpDirB}" ;; @@ -89,7 +88,7 @@ do_stage() uniq -m1 --all-repeated=separate "${cacheDir}/${backupID}.content.sorted" echo "" ) | \ - sed 's|^\S\+ ||' | \ + sed 's|^\(\S\+ \)\{2\}||' | \ while read s do if [ -z "${s}" ] -- cgit v1.2.3-54-g00ecf