diff options
author | Erich Eckner <git@eckner.net> | 2019-02-10 22:36:04 +0100 |
---|---|---|
committer | Erich Eckner <git@eckner.net> | 2019-02-10 22:36:04 +0100 |
commit | 314ad8d94c5ff33fd2eadb1027748550be0e7e4f (patch) | |
tree | b94d525ac5758380cbc8cbcc8bce8e5b68823697 /backup-statistics.in | |
parent | 080efe8cbf8e5bc69a17f8af8f5feffd01dd67fe (diff) | |
download | hardlinkedBackups-314ad8d94c5ff33fd2eadb1027748550be0e7e4f.tar.xz |
camelCase -> no-camel-case, backup-progress new
Diffstat (limited to 'backup-statistics.in')
-rw-r--r-- | backup-statistics.in | 346 |
1 file changed, 346 insertions, 0 deletions
#!/bin/bash

# backup-statistics version #VERSION#
#
# Search and tidy duplicate, not-yet-hardlinked files in the backups.
# Runs in numbered stages (see usage()); each stage writes its result to
# ${cacheDir} so expensive work can be skipped/resumed via -s/-m.
# Tokens of the form #WORD# (#VERSION#, #ETCDIR#, #NUMSTAGES#, #HELPTEXT#)
# are placeholders substituted at build time -- this is a .in template.

set -e

# Load configuration. Expected to define at least the associative array
# ${backups[@]} (backupID -> "destination ..."), ${cacheDir} and
# presumably ${maxWait} -- TODO confirm against backup.conf.
[ -r "#ETCDIR#/backup.conf" ] && \
  . "#ETCDIR#/backup.conf"

# do_stage $stage $backupID
# Execute one processing stage for one backup. When $2 is the magic
# string '##DESCRIBE##', only print a one-line description of the stage
# and return -- usage() and the progress output rely on this.
do_stage()
{
  case $1 in
    1)
      if [ "$2" == '##DESCRIBE##' ]
      then
        echo 'generate lists $filename -> $inode'
        return 0
      fi
      # First word of the backup definition is the destination directory.
      dest="${backups["${backupID}"]%% *}"
      dest="${dest%/}"
      # Wait (up to ${maxWait} seconds) for the destination to appear,
      # e.g. until an external mount shows up.
      while [ ! -d "${dest}" ] && [ ${maxWait} -gt 0 ]
      do
        sleep 1
        # was: maxWait=$[${maxWait}-1] -- $[ ] is deprecated bash syntax
        maxWait=$(( maxWait - 1 ))
      done

      rm -f "${cacheDir}/${backupID}.inodes"
      touch "${cacheDir}/${backupID}.inodes"
      chmod go-rwx "${cacheDir}/${backupID}.inodes"
      # NOTE(review): iterating over $(ls ...) breaks on whitespace in
      # entry names; a glob would be safer -- kept as-is to preserve
      # behavior for the existing backup layouts.
      for dat in $(ls "${dest}")
      do
        echo "${dat}:"
        # '-links -64001' skips files already at/near the hardlink limit
        # (stage 7 treats >=65000 links as "full"). Output format:
        # "$inode $device-$mode-$uid-$gid $path".
        find "${dest}/${dat}" -type f -links -64001 -printf '%i %D-%m-%U-%G %p\n' >> \
          "${cacheDir}/${backupID}.inodes"
      done
      ;;
    2)
      if [ "$2" == '##DESCRIBE##' ]
      then
        echo 'sort previous lists by $inode'
        return 0
      fi
      # Two scratch dirs (system tmp + cacheDir) give sort more spill space.
      tmpDirA="$(mktemp -d)"
      tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
      touch "${cacheDir}/${backupID}.inodes.sorted"
      chmod go-rwx "${cacheDir}/${backupID}.inodes.sorted"
      sort -T "${tmpDirA}" -T "${tmpDirB}" -u "${cacheDir}/${backupID}.inodes" > \
        "${cacheDir}/${backupID}.inodes.sorted"
      rmdir "${tmpDirA}" "${tmpDirB}"
      ;;
    3)
      if [ "$2" == '##DESCRIBE##' ]
      then
        echo 'generate lists $inode -> $count, $contentHash'
        return 0
      fi
      touch "${cacheDir}/${backupID}.content"
      chmod go-rwx "${cacheDir}/${backupID}.content"
      # Hash one representative path per inode (in parallel) and emit
      # "$sha512-$devModeOwner $inode $count" lines. The {= perl =}
      # expressions strip/rearrange the uniq-prefixed fields.
      # NOTE(review): '-m2' is not a GNU coreutils uniq option -- this
      # appears to rely on a locally patched uniq; confirm before porting.
      uniq -cm2 "${cacheDir}/${backupID}.inodes.sorted" | \
        parallel \
          sha512sum {=s/^ *\([[:digit:]]\+ \)\{2\}[0-9-]\+ //=} \| \
          sed '"s|^\([0-9a-f]\{128\}\) .*\$|\1'{=s/^ *\([[:digit:]]\+ [[:digit:]]\+\) \([0-9-]\+\) .*/-\\2 \\1/=}'|"' \
          \; > \
        "${cacheDir}/${backupID}.content"
      ;;
    4)
      if [ "$2" == '##DESCRIBE##' ]
      then
        echo 'sort previous lists by $contentHash'
        return 0
      fi
      tmpDirA="$(mktemp -d)"
      tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
      touch "${cacheDir}/${backupID}.content.sorted"
      chmod go-rwx "${cacheDir}/${backupID}.content.sorted"
      # Sort by hash, then by link count descending, so the inode with
      # the most links comes first within each hash group.
      sort -T "${tmpDirA}" -T "${tmpDirB}" -k1,1 -k2nr,2 "${cacheDir}/${backupID}.content" > \
        "${cacheDir}/${backupID}.content.sorted"
      rmdir "${tmpDirA}" "${tmpDirB}"
      ;;
    5)
      if [ "$2" == '##DESCRIBE##' ]
      then
        echo 'generate sorted lists of groups of inodes with the same hashes'
        return 0
      fi
      index=0
      tmpDirA="$(mktemp -d)"
      tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
      touch "${cacheDir}/${backupID}.duplicates"
      chmod go-rwx "${cacheDir}/${backupID}.duplicates"
      # Keep only hash groups with >1 inode; number each group ("block")
      # and emit "$inode B $blockIndex", sorted by inode for the merge
      # in stage 6. (See stage 3 NOTE about the nonstandard uniq -m.)
      uniq -m1 --all-repeated=separate "${cacheDir}/${backupID}.content.sorted" | \
        sed 's|^\(\S\+ \)\{2\}||' | \
        while read s
        do
          if [ -z "${s}" ]
          then
            # blank line = group separator from uniq
            # was: index=$[${index}+1] -- $[ ] is deprecated bash syntax
            index=$(( index + 1 ))
          else
            echo "${s#* } B ${index}"
          fi
        done | \
        sort -T "${tmpDirA}" -T "${tmpDirB}" > \
        "${cacheDir}/${backupID}.duplicates"
      rmdir "${tmpDirA}" "${tmpDirB}"
      ;;
    6)
      if [ "$2" == '##DESCRIBE##' ]
      then
        echo 'find files to inodes of previous lists'
        return 0
      fi
      tmpDirA="$(mktemp -d)"
      tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"

      unset block
      unset lastBlock
      unset firstInode
      unset lastInode

      touch "${cacheDir}/${backupID}.duplicates.files"
      chmod go-rwx "${cacheDir}/${backupID}.duplicates.files"
      # Tag every inode->file line with type "F", merge-sort it against
      # the "B"-tagged block list (B sorts before F for equal inodes),
      # and keep the file paths of inodes that belong to a block. The
      # second pass drops each block's first inode (the link target) and
      # groups the remaining paths per block, separated by blank lines.
      sed '
        s|^\(\S\+\) \S\+ |\1 F |
      ' "${cacheDir}/${backupID}.inodes.sorted" | \
        sort -m -T "${tmpDirA}" -T "${tmpDirB}" -- \
          - "${cacheDir}/${backupID}.duplicates" | \
        while read -r inode type extra
        do
          if [ "${type}" == "B" ]
          then
            block="${extra}"
          elif [ "${lastInode}" == "${inode}" ] && [ -n "${block}" ]
          then
            echo "${block} ${inode} ${extra}"
          else
            unset block
          fi
          lastInode="${inode}"
        done | \
        sort -T "${tmpDirA}" -T "${tmpDirB}" -k1n,1 | \
        while read -r block inode extra
        do
          if [ "${lastBlock}" != "${block}" ]
          then
            firstInode="${inode}"
          fi
          if [ "${lastBlock}" != "${block}" ] || [ "${firstInode}" != "${inode}" ]
          then
            echo "${block} ${extra}"
          fi
          lastBlock="${block}"
        done | \
        uniq -m1 --group=separate > \
        "${cacheDir}/${backupID}.duplicates.files"
      rmdir "${tmpDirA}" "${tmpDirB}"
      ;;
    7)
      if [ "$2" == '##DESCRIBE##' ]
      then
        echo 'relink files with different inodes and same hashes'
        return 0
      fi
      # Resumable: ${cacheDir}/next.action stores "backupID\nblock" of
      # the last block being processed; if it matches, skip ahead to it.
      if [ ! -r "${cacheDir}/next.action" ]
      then
        cat "${cacheDir}/${backupID}.duplicates.files"
      elif [ "$(head -n1 "${cacheDir}/next.action")" == "${backupID}" ]
      then
        startBlock="$(tail -n1 "${cacheDir}/next.action")"
        # delete everything before the first line of ${startBlock},
        # print the rest unchanged ("vor"/"nach" = before/after)
        sed "
          :vor;
          /^${startBlock} /bnach;
          d;
          bvor;
          :nach;
          n;
          bnach
        " "${cacheDir}/${backupID}.duplicates.files"
      fi | \
        while read -r oBlock original
        do
          # Record progress atomically (write + rename).
          echo "${backupID}" > "${cacheDir}/next.action2"
          echo "${oBlock}" >> "${cacheDir}/next.action2"
          mv "${cacheDir}/next.action2" "${cacheDir}/next.action"
          # Inner read consumes the rest of this block (until the blank
          # separator line); "kopie" is German for "copy".
          while read -r kBlock kopie
          do
            [ -z "${kopie}" ] && break
            if [ "${kBlock}" != "${oBlock}" ]
            then
              >&2 echo "'${kBlock}' != '${oBlock}'"
              >&2 echo "'${backupID}':"
              >&2 echo "'${original}'"
              >&2 echo "'${kopie}'"
              exit 1
            fi

            if ${paranoid}
            then
              # should be redundant (same sha512) -- aborts via set -e
              # if the contents differ after all
              diff "${original}" "${kopie}"
            fi
            # If the first file is (almost) at the hardlink limit, link
            # the other way around.
            if [ $(stat -c'%h' "${original}") -ge 65000 ]
            then
              echo "rm \"${original}\""
              echo "ln \"${kopie}\" \"${original}\""
              if ! ${dummy}
              then
                rm "${original}"
                ln "${kopie}" "${original}"
              fi
            else
              echo "rm \"${kopie}\""
              echo "ln \"${original}\" \"${kopie}\""
              if ! ${dummy}
              then
                rm "${kopie}"
                ln "${original}" "${kopie}"
              fi
            fi
          done
        done
      # All blocks of this backup done -> clear the resume marker.
      if [ -r "${cacheDir}/next.action" ] && \
        [ "$(head -n1 "${cacheDir}/next.action")" == "${backupID}" ]
      then
        rm -f "${cacheDir}/next.action" "${cacheDir}/next.action2"
      fi
      ;;
  esac
}

# usage [exitCode]
# Print help (including the auto-generated stage descriptions) to stderr
# and exit -- with 1 by default, or with the given code.
usage()
{
  >&2 echo \
'Usage: backup-statistics [OPTION]
Search and tidy duplicate and not-hardlinked files in the backups.

With no options, tidy up all backups. THIS CAN BE VERY TIME CONSUMING.

Mandatory arguments to long options are mandatory for short options too.
  -d, --dummy        only generate lists, do not modify backupfiles
  -m, --max=maxNum   stop execution after step maxNum
  -p, --paranoid     test for file differences before relinking (test _should_ be obsolete)
  -s, --skip=skipNum skip first skipNum steps
#HELPTEXT# #

the executed steps are:'

  for ((stage=1; stage<=#NUMSTAGES#; stage++))
  do
    >&2 echo ''
    >&2 echo " ${stage}. $(do_stage ${stage} '##DESCRIBE##')"
  done
  >&2 echo ''
  [ -z "$1" ] && exit 1
  exit $1
}

# On getopt failure the substitution emits the word "usage", which the
# option loop below rejects -- NOTE(review): it lands in the '*)' branch,
# not in usage(); presumably intentional as a loud failure. Confirm.
eval set -- "$(
  getopt -o dm:ps: \
    --long dummy \
    --long help \
    --long max: \
    --long paranoid \
    --long skip: \
    --long version \
    -n "$(basename "$0")" -- "$@" || \
  echo usage
)"

# defaults
dummy=false
maxNum=#NUMSTAGES#
paranoid=false
skipNum=0

while true; do
  case "$1" in
    -d|--dummy)
      dummy=true
      ;;
    --help)
      usage 0
      ;;
    -m|--max)
      shift
      maxNum=$1
      ;;
    -p|--paranoid)
      paranoid=true
      ;;
    -s|--skip)
      shift
      skipNum=$1
      ;;
    --version)
      >&2 echo '#VERSION#'
      exit 0
      ;;
    --)
      shift
      # was: echo '...'"$#" -- printed the argument *count* instead of
      # the offending argument, and not to stderr
      [ $# -gt 0 ] && >&2 echo 'ERROR: Unknown parameter: '"$1" && usage
      break
      ;;
    *)
      >&2 echo 'That should not happen, '"$1"' unknown though ...'
      # was: exit -1 -- exit status must be in 0..255
      exit 1
      ;;
  esac
  shift
done

if [ ! -d "${cacheDir}" ] || [ -z "${cacheDir}" ]
then
  # was: >&2 "ERROR: ..." -- the 'echo' was missing, so the message
  # string itself was executed as a command instead of being printed
  >&2 echo "ERROR: Cache directory must exist, '${cacheDir}' does not! Closing."
  exit 1
fi

# Mark the cache directory so backup tools skip it (CACHEDIR.TAG spec).
(
  echo -n 'Signature: '
  echo -n '.IsCacheDirectory' | \
    md5sum - | \
    cut -d ' ' -f 1
  echo '# This file is a cache directory tag created by '"$(basename "$0")"'.'
  echo '# For information about cache directory tags, see:'
  echo '# http://www.brynosaurus.com/cachedir/'
) > "${cacheDir}/CACHEDIR.TAG"
# ... and exclude its contents from rsync runs that honor filter files.
(
  echo '+ .rsync-filter'
  echo '- *'
) > "${cacheDir}/.rsync-filter"

# Validate the stage range (also rejects non-numeric -s/-m arguments,
# since the numeric tests then fail).
if [ ! "${skipNum}" -ge 0 ] || \
  [ ! "${skipNum}" -le #NUMSTAGES# ] || \
  [ ! "${maxNum}" -ge 0 ] || \
  [ ! "${maxNum}" -le #NUMSTAGES# ]
then
  usage
fi

# Main loop: run each remaining stage for every configured backup.
for ((stage=${skipNum}+1; stage<=${maxNum}; stage++))
do
  echo "entering stage ${stage} ($(do_stage ${stage} '##DESCRIBE##')) ..."
  for backupID in "${!backups[@]}"
  do
    echo "${backupID}:"
    do_stage ${stage} "${backupID}"
  done
  echo "... stage ${stage} completed."
done