summaryrefslogtreecommitdiff
path: root/backupStatistics.in
diff options
context:
space:
mode:
authorErich Eckner <git@eckner.net>2019-02-10 22:36:04 +0100
committerErich Eckner <git@eckner.net>2019-02-10 22:36:04 +0100
commit314ad8d94c5ff33fd2eadb1027748550be0e7e4f (patch)
treeb94d525ac5758380cbc8cbcc8bce8e5b68823697 /backupStatistics.in
parent080efe8cbf8e5bc69a17f8af8f5feffd01dd67fe (diff)
downloadhardlinkedBackups-314ad8d94c5ff33fd2eadb1027748550be0e7e4f.tar.xz
camelCase -> no-camel-case, backup-progress new
Diffstat (limited to 'backupStatistics.in')
-rw-r--r-- backupStatistics.in | 346
1 files changed, 0 insertions, 346 deletions
diff --git a/backupStatistics.in b/backupStatistics.in
deleted file mode 100644
index eb66e2c..0000000
--- a/backupStatistics.in
+++ /dev/null
@@ -1,346 +0,0 @@
-#!/bin/bash
-
-# backupStatistics version #VERSION#
-
-set -e
-
-[ -r "#ETCDIR#/backup.conf" ] && \
- . "#ETCDIR#/backup.conf"
-
-do_stage()
-{
- case $1 in
- 1)
- if [ "$2" == '##DESCRIBE##' ]
- then
- echo 'generate lists $filename -> $inode'
- return 0
- fi
- dest="${backups["${backupID}"]%% *}"
- dest="${dest%/}"
- while [ ! -d "${dest}" ] && [ ${maxWait} -gt 0 ]
- do
- sleep 1
- maxWait=$[${maxWait}-1]
- done
-
- rm -f "${cacheDir}/${backupID}.inodes"
- touch "${cacheDir}/${backupID}.inodes"
- chmod go-rwx "${cacheDir}/${backupID}.inodes"
- for dat in $(ls "${dest}")
- do
- echo "${dat}:"
- find "${dest}/${dat}" -type f -links -64001 -printf '%i %D-%m-%U-%G %p\n' >> \
- "${cacheDir}/${backupID}.inodes"
- done
- ;;
- 2)
- if [ "$2" == '##DESCRIBE##' ]
- then
- echo 'sort previous lists by $inode'
- return 0
- fi
- tmpDirA="$(mktemp -d)"
- tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
- touch "${cacheDir}/${backupID}.inodes.sorted"
- chmod go-rwx "${cacheDir}/${backupID}.inodes.sorted"
- sort -T "${tmpDirA}" -T "${tmpDirB}" -u "${cacheDir}/${backupID}.inodes" > \
- "${cacheDir}/${backupID}.inodes.sorted"
- rmdir "${tmpDirA}" "${tmpDirB}"
- ;;
- 3)
- if [ "$2" == '##DESCRIBE##' ]
- then
- echo 'generate lists $inode -> $count, $contentHash'
- return 0
- fi
- touch "${cacheDir}/${backupID}.content"
- chmod go-rwx "${cacheDir}/${backupID}.content"
- uniq -cm2 "${cacheDir}/${backupID}.inodes.sorted" | \
- parallel \
- sha512sum {=s/^ *\([[:digit:]]\+ \)\{2\}[0-9-]\+ //=} \| \
- sed '"s|^\([0-9a-f]\{128\}\) .*\$|\1'{=s/^ *\([[:digit:]]\+ [[:digit:]]\+\) \([0-9-]\+\) .*/-\\2 \\1/=}'|"' \
- \; > \
- "${cacheDir}/${backupID}.content"
- ;;
- 4)
- if [ "$2" == '##DESCRIBE##' ]
- then
- echo 'sort previous lists by $contentHash'
- return 0
- fi
- tmpDirA="$(mktemp -d)"
- tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
- touch "${cacheDir}/${backupID}.content.sorted"
- chmod go-rwx "${cacheDir}/${backupID}.content.sorted"
- sort -T "${tmpDirA}" -T "${tmpDirB}" -k1,1 -k2nr,2 "${cacheDir}/${backupID}.content" > \
- "${cacheDir}/${backupID}.content.sorted"
- rmdir "${tmpDirA}" "${tmpDirB}"
- ;;
- 5)
- if [ "$2" == '##DESCRIBE##' ]
- then
- echo 'generate sorted lists of groups of inodes with the same hashes'
- return 0
- fi
- index=0
- tmpDirA="$(mktemp -d)"
- tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
- touch "${cacheDir}/${backupID}.duplicates"
- chmod go-rwx "${cacheDir}/${backupID}.duplicates"
- uniq -m1 --all-repeated=separate "${cacheDir}/${backupID}.content.sorted" | \
- sed 's|^\(\S\+ \)\{2\}||' | \
- while read s
- do
- if [ -z "${s}" ]
- then
- index=$[${index}+1]
- else
- echo "${s#* } B ${index}"
- fi
- done | \
- sort -T "${tmpDirA}" -T "${tmpDirB}" > \
- "${cacheDir}/${backupID}.duplicates"
- rmdir "${tmpDirA}" "${tmpDirB}"
- ;;
- 6)
- if [ "$2" == '##DESCRIBE##' ]
- then
- echo 'find files to inodes of previous lists'
- return 0
- fi
- tmpDirA="$(mktemp -d)"
- tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
-
- unset block
- unset lastBlock
- unset firstInode
- unset lastInode
-
- touch "${cacheDir}/${backupID}.duplicates.files"
- chmod go-rwx "${cacheDir}/${backupID}.duplicates.files"
- sed '
- s|^\(\S\+\) \S\+ |\1 F |
- ' "${cacheDir}/${backupID}.inodes.sorted" | \
- sort -m -T "${tmpDirA}" -T "${tmpDirB}" -- \
- - "${cacheDir}/${backupID}.duplicates" | \
- while read -r inode type extra
- do
- if [ "${type}" == "B" ]
- then
- block="${extra}"
- elif [ "${lastInode}" == "${inode}" ] && [ -n "${block}" ]
- then
- echo "${block} ${inode} ${extra}"
- else
- unset block
- fi
- lastInode="${inode}"
- done | \
- sort -T "${tmpDirA}" -T "${tmpDirB}" -k1n,1 | \
- while read -r block inode extra
- do
- if [ "${lastBlock}" != "${block}" ]
- then
- firstInode="${inode}"
- fi
- if [ "${lastBlock}" != "${block}" ] || [ "${firstInode}" != "${inode}" ]
- then
- echo "${block} ${extra}"
- fi
- lastBlock="${block}"
- done | \
- uniq -m1 --group=separate > \
- "${cacheDir}/${backupID}.duplicates.files"
- rmdir "${tmpDirA}" "${tmpDirB}"
- ;;
- 7)
- if [ "$2" == '##DESCRIBE##' ]
- then
- echo 'relink files with different inodes and same hashes'
- return 0
- fi
- if [ ! -r "${cacheDir}/next.action" ]
- then
- cat "${cacheDir}/${backupID}.duplicates.files"
- elif [ "$(head -n1 "${cacheDir}/next.action")" == "${backupID}" ]
- then
- startBlock="$(tail -n1 "${cacheDir}/next.action")"
- sed "
- :vor;
- /^${startBlock} /bnach;
- d;
- bvor;
- :nach;
- n;
- bnach
- " "${cacheDir}/${backupID}.duplicates.files"
- fi | \
- while read -r oBlock original
- do
- echo "${backupID}" > "${cacheDir}/next.action2"
- echo "${oBlock}" >> "${cacheDir}/next.action2"
- mv "${cacheDir}/next.action2" "${cacheDir}/next.action"
- while read -r kBlock kopie
- do
- [ -z "${kopie}" ] && break
- if [ "${kBlock}" != "${oBlock}" ]
- then
- >&2 echo "'${kBlock}' != '${oBlock}'"
- >&2 echo "'${backupID}':"
- >&2 echo "'${original}'"
- >&2 echo "'${kopie}'"
- exit 1
- fi
-
- if ${paranoid}
- then
- diff "${original}" "${kopie}"
- fi
- if [ $(stat -c'%h' "${original}") -ge 65000 ]
- then
- echo "rm \"${original}\""
- echo "ln \"${kopie}\" \"${original}\""
- if ! ${dummy}
- then
- rm "${original}"
- ln "${kopie}" "${original}"
- fi
- else
- echo "rm \"${kopie}\""
- echo "ln \"${original}\" \"${kopie}\""
- if ! ${dummy}
- then
- rm "${kopie}"
- ln "${original}" "${kopie}"
- fi
- fi
- done
- done
- if [ -r "${cacheDir}/next.action" ] && \
- [ "$(head -n1 "${cacheDir}/next.action")" == "${backupID}" ]
- then
- rm -f "${cacheDir}/next.action" "${cacheDir}/next.action2"
- fi
- ;;
- esac
-}
-
-usage()
-{
- >&2 echo \
-'Usage: backupStatistics [OPTION]
-Search and tidy duplicate and not-hardlinked files in the backups.
-
-With no options, tidy up all backups. THIS CAN BE VERY TIME CONSUMING.
-
-Mandatory arguments to long options are mandatory for short options too.
- -d, --dummy only generate lists, do not modify backupfiles
- -m, --max=maxNum stop execution after step maxNum
- -p, --paranoid test for file differences before relinking (test _should_ be obsolete)
- -s, --skip=skipNum skip first skipNum steps
-#HELPTEXT# #
-
-the executed steps are:'
-
- for ((stage=1; stage<=#NUMSTAGES#; stage++))
- do
- >&2 echo ''
- >&2 echo " ${stage}. $(do_stage ${stage} '##DESCRIBE##')"
- done
- >&2 echo ''
- [ -z "$1" ] && exit 1
- exit $1
-}
-
-eval set -- "$(
- getopt -o dm:ps: \
- --long dummy \
- --long help \
- --long max: \
- --long paranoid \
- --long skip: \
- --long version \
- -n "$(basename "$0")" -- "$@" || \
- echo usage
-)"
-
-dummy=false
-maxNum=#NUMSTAGES#
-paranoid=false
-skipNum=0
-
-while true; do
- case "$1" in
- -d|--dummy)
- dummy=true
- ;;
- --help)
- usage 0
- ;;
- -m|--max)
- shift
- maxNum=$1
- ;;
- -p|--paranoid)
- paranoid=true
- ;;
- -s|--skip)
- shift
- skipNum=$1
- ;;
- --version)
- >&2 echo '#VERSION#'
- exit 0
- ;;
- --)
- shift
- [ $# -gt 0 ] && echo 'ERROR: Unknown parameter: '"$#" && usage
- break
- ;;
- *)
- >&2 echo 'That should not happen, '"$1"' unknown though ...'
- exit -1
- ;;
- esac
- shift
-done
-
-if [ ! -d "${cacheDir}" ] || [ -z "${cacheDir}" ]
-then
- >&2 "ERROR: Cache directory must exist, '${cacheDir}' does not! Closing."
- exit 1
-fi
-
-(
- echo -n 'Signature: '
- echo -n '.IsCacheDirectory' | \
- md5sum - | \
- cut -d ' ' -f 1
- echo '# This file is a cache directory tag created by '"$(basename "$0")"'.'
- echo '# For information about cache directory tags, see:'
- echo '# http://www.brynosaurus.com/cachedir/'
-) > "${cacheDir}/CACHEDIR.TAG"
-(
- echo '+ .rsync-filter'
- echo '- *'
-) > "${cacheDir}/.rsync-filter"
-
-if [ ! "${skipNum}" -ge 0 ] || \
- [ ! "${skipNum}" -le #NUMSTAGES# ] || \
- [ ! "${maxNum}" -ge 0 ] || \
- [ ! "${maxNum}" -le #NUMSTAGES# ]
-then
- usage
-fi
-
-for ((stage=${skipNum}+1; stage<=${maxNum}; stage++))
-do
- echo "entering stage ${stage} ($(do_stage ${stage} '##DESCRIBE##')) ..."
- for backupID in "${!backups[@]}"
- do
- echo "${backupID}:"
- do_stage ${stage} "${backupID}"
- done
- echo "... stage ${stage} completed."
-done