summaryrefslogtreecommitdiff
path: root/backupStatistics.in
diff options
context:
space:
mode:
Diffstat (limited to 'backupStatistics.in')
-rw-r--r--backupStatistics.in205
1 files changed, 205 insertions, 0 deletions
diff --git a/backupStatistics.in b/backupStatistics.in
new file mode 100644
index 0000000..d5511bf
--- /dev/null
+++ b/backupStatistics.in
@@ -0,0 +1,205 @@
+#!/bin/bash
+
+# backupStatistics version #VERSION#
+#
+# Looks through the configured backups for files with identical content
+# that are not hard-linked to each other; see usage() for the options and
+# the list of stages.
+#
+# NOTE(review): tokens of the form #NAME# (#VERSION#, #ETCDIR#, #NUMSTAGES#)
+# look like placeholders that get substituted before installation - confirm
+# against the build rules. Unsubstituted, bash treats '#ETCDIR#/backup.conf'
+# below as a comment.
+
+# Load the configuration. The script reads ${backups[@]}, ${cacheDir} and
+# ${maxWait} without defining them, so they are presumably set in this file.
+. #ETCDIR#/backup.conf
+
+# Run one stage of the duplicate search for a single backup.
+#
+# Arguments:
+#   $1  stage number (1-5); any other value is a silent no-op
+#   $2  '##DESCRIBE##' to only print a one-line description of the stage
+#       and return 0. Any other value is ignored: the stages take the
+#       backup to work on from the global ${backupID}.
+# Globals read:    backups, backupID, cacheDir, dummy
+# Globals written: maxWait (seconds left to wait for a missing destination)
+# Files:           intermediate lists are kept in ${cacheDir}/${backupID}.*
+# Returns:         1 if stage 1 finds no destination directory; stage 5
+#                  exits the script with 1 when not in dummy mode.
+do_stage()
+{
+  local dest entry line original kopieInode kopie
+
+  case $1 in
+    1)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'generate lists $filename -> $inode'
+        return 0
+      fi
+      # The destination is the first space-separated word of the backup's
+      # configuration entry, without a trailing slash.
+      dest="${backups["${backupID}"]%% *}"
+      dest="${dest%/}"
+      # Wait for the destination to appear, one second at a time, until the
+      # shared ${maxWait} budget is used up.
+      while [ ! -d "${dest}" ] && [ "${maxWait:-0}" -gt 0 ]
+      do
+        sleep 1
+        maxWait=$((maxWait-1))
+      done
+
+      rm -f "${cacheDir}/${backupID}.inodes"
+      if [ ! -d "${dest}" ]
+      then
+        >&2 echo "ERROR: backup destination '${dest}' is not a directory."
+        return 1
+      fi
+      # Glob instead of parsing ls output, so that entries whose names
+      # contain whitespace are handled as one entry.
+      for entry in "${dest}"/*
+      do
+        # An unmatched glob stays literal; skip it.
+        [ -e "${entry}" ] || [ -L "${entry}" ] || continue
+        echo "${entry##*/}:"
+        # One "<inode> <path>" line per regular file.
+        find "${entry}" -type f -exec \
+          stat -c'%i %n' {} + >> \
+          "${cacheDir}/${backupID}.inodes"
+      done
+      ;;
+    2)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'sort previous lists by $inode'
+        return 0
+      fi
+      sort -u "${cacheDir}/${backupID}.inodes" > \
+        "${cacheDir}/${backupID}.inodes.sorted"
+      ;;
+    3)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'generate lists $inode -> $contentHash'
+        return 0
+      fi
+      # NOTE(review): 'uniq -m1' is not an option of stock GNU uniq;
+      # presumably a patched uniq that compares only the first field, i.e.
+      # keeps one path per inode - confirm.
+      # For every remaining "<inode> <path>" line, parallel hashes the path
+      # and sed rewrites sha512sum's output to "<hash> <inode>".
+      uniq -m1 "${cacheDir}/${backupID}.inodes.sorted" | \
+        parallel \
+          sha512sum {=s/^[[:digit:]]\+ //=} \| \
+          sed "\"s|^\([0-9a-f]\{128\}\) .*\$|\1 "{=s/^\([[:digit:]]\+\) .*/\\1/=}"|\"" \
+          \; | \
+        sort > \
+        "${cacheDir}/${backupID}.content"
+      ;;
+    4)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'find duplicate hashes'
+        return 0
+      fi
+      # NOTE(review): same non-standard 'uniq -m1' as in stage 3; with -D it
+      # presumably prints every line whose hash occurs more than once.
+      # The first sed joins consecutive lines that share their first field
+      # into "<hash> <inode> <inode> ...", the second one drops the hash.
+      uniq -m1 -D "${cacheDir}/${backupID}.content" |
+        sed '
+          :a;
+          $!N;
+          s@^\(\S\+ \)\(.*\)\n\1@\1\2 @;
+          ta;
+          P;
+          D
+        ' | \
+        sed 's|^\S\+ ||' > \
+        "${cacheDir}/${backupID}.duplicates"
+      ;;
+    5)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'remove inodes with duplicate hashes'
+        return 0
+      fi
+      # Each line lists the inodes sharing one content hash: the first is
+      # kept, all others are to be replaced by hard links to it.
+      while IFS= read -r line
+      do
+        # An inode that is already hard-linked has several paths in the
+        # list; any single one of them (-m1) serves as link source.
+        original="$(
+          grep -m1 "^${line%% *} " "${cacheDir}/${backupID}.inodes.sorted" | \
+            sed 's|^\S\+ ||'
+        )"
+        # Deliberately unquoted: split the remaining inode numbers.
+        for kopieInode in ${line#* }
+        do
+          # Handle every path of the duplicate inode on its own instead of
+          # gluing them into one multi-line "file name".
+          while IFS= read -r kopie
+          do
+            if ${dummy}
+            then
+              echo "rm \"${kopie}\""
+              echo "ln \"${original}\" \"${kopie}\""
+            else
+              # DO NOT EXECUTE YET: real relinking stays disabled.
+              >&2 echo 'ERROR: modifying backups is not enabled yet, use --dummy.'
+              exit 1
+              # rm "${kopie}"
+              # ln "${original}" "${kopie}"
+            fi
+          done < <(
+            grep "^${kopieInode} " "${cacheDir}/${backupID}.inodes.sorted" | \
+              sed 's|^\S\+ ||'
+          )
+        done
+      done < \
+        "${cacheDir}/${backupID}.duplicates"
+      ;;
+  esac
+}
+
+# Write the help text, followed by one description line per stage, to
+# stderr and terminate the script.
+#
+# Arguments:
+#   $1  exit status to terminate with; 1 when omitted or empty
+usage()
+{
+  local step
+
+  {
+    printf '%s\n' \
+      'Usage: backupStatistics [OPTION]' \
+      'Search and tidy duplicate and not-hardlinked files in the backups.' \
+      '' \
+      'With no options, tidy up all backups. THIS CAN BE VERY TIME CONSUMING.' \
+      '' \
+      'Mandatory arguments to long options are mandatory for short options too.' \
+      ' -d | --dummy only generate lists, do not modify backupfiles' \
+      ' -h | --help display this help and exit' \
+      ' -m | --max=maxNum stop execution after step maxNum' \
+      ' -s | --skip=skipNum skip first skipNum steps' \
+      ' -V | --version display version and exit' \
+      '' \
+      'the executed steps are:'
+    for ((step=1; step<=#NUMSTAGES#; step++))
+    do
+      # Every stage describes itself when given the ##DESCRIBE## marker.
+      printf '\n %s. %s\n' "${step}" "$(do_stage ${step} '##DESCRIBE##')"
+    done
+    echo ''
+  } >&2
+
+  exit "${1:-1}"
+}
+
+# Let getopt(1) normalise the command line. When it rejects the arguments
+# it has already printed a diagnostic, so show the usage text and exit
+# (usage never returns).
+if ! parsedArgs="$(
+  getopt -o dhm:s:V \
+    --long dummy \
+    --long help \
+    --long max: \
+    --long skip: \
+    --long version \
+    -n "$(basename "$0")" -- "$@"
+)"
+then
+  usage
+fi
+eval set -- "${parsedArgs}"
+
+# Defaults: modify for real, run all stages, skip none.
+dummy=false
+maxNum=#NUMSTAGES#
+skipNum=0
+
+while true
+do
+  case "$1" in
+    -d|--dummy)
+      dummy=true
+      ;;
+    -h|--help)
+      usage 0
+      ;;
+    -m|--max)
+      shift
+      maxNum=$1
+      ;;
+    -s|--skip)
+      shift
+      skipNum=$1
+      ;;
+    -V|--version)
+      >&2 echo '#VERSION#'
+      exit 0
+      ;;
+    --)
+      shift
+      # No positional parameters are accepted; name the first offender.
+      if [ $# -gt 0 ]
+      then
+        >&2 echo 'ERROR: Unknown parameter: '"$1"
+        usage
+      fi
+      break
+      ;;
+    *)
+      >&2 echo 'That should not happen, '"$1"' unknown though ...'
+      # 255 is what the shell makes of the former 'exit -1'.
+      exit 255
+      ;;
+  esac
+  shift
+done
+
+# All intermediate lists are written below ${cacheDir}; refuse to run
+# without it.
+if [ -z "${cacheDir}" ] || [ ! -d "${cacheDir}" ]
+then
+  >&2 echo "ERROR: Cache directory must exist, '${cacheDir}' does not! Closing."
+  exit 1
+fi
+
+# --skip and --max must be plain non-negative integers not larger than the
+# number of stages; anything else (including non-numeric text) shows the
+# usage text and exits.
+if ! [[ "${skipNum}" =~ ^[0-9]+$ ]] || \
+  ! [[ "${maxNum}" =~ ^[0-9]+$ ]] || \
+  [ "${skipNum}" -gt #NUMSTAGES# ] || \
+  [ "${maxNum}" -gt #NUMSTAGES# ]
+then
+  usage
+fi
+# Force base 10 so that values such as '08' are not rejected as octal.
+skipNum=$((10#${skipNum}))
+maxNum=$((10#${maxNum}))
+
+# Skipping the first ${skipNum} stages means starting with the next one;
+# every stage is run for all configured backups before the next begins.
+for ((stage=skipNum+1; stage<=maxNum; stage++))
+do
+  echo "entering stage ${stage} ..."
+  for backupID in "${!backups[@]}"
+  do
+    echo "${backupID}:"
+    do_stage ${stage} "${backupID}"
+  done
+  echo "... stage ${stage} completed."
+done