diff options
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | Makefile | 18 | ||||
-rw-r--r-- | backup.conf | 3 | ||||
-rw-r--r-- | backupStatistics.in | 205 | ||||
-rw-r--r-- | hardlinkedbackups.1.in | 25 | ||||
-rw-r--r-- | man.commons.in | 8 |
6 files changed, 256 insertions, 8 deletions
@@ -1,3 +1,6 @@
 backup
-hardlinkedbackups.1
+backupStatistics
 lastBackups
+man.commons
+*.common
+*.1
@@ -26,22 +26,34 @@ MANDIR = /usr/share/man
 
 VERSION = 1.1
 
-all: hardlinkedbackups.1 backup lastBackups
+all: backup backup.1 lastBackups lastBackups.1 backupStatistics backupStatistics.1
 
 %: %.in
-	sed "s/#VERSION#/$(VERSION)/; s@#BINDIR#@$(BINDIR)@; s@#ETCDIR#@$(ETCDIR)@" $< > $@
+	sed "s/#VERSION#/$(VERSION)/; s@#BINDIR#@$(BINDIR)@; s@#ETCDIR#@$(ETCDIR)@; s@#NUMSTAGES#@5@" $< > $@
+
+# Per-page include file for help2man: man.commons minus the lines that
+# start with the page's own name followed by "(" (its own SEE ALSO entry).
+%.common: man.commons
+	grep -v "^$(@:.common=)(" $< > $@
+
+# Build the man page by letting help2man run the generated script.
+# NOTE(review): help2man has to execute ./$<, but "%: %.in" creates it through
+# a plain redirect - confirm the generated script is executable at this point.
+%.1: % %.common
+	help2man -o "$@" -N --include $<.common --no-discard-stderr "./$<"
 
 .PHONY: install dist clean
 
 install: all
-	install -D -m0755 -t $(DESTDIR)$(BINDIR) backup lastBackups
-	install -D -m0644 -t $(DESTDIR)$(MANDIR)/man1 hardlinkedbackups.1
-	ln -s $(DESTDIR)$(MANDIR)/man1/hardlinkedbackups.1 $(DESTDIR)$(MANDIR)/man1/backups.1
-	ln -s $(DESTDIR)$(MANDIR)/man1/hardlinkedbackups.1 $(DESTDIR)$(MANDIR)/man1/lastBackups.1
+	install -D -m0755 -t $(DESTDIR)$(BINDIR) backup backupStatistics lastBackups
+	install -D -m0644 -t $(DESTDIR)$(MANDIR)/man1 backup.1 lastBackups.1 backupStatistics.1
 	install -D -m0644 -t $(DESTDIR)$(ETCDIR) backup.conf
 
+# Remove everything listed in .gitignore. The list is expanded by the shell
+# on purpose: .gitignore contains globs (*.common, *.1), and rm does not
+# expand patterns that are handed to it as literal arguments.
 clean:
-	rm -f backup lastBackups hardlinkedbackups.1
+	rm -f $$(cat .gitignore)
 
 dist: clean
 	git status --porcelain 2> /dev/null | grep -q "\S" && (git add .; git commit -m"neue Version: $(VERSION)") || true
diff --git a/backup.conf b/backup.conf
index 0b65488..82fa2c0 100644
--- a/backup.conf
+++ b/backup.conf
@@ -22,3 +22,6 @@ outdatedLimit=$[2*24*60*60]
 
 # subdirectories which should be appended to the parent directory in the report
 recognSubdirRegex="home\|boot\|root\|erich"
+
+# directory for caching valuable information in backupStatistics
+cacheDir="/path/to/cache/directory"
diff --git a/backupStatistics.in b/backupStatistics.in
new file mode 100644
index 0000000..d5511bf
--- /dev/null
+++ b/backupStatistics.in
@@ -0,0 +1,205 @@
+#!/bin/bash
+
+# backupStatistics version 
#VERSION#
+
+. #ETCDIR#/backup.conf
+
+# do_stage STAGE '##DESCRIBE##'
+#   Print a one-line description of stage STAGE and return 0.
+# do_stage STAGE BACKUPID
+#   Execute stage STAGE (1..5); any other stage number does nothing.
+#
+# The backup to work on is taken from the global ${backupID}; $2 is only
+# inspected for the '##DESCRIBE##' marker. Further globals used: backups[]
+# (the first space-separated word of an entry is the destination directory),
+# cacheDir, dummy and maxWait (presumably set in backup.conf - TODO confirm).
+# All intermediate lists are kept as ${cacheDir}/${backupID}.<suffix>.
+#
+# NOTE(review): stages 3 and 4 call 'uniq -m1'. '-m' is not an option of
+# stock GNU coreutils uniq; presumably a patched uniq that compares only the
+# first field is expected - confirm.
+do_stage()
+{
+  case $1 in
+    1)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'generate lists $filename -> $inode'
+        return 0
+      fi
+      dest="${backups["${backupID}"]%% *}"
+      dest="${dest%/}"
+      # wait up to maxWait seconds for the destination to show up; the
+      # counter is global, so the budget is shared between all backups
+      while [ ! -d "${dest}" ] && [ "${maxWait:-0}" -gt 0 ]
+      do
+        sleep 1
+        maxWait=$((maxWait-1))
+      done
+
+      # drop the old list first, so a failed run never leaves stale data
+      rm -f "${cacheDir}/${backupID}.inodes"
+      if [ ! -d "${dest}" ]
+      then
+        >&2 echo "ERROR: backup destination '${dest}' does not exist."
+        return 1
+      fi
+      # glob instead of $(ls ...): entry names with whitespace stay intact
+      for entry in "${dest}"/*
+      do
+        # an unmatched glob stays literal - nothing to scan then
+        [ -e "${entry}" ] || continue
+        echo "${entry##*/}:"
+        # one line "<inode> <path>" per regular file
+        find "${entry}" -type f -exec \
+          stat -c'%i %n' {} + >> \
+          "${cacheDir}/${backupID}.inodes"
+      done
+    ;;
+    2)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'sort previous lists by $inode'
+        return 0
+      fi
+      sort -u "${cacheDir}/${backupID}.inodes" > \
+        "${cacheDir}/${backupID}.inodes.sorted"
+    ;;
+    3)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'generate lists $inode -> $contentHash'
+        return 0
+      fi
+      # for each input line "<inode> <path>": hash <path> and rewrite the
+      # sha512sum output to "<hash> <inode>"
+      uniq -m1 "${cacheDir}/${backupID}.inodes.sorted" | \
+        parallel \
+          sha512sum {=s/^[[:digit:]]\+ //=} \| \
+          sed "\"s|^\([0-9a-f]\{128\}\) .*\$|\1 "{=s/^\([[:digit:]]\+\) .*/\\1/=}"|\"" \
+          \; | \
+        sort > \
+        "${cacheDir}/${backupID}.content"
+    ;;
+    4)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'find duplicate hashes'
+        return 0
+      fi
+      # the first sed joins consecutive lines sharing the same first field
+      # into one line, the second sed strips that first field (the hash)
+      uniq -m1 -D "${cacheDir}/${backupID}.content" |
+        sed '
+          :a;
+          $!N;
+          s@^\(\S\+ \)\(.*\)\n\1@\1\2 @;
+          ta;
+          P;
+          D
+        ' | \
+        sed 's|^\S\+ ||' > \
+        "${cacheDir}/${backupID}.duplicates"
+    ;;
+    5)
+      if [ "$2" == '##DESCRIBE##' ]
+      then
+        echo 'remove inodes with duplicate hashes'
+        return 0
+      fi
+      # every line lists inodes with identical content: the first one is
+      # kept as the original, all following ones are the copies
+      while read -r line
+      do
+        original="$(
+          grep "^${line%% *} " "${cacheDir}/${backupID}.inodes.sorted" | \
+            sed 's|^\S\+ ||'
+        )"
+        for kopieInode in ${line#* }
+        do
+          kopie="$(
+            grep "^${kopieInode} " "${cacheDir}/${backupID}.inodes.sorted" | \
+              sed 's|^\S\+ ||'
+          )"
+          if ${dummy}
+          then
+            echo "rm \"${kopie}\""
+            echo "ln \"${original}\" \"${kopie}\""
+          else
+            # DO NOT EXECUTE YET: really replacing the copies is disabled
+            >&2 echo 'ERROR: modifying backups is not implemented yet, use --dummy.'
+            exit 1
+            # rm "${kopie}"
+            # ln "${original}" "${kopie}"
+          fi
+        done
+      done < \
+        "${cacheDir}/${backupID}.duplicates"
+    ;;
+  esac
+}
+
+# usage [EXITCODE]
+#   Print the help text, including the description of every stage, to
+#   stderr and exit with EXITCODE (1 when no argument is given).
+usage()
+{
+  >&2 echo 'Usage: backupStatistics [OPTION]'
+  >&2 echo 'Search and tidy duplicate and not-hardlinked files in the backups.'
+  >&2 echo ''
+  >&2 echo 'With no options, tidy up all backups. THIS CAN BE VERY TIME CONSUMING.'
+  >&2 echo ''
+  >&2 echo 'Mandatory arguments to long options are mandatory for short options too.'
+  >&2 echo ' -d | --dummy only generate lists, do not modify backupfiles'
+  >&2 echo ' -h | --help display this help and exit'
+  >&2 echo ' -m | --max=maxNum stop execution after step maxNum'
+  >&2 echo ' -s | --skip=skipNum skip first skipNum steps'
+  >&2 echo ' -V | --version display version and exit'
+  >&2 echo ''
+  >&2 echo 'the executed steps are:'
+  for ((stage=1; stage<=#NUMSTAGES#; stage++))
+  do
+    >&2 echo ''
+    >&2 echo " ${stage}. $(do_stage ${stage} '##DESCRIBE##')"
+  done
+  >&2 echo ''
+  [ -z "$1" ] && exit 1
+  exit $1
+}
+
+# Normalise the command line with getopt. When getopt fails, the word
+# 'usage' is appended to its output before the result is evaluated.
+eval set -- "$(
+  getopt -o dhm:s:V \
+    --long dummy \
+    --long help \
+    --long max: \
+    --long skip: \
+    --long version \
+    -n "$(basename "$0")" -- "$@" || \
+  echo usage
+)"
+
+dummy=false
+maxNum=#NUMSTAGES#
+skipNum=0
+
+while true; do
+  case "$1" in
+    -d|--dummy)
+      dummy=true
+    ;;
+    -h|--help)
+      usage 0
+    ;;
+    -m|--max)
+      shift
+      maxNum=$1
+    ;;
+    -s|--skip)
+      shift
+      skipNum=$1
+    ;;
+    -V|--version)
+      >&2 echo '#VERSION#'
+      exit 0
+    ;;
+    --)
+      shift
+      # no positional parameters are accepted: name the first offender
+      [ $# -gt 0 ] && >&2 echo 'ERROR: Unknown parameter: '"$1" && usage
+      break
+    ;;
+    *)
+      >&2 echo 'That should not happen, '"$1"' unknown though ...'
+      exit -1
+    ;;
+  esac
+  shift
+done
+
+if [ ! -d "${cacheDir}" ] || [ -z "${cacheDir}" ]
+then
+  >&2 echo "ERROR: Cache directory must exist, '${cacheDir}' does not! Closing."
+  exit 1
+fi
+
+# Both limits must be plain non-negative integers not larger than the number
+# of stages. The digits-only check comes first: '[ x -gt n ]' merely fails
+# with an error on non-numeric input instead of rejecting it.
+if ! [[ "${skipNum}" =~ ^[0-9]+$ ]] || \
+  ! [[ "${maxNum}" =~ ^[0-9]+$ ]] || \
+  [ "${skipNum}" -gt #NUMSTAGES# ] || \
+  [ "${maxNum}" -gt #NUMSTAGES# ]
+then
+  usage
+fi
+
+# Stages are numbered from 1, so skipping skipNum stages means starting at
+# skipNum+1. The 10# prefix keeps values with leading zeros decimal.
+for ((stage=10#${skipNum}+1; stage<=10#${maxNum}; stage++))
+do
+  echo "entering stage ${stage} ..."
+  for backupID in "${!backups[@]}"
+  do
+    echo "${backupID}:"
+    do_stage ${stage} "${backupID}"
+  done
+  echo "... stage ${stage} completed."
+done
diff --git a/hardlinkedbackups.1.in b/hardlinkedbackups.1.in
index 914edbe..7902568 100644
--- a/hardlinkedbackups.1.in
+++ b/hardlinkedbackups.1.in
@@ -6,9 +6,12 @@ hardlinkedbackups \- hardlinked incremental backups via rsync (and possibly thro
 \fBbackup /tmp/pidFile /path/to/destination/ user@source:path [proxy_user@ssh_host]\fP
 .TP
 \fBlastBackups\fP
+.TP
+\fBbackupStatistics [options]\fP
 .SH DESCRIPTION
 \fBbackup\fP generates incremental backups (by hardlinking old unchanged files) via rsync and possibly a SOCKS\-tunnel.
 \fBlastBackups\fP shows date of backups and warns about outdated ones.
+\fBbackupStatistics\fP exhaustively searches backups for duplicate but not hard-linked files.
 .SH USAGE
 .TP
 .B "backup /tmp/pidFile /path/to/destination/ user@source:path"
@@ -24,6 +27,25 @@ This is designed to be called from a cron daemon for daily backups.
 .TP
 .B "lastBackups"
 reports about actuality of backups defined in \fB#ETCDIR#/backup.conf\fP
+.TP
+.B "backupStatistics"
+Exhaustively search backups defined in \fB#ETCDIR#/backup.conf\fP for duplicate files which are not hard linked.
+For detail see section \fBBACKUPSTATISTICS\fP.
+.SH BACKUPSTATISTICS
+execution happens in several stages:
+.TP
+1.
+.TP +blabla +.TP +.B "-d | --dummy" +do not modify backed up files +.TP +.B "-m | --max \fImaxStage\fP" +stop execution after completing stage \fImaxStage\fP +.TP +.B "-s | --skip \fIskipStages\fP" +start execution right after stage \fIskipStages\fP .SH CONFIGURATION The configfile \fB#ETCDIR#/backup.conf\fP is a bash script, which defines the following variables: .TP @@ -41,6 +63,9 @@ time before backups are considered outdated [seconds] .TP .B "recognSubdirRegex" regular expression of subdirectories which should be appended to the parent directory in the report +.TP +.B "cacheDir" +directory for caching valuable information in \fBbackupStatistics\fP .SH FILES .TP .B "#BINDIR#/backup" diff --git a/man.commons.in b/man.commons.in new file mode 100644 index 0000000..45918c7 --- /dev/null +++ b/man.commons.in @@ -0,0 +1,8 @@ +[FILES] +#ETCDIR#/backup.conf +[AUTHOR] +Erich Eckner <opensource at eckner dot net>. +[SEE ALSO] +backup(1) +lastBackups(1) +backupStatistics(1) |