summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErich Eckner <git@eckner.net>2017-01-11 13:33:48 +0100
committerErich Eckner <git@eckner.net>2017-01-11 13:33:48 +0100
commit8dea12d03d4c979111682d1f3645aac7f96cd366 (patch)
treeaa97159a4d1236c185c19889b805e1c1d4981d2a
parent549fd13ee5fcf010ee98e6f011366e941dc738b0 (diff)
downloadhardlinkedBackups-8dea12d03d4c979111682d1f3645aac7f96cd366.tar.xz
Ãsollte jetzt in O(n log(n)) laufen
-rw-r--r--Makefile2
-rw-r--r--backupStatistics.in174
2 files changed, 106 insertions, 70 deletions
diff --git a/Makefile b/Makefile
index 3090227..763626c 100644
--- a/Makefile
+++ b/Makefile
@@ -34,7 +34,7 @@ all: man.commons backup backup.1 lastBackups lastBackups.1 backupStatistics back
s/#VERSION#/$(VERSION)/; \
s@#BINDIR#@$(BINDIR)@; \
s@#ETCDIR#@$(ETCDIR)@; \
- s@#NUMSTAGES#@6@; \
+ s@#NUMSTAGES#@7@; \
s@#HELPTEXT#\(\s\+\)#@ --help \1display this help and exit\n --version\1display version and exit@; \
" $< > $@ && \
( [[ "$@" = *.* ]] || chmod +x "$@" )
diff --git a/backupStatistics.in b/backupStatistics.in
index 160ba0c..9e9608c 100644
--- a/backupStatistics.in
+++ b/backupStatistics.in
@@ -29,27 +29,19 @@ do_stage()
do
echo "${dat}:"
find "${dest}/${dat}" -type f -links -64001 -printf '%i %D-%m-%U-%G %p\n' >> \
- "${cacheDir}/${backupID}.inodes"
+ "${cacheDir}/${backupID}.inodes"
done
;;
2)
if [ "$2" == '##DESCRIBE##' ]
then
- echo 'sort and partition previous lists by $inode'
+ echo 'sort previous lists by $inode'
return 0
fi
tmpDirA="$(mktemp -d)"
tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
- rm -rf "${cacheDir}/${backupID}.inodes.sorted"
- mkdir "${cacheDir}/${backupID}.inodes.sorted"
- sort -T "${tmpDirA}" -T "${tmpDirB}" -u "${cacheDir}/${backupID}.inodes" | \
- while read -r line
- do
- part="${line:0:4}"
- part="${part%% *}"
- echo "${line}" >> \
- "${cacheDir}/${backupID}.inodes.sorted/part.${part}"
- done
+ sort -T "${tmpDirA}" -T "${tmpDirB}" -u "${cacheDir}/${backupID}.inodes" > \
+ "${cacheDir}/${backupID}.inodes.sorted"
rmdir "${tmpDirA}" "${tmpDirB}"
;;
3)
@@ -58,8 +50,7 @@ do_stage()
echo 'generate lists $inode -> $count, $contentHash'
return 0
fi
- cat "${cacheDir}/${backupID}.inodes.sorted/"part.* | \
- uniq -cm2 | \
+ uniq -cm2 "${cacheDir}/${backupID}.inodes.sorted" | \
parallel \
sha512sum {=s/^ *\([[:digit:]]\+ \)\{2\}[0-9-]\+ //=} \| \
sed '"s|^\([0-9a-f]\{128\}\) .*\$|\1'{=s/^ *\([[:digit:]]\+ [[:digit:]]\+\) \([0-9-]\+\) .*/-\\2 \\1/=}'|"' \
@@ -81,96 +72,141 @@ do_stage()
5)
if [ "$2" == '##DESCRIBE##' ]
then
- echo 'find duplicate hashes'
+ echo 'generate sorted lists of groups of inodes with the same hashes'
return 0
fi
- (
- uniq -m1 --all-repeated=separate "${cacheDir}/${backupID}.content.sorted"
- echo ""
- ) | \
+ index=0
+ tmpDirA="$(mktemp -d)"
+ tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
+ uniq -m1 --all-repeated=separate "${cacheDir}/${backupID}.content.sorted" | \
sed 's|^\(\S\+ \)\{2\}||' | \
while read s
do
if [ -z "${s}" ]
then
- echo ""
+ index=$[${index}+1]
else
- echo -n "${s} "
+ echo "${s#* } B ${index}"
fi
done | \
- sed 's| $||' > \
+ sort -T "${tmpDirA}" -T "${tmpDirB}" > \
"${cacheDir}/${backupID}.duplicates"
+ rmdir "${tmpDirA}" "${tmpDirB}"
;;
6)
if [ "$2" == '##DESCRIBE##' ]
then
- echo 'remove inodes with duplicate hashes'
+ echo 'find files to inodes of previous lists'
return 0
fi
- if [ -r "${cacheDir}/next.action" ]
+ tmpDirA="$(mktemp -d)"
+ tmpDirB="$(mktemp -d "${cacheDir}/tmp.XXXXXX")"
+
+ unset block
+ unset lastBlock
+ unset firstInode
+ unset lastInode
+
+ sed '
+ s|^\(\S\+\) \S\+ |\1 F |
+ ' "${cacheDir}/${backupID}.inodes.sorted" | \
+ sort -m -T "${tmpDirA}" -T "${tmpDirB}" -- \
+ - "${cacheDir}/${backupID}.duplicates" | \
+ while read -r inode type extra
+ do
+ if [ "${type}" == "B" ]
+ then
+ block="${extra}"
+ elif [ "${lastInode}" == "${inode}" ] && [ -n "${block}" ]
+ then
+ echo "${block} ${inode} ${extra}"
+ else
+ unset block
+ fi
+ lastInode="${inode}"
+ done | \
+ sort -T "${tmpDirA}" -T "${tmpDirB}" -k1n,1 | \
+ while read -r block inode extra
+ do
+ if [ "${lastBlock}" != "${block}" ]
+ then
+ firstInode="${inode}"
+ fi
+ if [ "${lastBlock}" != "${block}" ] || [ "${firstInode}" != "${inode}" ]
+ then
+ echo "${block} ${extra}"
+ fi
+ lastBlock="${block}"
+ done | \
+ uniq -m1 --group=separate > \
+ "${cacheDir}/${backupID}.duplicates.files"
+ rmdir "${tmpDirA}" "${tmpDirB}"
+ ;;
+ 7)
+ if [ "$2" == '##DESCRIBE##' ]
then
- startInode="$(cat "${cacheDir}/next.action")"
+ echo 'relink files with different inodes and same hashes'
+ return 0
+ fi
+ if [ ! -r "${cacheDir}/next.action" ]
+ then
+ cat "${cacheDir}/${backupID}.duplicates.files"
+ elif [ "$(head -n1 "${cacheDir}/next.action")" == "${backupID}" ]
+ then
+ startBlock="$(tail -n1 "${cacheDir}/next.action")"
sed "
:vor;
- / ${startInode}\( \|$\)/{
- s@^\(\S\+ \)\(.* \)\?${startInode}\( \|$\)@\1${startInode}\3@;
- bnach
- };
+ /^${startBlock} /bnach;
d;
bvor;
:nach;
n;
bnach
- " "${cacheDir}/${backupID}.duplicates"
- else
- cat "${cacheDir}/${backupID}.duplicates"
+ " "${cacheDir}/${backupID}.duplicates.files"
fi | \
- while read line
+ while read -r oBlock original
do
- originalInode="${line%% *}"
- original="$(
- grep -m1 "^${originalInode} " "${cacheDir}/${backupID}.inodes.sorted/part.${originalInode:0:4}" | \
- sed 's|^\S\+ ||'
- )"
- for kopieInode in ${line#* }
+ echo "${backupID}" > "${cacheDir}/next.action2"
+ echo "${oBlock}" >> "${cacheDir}/next.action2"
+ mv "${cacheDir}/next.action2" "${cacheDir}/next.action"
+ while read -r kBlock kopie
do
- echo "${kopieInode}" > "${cacheDir}/next.action2"
- mv "${cacheDir}/next.action2" "${cacheDir}/next.action"
- OIFS="${IFS}"
- IFS="$(printf '\n\t')"
- for kopie in $(
- grep "^${kopieInode} " "${cacheDir}/${backupID}.inodes.sorted/part.${kopieInode:0:4}" | \
- sed 's|^\S\+ ||'
- )
- do
- IFS="${OIFS}"
- if ${paranoid}
+ [ -z "${kopie}" ] && break
+ if [ "${kBlock}" != "${oBlock}" ]
+ then
+ >&2 echo "'${kBlock}' != '${oBlock}'"
+ >&2 echo "'${backupID}':"
+ >&2 echo "'${original}'"
+ >&2 echo "'${kopie}'"
+ exit 1
+ fi
+
+ if ${paranoid}
+ then
+ diff "${original}" "${kopie}"
+ fi
+ if [ $(stat -c'%h' "${original}") -ge 65000 ]
+ then
+ echo "rm \"${original}\""
+ echo "ln \"${kopie}\" \"${original}\""
+ if ! ${dummy}
then
- diff "${original}" "${kopie}"
+ rm "${original}"
+ ln "${kopie}" "${original}"
fi
- if [ $(stat -c'%h' "${original}") -ge 65000 ]
+ else
+ echo "rm \"${kopie}\""
+ echo "ln \"${original}\" \"${kopie}\""
+ if ! ${dummy}
then
- echo "rm \"${original}\""
- echo "ln \"${kopie}\" \"${original}\""
- if ! ${dummy}
- then
- rm "${original}"
- ln "${kopie}" "${original}"
- fi
- else
- echo "rm \"${kopie}\""
- echo "ln \"${original}\" \"${kopie}\""
- if ! ${dummy}
- then
- rm "${kopie}"
- ln "${original}" "${kopie}"
- fi
+ rm "${kopie}"
+ ln "${original}" "${kopie}"
fi
- done
+ fi
done
done
if [ -r "${cacheDir}/next.action" ] && \
- grep -q " $(cat "${cacheDir}/next.action")\( \|$\)" "${cacheDir}/${backupID}.duplicates"
+ [ "$(head -n1 "${cacheDir}/next.action")" == "${backupID}" ]
then
rm -f "${cacheDir}/next.action" "${cacheDir}/next.action2"
fi