diff options
author | Erich Eckner <git@eckner.net> | 2020-05-14 11:42:50 +0200 |
---|---|---|
committer | Erich Eckner <git@eckner.net> | 2020-05-14 11:42:50 +0200 |
commit | 505a9d19281069703f04b477f85dfdb0f21165fb (patch) | |
tree | 8a4ef063ea1d2a33b0ac7eadabbc025c8efe63d7 | |
parent | d51290b4776fbc281d5b098479999771a360f0cf (diff) | |
download | arch-mirror-505a9d19281069703f04b477f85dfdb0f21165fb.tar.xz |
arch-mirror: enable download from http and https mirrors, too
-rwxr-xr-x | arch-mirror | 95 |
1 files changed, 89 insertions, 6 deletions
```diff
diff --git a/arch-mirror b/arch-mirror
index 67e84ba..714792c 100755
--- a/arch-mirror
+++ b/arch-mirror
@@ -17,6 +17,89 @@ if ! stty &>/dev/null; then
   quiet="-q"
 fi
 
+retrieve_listing() {
+  local url="$1"
+  case "${url%%://*}" in
+    'rsync')
+      rsync "${url}" \
+      | grep '^d' \
+      | awk '{print $5}'
+    ;;
+    'https'|'http')
+      curl -fSsL "${url}" \
+      | sed '
+        s@^.*<a href="\([^"]\+\)/">\1/</a>.*$@\1@
+        t
+        d
+      '
+    ;;
+    *)
+      >&2 printf 'Unknown protocol: %s\n' "${url%%://*}"
+    ;;
+  esac
+}
+
+retrieve_file() {
+  local url="$1"
+  local file="$2"
+  case "${url%%://*}" in
+    'rsync')
+      rsync -q "${url}" "${file}"
+    ;;
+    'https'|'http')
+      curl -fSsL "${url}" -o "${file}"
+    ;;
+    *)
+      >&2 printf 'Unknown protocol: %s\n' "${url%%://*}"
+    ;;
+  esac
+}
+
+retrieve_content() {
+  local url="$1"
+  local destination="$2"
+  case "${url%%://*}" in
+    'rsync')
+      rsync ${rsync_options} ${extra_options} ${quiet} "${url}" "${destination}"
+    ;;
+    'https'|'http')
+      wget --mirror -nH -np ${quiet} -P "${destination}" --cut-dirs $(
+        echo "${url#*//}" \
+        | tr -d '\n' \
+        | tr '/' '\n' \
+        | sed 1d \
+        | wc -l
+      ) "${url}" \
+      || return $?
+      find "${destination}" \
+        -mindepth 1 \
+        -type d \
+      | while read -r dir; do
+        index="${dir}/index.html"
+        [ -r "${index}" ] || continue
+        {
+          find "${dir}" -mindepth 1 -maxdepth 1 -type f -printf '%f\n'
+          sed -n '
+            s@^.*<a href="\([^"/]*\)">.*$@\1@
+            T
+            /^\./! p
+          ' "${index}" \
+          | sort -u \
+          | sed 'p'
+        } \
+        | sort \
+        | uniq -u \
+        | while read -r file; do
+          rm "${dir}/${file}"
+        done
+      done
+    ;;
+    *)
+      >&2 printf 'Unknown protocol: %s\n' "${url%%://*}"
+    ;;
+  esac
+}
+
 resolve_complete_mirror() {
   local host="$1"
   local pre="${host%%//*}//"
@@ -38,9 +121,7 @@ resolve_complete_mirror() {
   } \
   | awk '{print "'"${pre}"'" $1 "'"${post}"'"}' \
   | while read -r url; do
-    rsync "${url}" \
-    | grep '^d' \
-    | awk '{print $5}' \
+    retrieve_listing "${url}" \
     | grep -v '^\(merged$\|\.\)' \
     | awk '{print "/" $1 "/#'"${url}"'" $1 "/"}'
   done
@@ -79,7 +160,7 @@ fetch_lastsync() {
   while read -r proximity url; do
     rm -f "${tmp_file}"
     if [ -n "${1}" ]; then
-      if ! rsync -q "${url}${1}" "${tmp_file}" 2>/dev/null; then
+      if ! retrieve_file "${url}${1}" "${tmp_file}" 2>/dev/null; then
         rm -f "${tmp_file}"
       fi
     fi
@@ -91,7 +172,7 @@ fetch_lastsync() {
         if [ -s "${tmp_file}" ]; then
           break
         fi
-        if ! rsync -q "${url}${dir}${file}" "${tmp_file}" 2>/dev/null; then
+        if ! retrieve_file "${url}${dir}${file}" "${tmp_file}" 2>/dev/null; then
           rm -f "${tmp_file}"
         fi
       done
@@ -119,6 +200,8 @@ far_mirrors=(
   )
   '/archlinux/#rsync://ftp.gwdg.de/pub/linux/archlinux/'
   '/archlinuxarm/#rsync://ftp.halifax.rwth-aachen.de/archlinux-arm/'
+  '/archlinuxfr/#https://repo.archlinux.fr/'
+  '/quarry/#https://pkgbuild.com/~anatolik/quarry/'
 )
 
 close_mirrors=(
@@ -178,7 +261,7 @@ printf '%s\n' "${far_mirrors[@]}" "${close_mirrors[@]}" \
     if [ "${level}" -le "${last_successful_level}" ]; then
       continue
     fi
-    if rsync ${rsync_options} ${extra_options} ${quiet} "${url}" "${mirror_dir}${to_sync}"; then
+    if retrieve_content "${url}" "${mirror_dir}${to_sync}"; then
       last_successful_level="${level}"
     fi
   done
```