summary refs log tree commit diff
diff options
context:
space:
mode:
author Erich Eckner <git@eckner.net> 2020-05-14 11:42:50 +0200
committer Erich Eckner <git@eckner.net> 2020-05-14 11:42:50 +0200
commit 505a9d19281069703f04b477f85dfdb0f21165fb (patch)
tree 8a4ef063ea1d2a33b0ac7eadabbc025c8efe63d7
parent d51290b4776fbc281d5b098479999771a360f0cf (diff)
download arch-mirror-505a9d19281069703f04b477f85dfdb0f21165fb.tar.xz
arch-mirror: enable download from http and https mirrors, too
-rwxr-xr-x arch-mirror 95
1 files changed, 89 insertions, 6 deletions
diff --git a/arch-mirror b/arch-mirror
index 67e84ba..714792c 100755
--- a/arch-mirror
+++ b/arch-mirror
@@ -17,6 +17,89 @@ if ! stty &>/dev/null; then
quiet="-q"
fi
+# Print the names of the sub-directories listed at url "$1", one per line.
+# Supports rsync://, http:// and https://; any other protocol is reported
+# on stderr and makes the function return 1.
+retrieve_listing() {
+  local url="$1"
+  case "${url%%://*}" in
+    'rsync')
+      # keep the lines starting with "d" (directory entries), print field 5
+      rsync "${url}" | awk '/^d/ {print $5}'
+    ;;
+    'https'|'http')
+      # print "name" for every link whose target and label are both "name/"
+      curl -fSsL "${url}" \
+        | sed -n 's@^.*<a href="\([^"]\+\)/">\1/</a>.*$@\1@p'
+    ;;
+    *)
+      >&2 printf 'Unknown protocol: %s\n' "${url%%://*}"
+      return 1
+    ;;
+  esac
+}
+
+# Download the single file at url "$1" to the local path "$2".
+# Supports rsync://, http:// and https:// and returns the status of the
+# transfer tool; any other protocol is reported on stderr and makes the
+# function return 1, so callers do not mistake it for a finished download.
+retrieve_file() {
+  local url="$1" file="$2"
+  case "${url%%://*}" in
+    'rsync') rsync -q "${url}" "${file}" ;;
+    'https'|'http') curl -fSsL "${url}" -o "${file}" ;;
+    *)
+      >&2 printf 'Unknown protocol: %s\n' "${url%%://*}"
+      return 1
+    ;;
+  esac
+}
+
+# Mirror the tree at url "$1" into the local directory "$2".
+# rsync urls are synced using the globals ${rsync_options}, ${extra_options}
+# and ${quiet}; http(s) urls are mirrored with wget, then local files that
+# the downloaded index.html of their directory does not link to are removed.
+# Returns non-zero if the transfer fails or the protocol is unknown.
+retrieve_content() {
+  local url="$1" destination="$2" path slashes
+  case "${url%%://*}" in
+    'rsync')
+      # option variables stay unquoted: they are meant to be word-split
+      rsync ${rsync_options} ${extra_options} ${quiet} "${url}" "${destination}"
+    ;;
+    'https'|'http')
+      # --cut-dirs is one less than the number of "/" following the host
+      path="${url#*//}"
+      slashes="${path//[^\/]/}"
+      wget --mirror -nH -np ${quiet} -P "${destination}" \
+        --cut-dirs "$(( ${#slashes} > 0 ? ${#slashes} - 1 : 0 ))" \
+        "${url}" || return $?
+      find "${destination}" -mindepth 1 -type d \
+        | while IFS= read -r dir; do
+          index="${dir}/index.html"
+          [ -r "${index}" ] || continue
+          # local names are emitted once, linked names twice, so the names
+          # "uniq -u" lets through exist locally but not in the index
+          {
+            find "${dir}" -mindepth 1 -maxdepth 1 -type f -printf '%f\n'
+            sed -n '
+              s@^.*<a href="\([^"/]*\)">.*$@\1@
+              T
+              /^\./! p
+            ' "${index}" | sort -u | sed 'p'
+          } | sort | uniq -u \
+            | while IFS= read -r file; do
+              rm -- "${dir}/${file}"
+            done
+        done
+    ;;
+    *)
+      >&2 printf 'Unknown protocol: %s\n' "${url%%://*}"
+      return 1
+    ;;
+  esac
+}
+
resolve_complete_mirror() {
local host="$1"
local pre="${host%%//*}//"
@@ -38,9 +121,7 @@ resolve_complete_mirror() {
} \
| awk '{print "'"${pre}"'" $1 "'"${post}"'"}' \
| while read -r url; do
- rsync "${url}" \
- | grep '^d' \
- | awk '{print $5}' \
+ retrieve_listing "${url}" \
| grep -v '^\(merged$\|\.\)' \
| awk '{print "/" $1 "/#'"${url}"'" $1 "/"}'
done
@@ -79,7 +160,7 @@ fetch_lastsync() {
while read -r proximity url; do
rm -f "${tmp_file}"
if [ -n "${1}" ]; then
- if ! rsync -q "${url}${1}" "${tmp_file}" 2>/dev/null; then
+ if ! retrieve_file "${url}${1}" "${tmp_file}" 2>/dev/null; then
rm -f "${tmp_file}"
fi
fi
@@ -91,7 +172,7 @@ fetch_lastsync() {
if [ -s "${tmp_file}" ]; then
break
fi
- if ! rsync -q "${url}${dir}${file}" "${tmp_file}" 2>/dev/null; then
+ if ! retrieve_file "${url}${dir}${file}" "${tmp_file}" 2>/dev/null; then
rm -f "${tmp_file}"
fi
done
@@ -119,6 +200,8 @@ far_mirrors=(
)
'/archlinux/#rsync://ftp.gwdg.de/pub/linux/archlinux/'
'/archlinuxarm/#rsync://ftp.halifax.rwth-aachen.de/archlinux-arm/'
+ '/archlinuxfr/#https://repo.archlinux.fr/'
+ '/quarry/#https://pkgbuild.com/~anatolik/quarry/'
)
close_mirrors=(
@@ -178,7 +261,7 @@ printf '%s\n' "${far_mirrors[@]}" "${close_mirrors[@]}" \
if [ "${level}" -le "${last_successful_level}" ]; then
continue
fi
- if rsync ${rsync_options} ${extra_options} ${quiet} "${url}" "${mirror_dir}${to_sync}"; then
+ if retrieve_content "${url}" "${mirror_dir}${to_sync}"; then
last_successful_level="${level}"
fi
done