#!/bin/bash
#
# Build (once) a word list scraped from Wikipedia and feed it to the markov
# generator that lives next to this script.
#
# Usage: <script> wiki-depth markov-depth word-count
#   wiki-depth    number of dive-into-wikipedia.sh rounds used to collect URLs;
#                 also selects the cache file ".words.<wiki-depth>"
#   markov-depth  passed through unchanged to ./markov
#   word-count    passed through unchanged to ./markov
#
# Helpers expected in the script's directory: dive-into-wikipedia.sh,
# remove-tags.sh, markov. External tools used: GNU parallel, curl.
# Exit status: 1 on usage error or when no word list could be built,
# otherwise the exit status of ./markov.

base_dir=$(readlink -f "$(dirname "$0")")

if (( $# != 3 )); then
  printf 'usage: %s wiki-depth markov-depth word-count\n' "${0##*/}" >&2
  exit 1
fi

depth=$1
shift   # "$@" now holds only the two arguments meant for markov

words_file="${base_dir}/.words.${depth}"

# The word list is cached per depth; rebuild it only if missing or empty.
if [[ ! -s "${words_file}" ]]; then
  urls=()
  for i in $(seq "${depth}"); do
    # Each round hands the previous round's URLs back to the dive script.
    # read -a splits the output on whitespace like an unquoted $(...) would,
    # but without pathname expansion, so '?', '*' or '[' in a URL stay intact.
    # read returns non-zero at end of input; the array is still filled.
    IFS=$' \t\n' read -r -d '' -a urls < <(
      "${base_dir}/dive-into-wikipedia.sh" "${urls[@]}" | sort -u
    )
    printf '%s\n' "$i" >&2   # progress indicator: finished round number
  done

  if (( ${#urls[@]} == 0 )); then
    printf '%s: no URLs collected (is wiki-depth a positive number?)\n' \
      "${0##*/}" >&2
    exit 1
  fi

  # Write to a temporary file next to the cache and rename it only when the
  # download produced something, so an interrupted or failed run never leaves
  # a truncated list that later runs would mistake for a complete cache.
  tmp_file="${words_file}.$$.tmp"
  trap 'rm -f -- "${tmp_file}"' EXIT
  trap 'exit 1' HUP INT TERM   # route signals through the EXIT cleanup

  # Fetch every URL in parallel through the SOCKS5 proxy on 127.0.0.1:9050,
  # strip the markup, turn every character that is not one of the listed
  # letters into a newline, drop the resulting empty lines and de-duplicate.
  # Individual failed downloads are tolerated (best effort).
  printf '%s\n' "${urls[@]}" \
    | parallel -j0 -n1 curl -x 'socks5://127.0.0.1:9050' -s \
    | "${base_dir}/remove-tags.sh" \
    | tr -C 'a-zA-ZäöüÄÖÜß' '\n' \
    | grep -vxF '' \
    | sort -u \
    > "${tmp_file}"

  if [[ ! -s "${tmp_file}" ]]; then
    printf '%s: download produced no words; cache not written\n' \
      "${0##*/}" >&2
    exit 1
  fi

  mv -- "${tmp_file}" "${words_file}" || exit 1
  trap - EXIT HUP INT TERM
fi

"${base_dir}/markov" "${words_file}" "$@"