diff options
author | Erich Eckner <git@eckner.net> | 2019-01-09 23:48:58 +0100 |
---|---|---|
committer | Erich Eckner <git@eckner.net> | 2019-01-09 23:48:58 +0100 |
commit | 01dc6496d063a8f04f5716cd30098db4d0619481 (patch) | |
tree | 344470e32d9de88fc1ac676fcb4ec1769706a94e /generate-random-word-from-wikipedia.sh | |
parent | 747d6044dfcad03f2899f8b68cd37925a63ebbf7 (diff) | |
download | markov-01dc6496d063a8f04f5716cd30098db4d0619481.tar.xz |
geht jetzt
Diffstat (limited to 'generate-random-word-from-wikipedia.sh')
-rwxr-xr-x | generate-random-word-from-wikipedia.sh | 30 |
1 files changed, 30 insertions, 0 deletions
#!/bin/bash
#
# Feed a cached word list to the markov program located next to this script.
#
# Usage: generate-random-word-from-wikipedia.sh DEPTH [MARKOV_ARGS...]
#
#   DEPTH        how many times dive-into-wikipedia.sh is re-run on its own
#                (de-duplicated) output; also selects the cache file
#                ".words.DEPTH" in this script's directory
#   MARKOV_ARGS  passed through unchanged to markov after the cache file path
#
# The cache file is only (re)built when it is missing or empty.
#
# Helpers expected in the same directory as this script:
#   dive-into-wikipedia.sh  presumably prints URLs reachable from the URLs
#                           given as arguments -- TODO confirm
#   remove-tags.sh          called once per URL; presumably prints its words
#                           -- TODO confirm
#   markov                  consumes the word list
# Also needs a "parallel" that reads its arguments from stdin.

usage() {
  printf 'Usage: %s DEPTH [MARKOV_ARGS...]\n' "${0##*/}" >&2
  exit 2
}

base_dir=$(readlink -f "$(dirname "$0")")

# Reject a missing or non-numeric DEPTH up front instead of letting "shift"
# and the loop fail half-way and continue with a bogus ".words." cache file.
[[ ${1-} =~ ^[0-9]+$ ]] || usage
depth=$1
shift

words_file="${base_dir}/.words.${depth}"

if [ ! -s "${words_file}" ]; then

  urls=()

  # 10# forces base 10 so that a DEPTH such as "08" is not read as octal.
  for ((i = 1; i <= 10#${depth}; i++)); do
    # Split the helper's output on whitespace into the array. Unlike an
    # unquoted $(...) inside urls=( ... ), this never applies pathname
    # expansion, so URLs containing "?", "*" or "[" are kept verbatim.
    # read returns non-zero at end of input even though the array was
    # filled, hence the "|| true".
    read -r -d '' -a urls < <(
      "${base_dir}/dive-into-wikipedia.sh" "${urls[@]}" \
        | sort -u
    ) || true
    # Progress indicator on stderr.
    printf '%s\n' "${i}" >&2
  done

  # Build the word list in a temporary file next to the cache and rename it
  # only after the pipeline has finished: an interrupted run must not leave
  # a truncated, non-empty cache behind that later runs would accept.
  tmp_file=$(mktemp "${words_file}.XXXXXX") || {
    printf 'cannot create temporary file for %s\n' "${words_file}" >&2
    exit 1
  }
  trap 'rm -f -- "${tmp_file}"' EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM

  # Exit statuses of the pipeline stages are deliberately not checked, as
  # before: whatever words were collected end up in the cache.
  printf '%s\n' "${urls[@]}" \
    | parallel -j0 -n1 "${base_dir}/remove-tags.sh" \
    | sort -u \
    | grep -vxF '' \
    > "${tmp_file}"

  # mktemp creates the file with mode 0600; restore the umask-derived mode a
  # plain ">" redirection would have produced.
  chmod "$(printf '%o' "$((0666 & ~$(umask)))")" "${tmp_file}"

  mv -f -- "${tmp_file}" "${words_file}" || exit 1
  trap - EXIT INT TERM

fi

"${base_dir}/markov" "${words_file}" "$@"