summaryrefslogtreecommitdiff
path: root/generate-random-word-from-wikipedia.sh
diff options
context:
space:
mode:
author Erich Eckner <git@eckner.net> 2019-01-09 23:48:58 +0100
committer Erich Eckner <git@eckner.net> 2019-01-09 23:48:58 +0100
commit 01dc6496d063a8f04f5716cd30098db4d0619481 (patch)
tree 344470e32d9de88fc1ac676fcb4ec1769706a94e /generate-random-word-from-wikipedia.sh
parent 747d6044dfcad03f2899f8b68cd37925a63ebbf7 (diff)
download markov-01dc6496d063a8f04f5716cd30098db4d0619481.tar.xz
geht jetzt
Diffstat (limited to 'generate-random-word-from-wikipedia.sh')
-rwxr-xr-xgenerate-random-word-from-wikipedia.sh30
1 files changed, 30 insertions, 0 deletions
diff --git a/generate-random-word-from-wikipedia.sh b/generate-random-word-from-wikipedia.sh
new file mode 100755
index 0000000..fbc720f
--- /dev/null
+++ b/generate-random-word-from-wikipedia.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Run markov on a word list collected from Wikipedia.
+#
+# Usage: generate-random-word-from-wikipedia.sh <depth> [markov-args...]
+#
+#   depth        Number of passes of dive-into-wikipedia.sh; each pass receives
+#                the URLs printed by the previous pass as its arguments.
+#   markov-args  Passed through unchanged to the markov program.
+#
+# The word list is cached in .words.<depth> next to this script and is only
+# rebuilt when that file is missing or empty.
+
+base_dir=$(readlink -f -- "$(dirname -- "$0")")
+
+# Without a numeric depth the cache file name and the loop below are
+# meaningless, so refuse to run instead of caching garbage.
+if [[ ! "${1-}" =~ ^[0-9]+$ ]]; then
+  printf 'Usage: %s <depth> [markov-args...]\n' "${0##*/}" >&2
+  exit 2
+fi
+depth=$1
+shift
+
+words_file="${base_dir}/.words.${depth}"
+
+if [[ ! -s "${words_file}" ]]; then
+
+  urls=()
+
+  for i in $(seq "${depth}"); do
+    # Split the helper's output on whitespace into the array without the
+    # pathname expansion an unquoted $(...) would apply to words such as
+    # URLs containing '?' or '*'. read returns non-zero at end of input even
+    # though the array has been filled, so its status is not checked.
+    IFS=$' \t\n' read -r -d '' -a urls < <(
+      "${base_dir}/dive-into-wikipedia.sh" "${urls[@]}" \
+        | sort -u
+    )
+    printf '%s\n' "${i}" >&2 # progress: number of the pass just finished
+  done
+
+  # Run remove-tags.sh once per URL, then keep the unique non-empty lines.
+  printf '%s\n' "${urls[@]}" \
+    | parallel -j0 -n1 "${base_dir}/remove-tags.sh" \
+    | sort -u \
+    | grep -vxF '' \
+    > "${words_file}"
+
+fi
+
+"${base_dir}/markov" "${words_file}" "$@"