summary refs log tree commit diff
path: root/generate-random-word-from-wikipedia.sh
diff options
context:
space:
mode:
Diffstat (limited to 'generate-random-word-from-wikipedia.sh')
-rwxr-xr-x generate-random-word-from-wikipedia.sh 30
1 files changed, 30 insertions, 0 deletions
diff --git a/generate-random-word-from-wikipedia.sh b/generate-random-word-from-wikipedia.sh
new file mode 100755
index 0000000..fbc720f
--- /dev/null
+++ b/generate-random-word-from-wikipedia.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Usage: generate-random-word-from-wikipedia.sh DEPTH [MARKOV_ARGS...]
+# Runs markov on the DEPTH-keyed word cache, building that cache first if needed.
+set -f  # no globbing: the word-split entries of urls must never match local files
+base_dir=$(readlink -f "$(dirname "$0")")
+depth=${1:?"usage: $0 DEPTH [MARKOV_ARGS...]"}
+shift
+words="${base_dir}/.words.${depth}"
+trap 'rm -f -- "${words}.tmp.$$"' EXIT  # never leave a half-written temp file
+if [ ! -s "${words}" ]; then
+  urls=()
+  # Each round hands the previous round's output back to dive-into-wikipedia.sh.
+  for i in $(seq "${depth}"); do
+    urls=(
+      $(
+        "${base_dir}/dive-into-wikipedia.sh" "${urls[@]}" \
+          | sort -u
+      )
+    )
+    echo "$i" >&2  # progress: number of rounds completed so far
+  done
+  # Build under a temp name and rename afterwards, so an interrupted build is
+  # never mistaken for a finished cache by the -s test on the next run.
+  printf '%s\n' "${urls[@]}" \
+    | parallel -j0 -n1 "${base_dir}/remove-tags.sh" \
+    | sort -u \
+    | grep -vxF '' > "${words}.tmp.$$"
+  mv -f -- "${words}.tmp.$$" "${words}"
+fi
+"${base_dir}/markov" "${words}" "$@"