summary refs log tree commit diff
diff options
context:
space:
mode:
author Erich Eckner <git@eckner.net> 2019-01-10 13:24:34 +0100
committer Erich Eckner <git@eckner.net> 2019-01-10 13:24:34 +0100
commit 023e6986409ea92689f2a06a12d4f08019928f7b (patch)
tree b81e74d8675d002e924c164b942322a3cec641de
parent 9689618102d3693254f5fca08233e9182bf8787a (diff)
download markov-023e6986409ea92689f2a06a12d4f08019928f7b.tar.xz
remove-tags.sh should not download; that should be done in generate-random-word-from-wikipedia.sh
-rwxr-xr-x generate-random-word-from-wikipedia.sh 3
-rwxr-xr-x remove-tags.sh 9
2 files changed, 6 insertions, 6 deletions
diff --git a/generate-random-word-from-wikipedia.sh b/generate-random-word-from-wikipedia.sh
index dee670e..6b43c81 100755
--- a/generate-random-word-from-wikipedia.sh
+++ b/generate-random-word-from-wikipedia.sh
@@ -25,7 +25,8 @@ if [ ! -s "${base_dir}/.words.${depth}" ]; then
done
printf '%s\n' "${urls[@]}" \
- | parallel -j0 -n1 "${base_dir}/remove-tags.sh" \
+ | parallel -j0 -n1 curl -x 'socks5://127.0.0.1:9050' -s \
+ | "${base_dir}/remove-tags.sh" \
| tr -C '[a-zA-ZäöüÄÖÜß]' '\n' \
| grep -vxF '' \
| sort -u \
diff --git a/remove-tags.sh b/remove-tags.sh
index 5f46c92..3640d27 100755
--- a/remove-tags.sh
+++ b/remove-tags.sh
@@ -1,10 +1,9 @@
#!/bin/bash
-curl -x 'socks5://127.0.0.1:9050' -s "$1" \
- | sed '
- s/>/>\n/g
- s/</\n</g
- ' \
+sed '
+ s/>/>\n/g
+ s/</\n</g
+' \
| sed -n '
/^<!--.*>$/ d
/^<!--/,/-->$/ d