#! /bin/sh
#
# TT(Y)zara, a Tristan Tzara-style dadaist poetry generator.
#
# Copyleft (ↄ) 2019 jkirchartz <[email protected]>
#
# Distributed under terms of the NPL (Necessary Public License) license.
#

# Pick a random text source (1-10) and extract one article URL from it.
# Sets: num (the roll, echoed for visibility), url (article to mangle).
# NOTE: every grep pattern is quoted — unquoted patterns like [[:digit:]]
# or manifestos/[0-9] are glob patterns the shell may expand against
# files in the current directory before grep ever sees them.
num=$(shuf -i1-10 -n1)
printf '%s\n' "$num"
case "$num" in
 10)
   # 391.org dada manifestos
   url=$(lynx -dump "https://391.org/manifestos/page/$(shuf -i1-7 -n1)/" | grep 'manifestos/[0-9]' | grep -v page | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 9)
   # Textfiles.com
   url=$(lynx -dump "https://github.com/opsxcq/mirror-textfiles.com/search?l=Text&p=$(shuf -i1-100 -n1)" | grep blob | shuf -n1 | sed -e 's/.*textfiles\.com/http:\/\/textfiles\.com/')
   ;;
 8)
   # Wikipedia
   url="https://en.wikipedia.org/wiki/Special:Random"
   ;;
 7)
   # NPR
   url=$(lynx -dump https://text.npr.org/ | grep '=[7-9][7-9]' | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 6)
   # Lobsters
   url=$(lynx -dump https://lobste.rs/newest | grep -v "lobste" | grep '[[:digit:]]' | rev | cut -d' ' -f1 | rev | grep http | shuf -n1)
   ;;
 5)
   # Reuters
   url=$(lynx -dump https://www.reuters.com/commentary | grep article | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 4)
   # Christian Science Monitor
   url=$(lynx -dump https://www.csmonitor.com/layout/set/text/textedition | grep '/20' | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 3)
   # CNN
   url=$(lynx -dump https://lite.cnn.io/en | grep article | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 2)
   # Folding Story
   url=$(lynx -dump http://foldingstory.com/read/ | grep http | tail -n 10 | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 *)
   # Dreams
   url=$(lynx -dump "http://www.dreamjournal.net/main/dreams.cfm?timeframe=month" | grep /journal/ | grep -v /user/ | shuf -n1 | rev | cut -d' ' -f1 | rev)
   ;;
esac

# Roll 1-4 to choose a sed program that inserts a blank line (G) every
# few lines, shaping the word stream into stanzas of 3-6 lines.
stanza_roll=$(shuf -i1-4 -n1)
case "$stanza_roll" in
 4) linepattern='n;n;n;n;n;G;' ;;  # 6-line stanzas
 3) linepattern='n;n;n;G;'     ;;  # 4-line stanzas
 2) linepattern='n;n;G;'       ;;  # 3-line stanzas
 *) linepattern='n;n;n;n;G;'   ;;  # 5-line stanzas (roll of 1)
esac

# generate poem: dump the article, keep only prose-looking lines, shred it
# into shuffled words, and re-fold the words into random stanza shapes.
# Fixes vs. previous version:
#  - "$url" is quoted (URLs contain ? & = which the shell may mangle).
#  - the image-stripping sed used [^\]]: in a POSIX bracket expression a
#    backslash is literal, so that class meant "one non-backslash char"
#    and the pattern only matched one-character brackets; [^]]* is the
#    correct "anything up to the closing bracket".
#  - the word-extracting grep class was [A-Za-z\'-], which also admitted
#    a literal backslash into "words"; now letters, apostrophe, hyphen.
lynx -dump -nolist "${url}" | awk 'NF>=10' | sed -e 's/\[[^]]*\]//g' | sed -e '/^[ \t]*\*/d' | grep -o -E "[A-Za-z'-]+" |\
 shuf | tr '\n' ' ' | fold -sw "$(shuf -i 60-100 -n1)" |\
 shuf -n "$(shuf -i 3-10 -n1)" | fold -sw "$(shuf -i 13-30 -n1)" |\
 sed "${linepattern}"

# pipe-by-pipe rundown:
# Fetch & Clean Article
# 1: lynx: dump contents of URL
# 2: awk: clear all lines less than 10 words long (assume shorter lines are links/navigation/etc, and longer lines are content)
# 3: sed: remove lynx-syntax images
# 4: sed: delete bulleted list lines (lines whose first non-blank character is '*')
# 5: grep: extract the words themselves — runs of letters, apostrophes, and hyphens, one per line
# Put in bag & Shake
# 6: shuf: shuffle lines
# Pull out words (not quite one-at-a-time, but forced into the shape of a poem)
# 7: tr: convert to one long line
# 8: fold: fold text into lines 60-100 characters long (mindful not to split words)
# 9: shuf: shuffle lines, only return 3-10 of them
# 10: fold: fold text into lines 13-30 characters long (mindful not to split words)
# 11: sed: split text into stanzas 3-6 lines long

# Attribution footer. printf instead of echo: echo's handling of \n escapes
# is not portable (bash's builtin prints them literally without -e, while
# dash expands them), so under /bin/sh the old line could emit raw "\n".
printf '\n\n      by T.T(Y)zara\n (from %s)\n' "${url}"