#! /bin/sh
#
# TT(Y)zara, a Tristan Tzara-style dadaist poetry generator.
#
# Copyleft (ↄ) 2019 jkirchartz <[email protected]>
#
# Distributed under terms of the NPL (Necessary Public License) license.
#

# Pick a random text source (1-10) and extract one article URL from it.
# Sets: num (the roll, echoed for visibility), url (article to mangle).
# NOTE: every grep pattern is quoted — unquoted patterns like [[:digit:]]
# or manifestos/[0-9] are glob patterns the shell may expand against
# files in the current directory before grep ever sees them.
num=$(shuf -i1-10 -n1)
printf '%s\n' "$num"
case "$num" in
 10)
   # 391.org dada manifestos
   url=$(lynx -dump "https://391.org/manifestos/page/$(shuf -i1-7 -n1)/" | grep 'manifestos/[0-9]' | grep -v page | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 9)
   # Textfiles.com
   url=$(lynx -dump "https://github.com/opsxcq/mirror-textfiles.com/search?l=Text&p=$(shuf -i1-100 -n1)" | grep blob | shuf -n1 | sed -e 's/.*textfiles\.com/http:\/\/textfiles\.com/')
   ;;
 8)
   # Wikipedia
   url="https://en.wikipedia.org/wiki/Special:Random"
   ;;
 7)
   # NPR
   url=$(lynx -dump https://text.npr.org/ | grep '=[7-9][7-9]' | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 6)
   # Lobsters
   url=$(lynx -dump https://lobste.rs/newest | grep -v "lobste" | grep '[[:digit:]]' | rev | cut -d' ' -f1 | rev | grep http | shuf -n1)
   ;;
 5)
   # Reuters
   url=$(lynx -dump https://www.reuters.com/commentary | grep article | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 4)
   # Christian Science Monitor
   url=$(lynx -dump https://www.csmonitor.com/layout/set/text/textedition | grep '/20' | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 3)
   # CNN
   url=$(lynx -dump https://lite.cnn.io/en | grep article | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 2)
   # Folding Story
   url=$(lynx -dump http://foldingstory.com/read/ | grep http | tail -n 10 | rev | cut -d' ' -f1 | rev | shuf -n1)
   ;;
 *)
   # Dreams
   url=$(lynx -dump "http://www.dreamjournal.net/main/dreams.cfm?timeframe=month" | grep /journal/ | grep -v /user/ | shuf -n1 | rev | cut -d' ' -f1 | rev)
   ;;
esac

# Roll 1-4 to choose a sed program that inserts a blank line (G) every
# few lines, shaping the word stream into stanzas of 3-6 lines.
stanza_roll=$(shuf -i1-4 -n1)
case "$stanza_roll" in
 4) linepattern='n;n;n;n;n;G;' ;;  # 6-line stanzas
 3) linepattern='n;n;n;G;'     ;;  # 4-line stanzas
 2) linepattern='n;n;G;'       ;;  # 3-line stanzas
 *) linepattern='n;n;n;n;G;'   ;;  # 5-line stanzas (roll of 1)
esac

# generate poem: dump the article, keep only prose-looking lines, shred it
# into shuffled words, and re-fold the words into random stanza shapes.
# Fixes vs. previous version:
#  - "$url" is quoted (URLs contain ? & = which the shell may mangle).
#  - the image-stripping sed used [^\]]: in a POSIX bracket expression a
#    backslash is literal, so that class meant "one non-backslash char"
#    and the pattern only matched one-character brackets; [^]]* is the
#    correct "anything up to the closing bracket".
#  - the word-extracting grep class was [A-Za-z\'-], which also admitted
#    a literal backslash into "words"; now letters, apostrophe, hyphen.
lynx -dump -nolist "${url}" | awk 'NF>=10' | sed -e 's/\[[^]]*\]//g' | sed -e '/^[ \t]*\*/d' | grep -o -E "[A-Za-z'-]+" |\
 shuf | tr '\n' ' ' | fold -sw "$(shuf -i 60-100 -n1)" |\
 shuf -n "$(shuf -i 3-10 -n1)" | fold -sw "$(shuf -i 13-30 -n1)" |\
 sed "${linepattern}"

# pipe-by-pipe rundown:
# Fetch & Clean Article
# 1: lynx: dump contents of URL
# 2: awk: clear all lines less than 10 words long (assume shorter lines are links/navigation/etc, and longer lines are content)
# 3: sed: remove lynx-syntax images
# 4: sed: delete bulleted list lines (lines whose first non-blank character is '*')
# 5: grep: extract the words themselves — runs of letters, apostrophes, and hyphens, one per line
# Put in bag & Shake
# 6: shuf: shuffle lines
# Pull out words (not quite one-at-a-time, but forced into the shape of a poem)
# 7: tr: convert to one long line
# 8: fold: fold text into lines 60-100 characters long (mindful not to split words)
# 9: shuf: shuffle lines, only return 3-10 of them
# 10: fold: fold text into lines 13-30 characters long (mindful not to split words)
# 11: sed: split text into stanzas 3-6 lines long

# Attribution footer. printf instead of echo: echo's handling of \n escapes
# is not portable (bash's builtin prints them literally without -e, while
# dash expands them), so under /bin/sh the old line could emit raw "\n".
printf '\n\n      by T.T(Y)zara\n (from %s)\n' "${url}"