# Mirror of https://github.com/kmein/niveum (synced 2026-03-16 10:11:08 +01:00).
# Upstream file: 72 lines, 1.8 KiB, Nix, executable.
{ writers, lib, curl, pup, gnused, coreutils, pandoc, gawk, jq }:
|
|
writers.writeDashBin "random-zeno" ''
|
|
set -efu
|
|
|
|
export PATH=${lib.makeBinPath [ curl pup gnused coreutils pandoc gawk jq ]}
|
|
|
|
root="http://www.zeno.org"
|
|
character_limit=350
|
|
|
|
fetch() {
|
|
curl -sSL "$root$1"
|
|
}
|
|
|
|
blacklist='\/Biographie$'
|
|
|
|
next_links() {
|
|
html="$1"
|
|
echo "$html" | pup '.zenoTXLinkInt attr{href}' | sed "/$blacklist/d"
|
|
echo "$html" | pup '.zenoTRNavBottom a attr{href}' | sed "/$blacklist/d"
|
|
}
|
|
|
|
eval_html() {
|
|
url="$1"
|
|
html="$(fetch "$url")"
|
|
links="$(next_links "$html" | sed '/^\s*$/d')"
|
|
links_count="$(echo "$links" | wc -l)"
|
|
if [ -z "$links" ]
|
|
then
|
|
random_paragraph="$(echo "$html" | pup '.zenoCOMain p' | tr '\n' ' ' | sed 's/<\/p>/&\n/g')"
|
|
[ "$random_paragraph" = null ] && exit
|
|
prettify "$url" "$random_paragraph"
|
|
else
|
|
next_link="$(echo "$links" | shuf -n1)"
|
|
eval_html "$next_link"
|
|
fi
|
|
}
|
|
|
|
prettify() {
|
|
url="$1"
|
|
html="$2"
|
|
|
|
plain_text="$(echo "$html" | tr '\n' ' ' \
|
|
| sed \
|
|
-e 's/<h[1-6][^>]*>[^<]*<\/h[1-6]>//g' \
|
|
-e 's/\[[0-9]*\]\s*//g' \
|
|
| pandoc -f html -t plain \
|
|
| tr '\n' ' '
|
|
)"
|
|
|
|
truncated=$(echo "$plain_text" | awk -v limit="$character_limit" '
|
|
{
|
|
for (i = 1; i <= NF; i++) {
|
|
word_length = length($i) + 1; # +1 for the space
|
|
if (total_length + word_length > limit) {
|
|
print "…";
|
|
break;
|
|
} else {
|
|
total_length += word_length;
|
|
printf "%s ", $i; # Print the word followed by a space
|
|
}
|
|
}
|
|
print "";
|
|
}
|
|
' | sed 's/\([:,.!?;]\)[^:,.!?;]*…/\1 …/')
|
|
|
|
url="$(echo "$url" | jq -sRr @uri)"
|
|
printf "%s\n\n%s\n" "$truncated" "$root$url"
|
|
}
|
|
|
|
eval_html "$1"
|
|
''
|