mirror of
https://github.com/kmein/niveum
synced 2026-03-16 10:11:08 +01:00
bots: program nietzschebot
This commit is contained in:
71
packages/random-zeno.nix
Executable file
71
packages/random-zeno.nix
Executable file
@@ -0,0 +1,71 @@
|
||||
# Package function: the tools the generated script calls are taken as
# arguments so they can be placed on its PATH below.
{
  writers,
  lib,
  curl,
  pup,
  gnused,
  coreutils,
  pandoc,
  gawk,
  jq,
}:
# Produces an executable named "random-zeno" whose body is the shell
# script in the indented string that follows.
writers.writeDashBin "random-zeno" ''
# -e: stop on the first failing command, -f: no pathname expansion,
# -u: treat use of an unset variable as an error.
set -efu

# Search path limited to exactly the tools listed in the argument set.
PATH=${lib.makeBinPath [ curl pup gnused coreutils pandoc gawk jq ]}
export PATH

# Budget (counted per word plus one separator) for the printed excerpt.
character_limit=350
# Site prefix; every path handled by this script is appended to it.
root="http://www.zeno.org"
|
||||
|
||||
# fetch PATH
#
# Download "$root" followed by PATH and write the response body to stdout.
#
# curl flags:
#   -f  fail (non-zero exit, no body) on HTTP error statuses, so that an
#       error page is never scraped as if it were real content; combined
#       with "set -e" in the caller this aborts the script
#   -s  no progress meter
#   -S  still print an error message when the transfer fails
#   -L  follow redirects
fetch() {
  curl -fsSL "$root$1"
}
|
||||
|
||||
# sed pattern: link targets matching it are dropped and never followed.
blacklist='\/Biographie$'

# next_links HTML
#
# Print, one per line, the href values found in HTML for
#   1. elements with class zenoTXLinkInt, and
#   2. anchors inside elements with class zenoTRNavBottom,
# minus every target that matches "$blacklist".
#
# The document is fed through printf rather than echo: the echo builtin of
# dash expands backslash escapes (for example \c cuts the output short),
# which would corrupt any page that contains a backslash.
next_links() {
  html="$1"
  printf '%s\n' "$html" | pup '.zenoTXLinkInt attr{href}' | sed "/$blacklist/d"
  printf '%s\n' "$html" | pup '.zenoTRNavBottom a attr{href}' | sed "/$blacklist/d"
}
|
||||
|
||||
# eval_html PATH
#
# Walk the site starting at PATH: fetch the page and, while it still offers
# links (see next_links), jump to one of them chosen at random. The first
# page without any link is treated as a text page; its paragraphs are
# handed to prettify, which prints the excerpt and the page URL.
#
# Exits with status 0 and no output on stdout when the final page contains
# no paragraph text at all.
eval_html() {
  url="$1"
  html="$(fetch "$url")"
  # Blank lines are removed so that shuf can never pick an empty link.
  links="$(next_links "$html" | sed '/^\s*$/d')"

  if [ -n "$links" ]
  then
    # printf, not echo: dash's echo would expand backslash escapes.
    next_link="$(printf '%s\n' "$links" | shuf -n1)"
    eval_html "$next_link"
  else
    # Flatten the paragraphs to one per line (a newline after each </p>).
    paragraphs="$(printf '%s\n' "$html" | pup '.zenoCOMain p' | tr '\n' ' ' | sed 's/<\/p>/&\n/g')"

    # Nothing but whitespace means the page has no usable text; printing
    # an empty quote followed by a link would be pointless.
    if [ -z "$(printf '%s' "$paragraphs" | tr -d '[:space:]')" ]
    then
      echo "random-zeno: no text found at $root$url" >&2
      exit 0
    fi

    prettify "$url" "$paragraphs"
  fi
}
|
||||
|
||||
# prettify PATH HTML
#
# Turn HTML into a plain-text excerpt limited by "$character_limit" and
# print it, followed by an empty line and the absolute URL of PATH.
#
# Steps:
#   1. join the HTML onto one line, drop headings (h1-h6 without nested
#      tags) and bracketed numeric markers such as "[12]", then convert
#      it to plain text with pandoc;
#   2. emit whole words until the limit would be exceeded and append an
#      ellipsis; afterwards cut back to the last punctuation mark so the
#      excerpt does not stop in the middle of a clause;
#   3. percent-encode PATH for the printed link.
prettify() {
  url="$1"
  html="$2"

  # printf instead of echo throughout: dash's echo expands backslash
  # escapes and would mangle text that contains a backslash.
  plain_text="$(printf '%s\n' "$html" | tr '\n' ' ' \
    | sed \
      -e 's/<h[1-6][^>]*>[^<]*<\/h[1-6]>//g' \
      -e 's/\[[0-9]*\]\s*//g' \
    | pandoc -f html -t plain \
    | tr '\n' ' '
  )"

  truncated=$(printf '%s\n' "$plain_text" | awk -v limit="$character_limit" '
    {
      for (i = 1; i <= NF; i++) {
        word_length = length($i) + 1; # +1 for the space
        if (total_length + word_length > limit) {
          print "…";
          break;
        } else {
          total_length += word_length;
          printf "%s ", $i; # Print the word followed by a space
        }
      }
      print "";
    }
  ' | sed 's/\([:,.!?;]\)[^:,.!?;]*…/\1 …/')

  # Encode each path segment on its own. Applying @uri to the whole path
  # would turn every "/" into %2F, and a trailing newline in the input
  # would end up as %0A; both produce a link that cannot be opened.
  url="$(printf '%s' "$url" | jq -sRr 'split("/") | map(@uri) | join("/")')"
  printf "%s\n\n%s\n" "$truncated" "$root$url"
}
|
||||
|
||||
# Entry point. The single argument is the path, relative to "$root", at
# which the random walk starts. Checked explicitly because "set -u" would
# otherwise abort with an unhelpful "parameter not set" message.
if [ "$#" -lt 1 ]
then
  echo "usage: random-zeno PATH" >&2
  exit 1
fi

eval_html "$1"
''
|
||||
Reference in New Issue
Block a user