fix: use aliases to correctly and uniquely get all texts

This commit is contained in:
Kierán Meinhardt
2019-10-01 21:34:50 +02:00
parent 9d755bb420
commit e8409354e4

View File

@@ -1,10 +1,9 @@
#!/bin/sh #!/bin/sh
kv_GET () { alias to_utf8='iconv -f latin1 -t utf8'
route=$1 alias curl_GET='curl -X GET -s -G'
shift
curl -s "https://www.keinverlag.de$route" "$@" | iconv -f latin1 -t utf8 BASE_URL=https://www.keinverlag.de
}
kv_author_id () { kv_author_id () {
if [ $# -ne 1 ]; then if [ $# -ne 1 ]; then
@@ -14,7 +13,8 @@ kv_author_id () {
author_name=$1 author_name=$1
kv_GET "/$author_name.kv" \ curl_GET "$BASE_URL/$author_name.kv" \
| to_utf8 \
| sed -n 's/.*autor=\([0-9]\+\).*/\1/p' \ | sed -n 's/.*autor=\([0-9]\+\).*/\1/p' \
| head -1 | head -1
} }
@@ -27,7 +27,8 @@ kv_text () {
text_id=$1 text_id=$1
kv_GET "/$text_id.text" \ curl_GET "$BASE_URL/$text_id.text" \
| to_utf8 \
| sed -n '/<h1>/,/<!-- Kommentarbox -->/p' \ | sed -n '/<h1>/,/<!-- Kommentarbox -->/p' \
| sed 's/<h3>.\+<\/h3>//g' \ | sed 's/<h3>.\+<\/h3>//g' \
| pandoc -f html -t plain | pandoc -f html -t plain
@@ -41,8 +42,9 @@ kv_author_texts () {
author_id=$1 author_id=$1
kv_GET '/autorentexte.php' -d sortby=datum -d start=0 -d limit=10000 -d autor="$author_id" \ curl_GET "$BASE_URL/autorentexte.php" -d sortby=tnr -d start=0 -d limit=10000 -d autor="$author_id" \
| sed -n 's/.*<a href="\([0-9]\+\)\.text">.*/\1/p' | to_utf8 \
| sed -n 's/.*<li><a href="\([0-9]\+\)\.text">.*/\1/p'
} }
case $1 in case $1 in
@@ -51,7 +53,7 @@ case $1 in
kv_text "$@";; kv_text "$@";;
author) author)
shift shift
for text_id in $(kv_author_texts "$(kv_author_id "$@")"); do for text_id in $(kv_author_texts "$(kv_author_id "$@")" | uniq); do
kv_text "$text_id" kv_text "$text_id"
done ;; done ;;
*) *)