feat: add shell version
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
.direnv
|
||||
.envrc
|
||||
.history
|
||||
58
keinverlag
Executable file
58
keinverlag
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/bin/sh
|
||||
|
||||
# kv_GET ROUTE [CURL_ARGS...]
#
# Request https://www.keinverlag.de<ROUTE> with curl in silent mode, passing
# every argument after ROUTE through to curl unchanged, and write the response
# to stdout converted from Latin-1 to UTF-8.
kv_GET () {
  url="https://www.keinverlag.de$1"
  shift
  curl -s "$url" "$@" | iconv -f latin1 -t utf8
}
|
||||
|
||||
# kv_author_id AUTHOR_NAME
#
# Fetch the page /<AUTHOR_NAME>.kv and print the digits following "autor=" on
# the first line of that page that contains such a match. Prints nothing when
# no line matches.
#
# Exits with status 1 (after printing a message to stderr) unless called with
# exactly one argument.
kv_author_id () {
  if [ $# -ne 1 ]; then
    # Write to the existing stderr descriptor with >&2. Reopening it through
    # >/dev/stderr can truncate a file that stderr was redirected to, and the
    # path is not available everywhere.
    echo Please call kv_author_id with an author name. >&2
    exit 1
  fi

  author_name=$1

  # "[0-9][0-9]*" is the portable basic-regex spelling of "one or more
  # digits"; "\+" is a GNU extension that a plain /bin/sh environment may not
  # provide. "head -n 1" replaces the obsolescent "head -1" form.
  kv_GET "/$author_name.kv" \
    | sed -n 's/.*autor=\([0-9][0-9]*\).*/\1/p' \
    | head -n 1
}
|
||||
|
||||
# kv_text TEXT_ID
#
# Fetch the page /<TEXT_ID>.text, keep the lines from the first one containing
# "<h1>" through the next one containing "<!-- Kommentarbox -->", and print
# that HTML fragment as plain text via pandoc.
#
# Exits with status 1 (after printing a message to stderr) unless called with
# exactly one argument.
kv_text () {
  if [ $# -ne 1 ]; then
    # Write to the existing stderr descriptor with >&2. Reopening it through
    # >/dev/stderr can truncate a file that stderr was redirected to.
    echo Please call kv_text with a text ID. >&2
    exit 1
  fi

  text_id=$1

  kv_GET "/$text_id.text" \
    | sed -n '/<h1>/,/<!-- Kommentarbox -->/p' \
    | pandoc -f html -t plain
}
|
||||
|
||||
# kv_author_texts AUTHOR_ID
#
# Request /autorentexte.php for the given author (sorted by "datum", start 0,
# limit 10000) and print one text ID per matching line: the digits of a link
# of the form <a href="<digits>.text">.
#
# Exits with status 1 (after printing a message to stderr) unless called with
# exactly one argument.
kv_author_texts () {
  if [ $# -ne 1 ]; then
    # Write to the existing stderr descriptor with >&2. Reopening it through
    # >/dev/stderr can truncate a file that stderr was redirected to.
    echo Please call kv_author_texts with an author ID. >&2
    exit 1
  fi

  author_id=$1

  # "[0-9][0-9]*" is the portable basic-regex spelling of "one or more
  # digits"; "\+" is a GNU extension.
  kv_GET '/autorentexte.php' -d sortby=datum -d start=0 -d limit=10000 -d autor="$author_id" \
    | sed -n 's/.*<a href="\([0-9][0-9]*\)\.text">.*/\1/p'
}
|
||||
|
||||
# Command dispatch:
#   text ID      print the text with the given ID
#   author NAME  print every text of the author with the given name
# Anything else prints the usage line and fails.
case $1 in
  text)
    shift
    kv_text "$@" ;;
  author)
    shift
    # Resolve the author ID first and stop if that fails, instead of asking
    # the server for the texts of an empty author ID.
    author_id=$(kv_author_id "$@") || exit 1
    if [ -z "$author_id" ]; then
      echo "Could not determine the author ID for '$1'." >&2
      exit 1
    fi
    # The IDs are digit-only words, so splitting the output on whitespace
    # is safe here.
    for text_id in $(kv_author_texts "$author_id"); do
      kv_text "$text_id"
    done ;;
  *)
    # A usage error must be visible to callers through the exit status.
    echo "Usage: $0 text|author ID" >&2
    exit 1 ;;
esac
|
||||
54
kevin.py
54
kevin.py
@@ -6,16 +6,19 @@ import re
|
||||
import requests
|
||||
|
||||
|
||||
def soup_from(response):
    """Parse a fetched page into a BeautifulSoup tree using the lxml parser.

    ``response`` is normally a ``requests`` response object whose ``.text``
    is parsed. A plain URL string is accepted as well and is fetched with
    ``requests.get`` first, because ``Text.__init__`` still passes a URL.
    """
    if isinstance(response, str):
        response = requests.get(response)
    return BeautifulSoup(response.text, "lxml")
|
||||
|
||||
|
||||
class Author:
|
||||
def __init__(self, author_id: int) -> None:
|
||||
author_texts_url = "https://www.keinverlag.de/autorentexte.php?start=0&limit=1000000&sortby=tnr&autor={}".format(author_id)
|
||||
soup = soup_from(author_texts_url)
|
||||
response = requests.get(
    "https://www.keinverlag.de/autorentexte.php",
    # The query parameter is spelled "autor", as in the URL this call
    # replaces; "author" is a different parameter name.
    params={"start": 0, "limit": 10000, "sortby": "tnr", "autor": author_id},
)
|
||||
soup = soup_from(response)
|
||||
self.texts = [] # type: List[Text]
|
||||
for text in soup.select("ul.textliste > li > a[href$=\".text\"]"):
|
||||
for text in soup.select('ul.textliste > li > a[href$=".text"]'):
|
||||
# strip off the last five characters (".text")
|
||||
text_id = int(text["href"][:-5])
|
||||
try:
|
||||
@@ -37,16 +40,26 @@ class Author:
|
||||
|
||||
class Text:
|
||||
def __init__(self, text_id: int) -> None:
|
||||
normalization = {132: "\"", 147: "\"", 0x96: "--", 0x91: "'", 0x92: "'", 0x97: "---"}
|
||||
normalization = {
|
||||
132: '"',
|
||||
147: '"',
|
||||
0x96: "--",
|
||||
0x91: "'",
|
||||
0x92: "'",
|
||||
0x97: "---",
|
||||
}
|
||||
text_url = "https://www.keinverlag.de/{}.text".format(text_id)
|
||||
soup = soup_from(text_url)
|
||||
try:
|
||||
self.title = soup.select("h1 > span")[0].text.translate(normalization)
|
||||
self.content = BeautifulSoup(re.sub(
|
||||
r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
|
||||
r"_\1_",
|
||||
str(soup.select(".fliesstext > span")[0])
|
||||
), "lxml").text.translate(normalization)
|
||||
self.content = BeautifulSoup(
|
||||
re.sub(
|
||||
r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
|
||||
r"_\1_",
|
||||
str(soup.select(".fliesstext > span")[0]),
|
||||
),
|
||||
"lxml",
|
||||
).text.translate(normalization)
|
||||
self.author = soup.select("h3 > a")[2].text
|
||||
self.type = soup.select("h1 ~ h3")[0].text
|
||||
except IndexError:
|
||||
@@ -56,8 +69,11 @@ class Text:
|
||||
return "#### {maybe_author}{title}{maybe_type}\n\n{content}".format(
|
||||
title=self.title,
|
||||
maybe_author=self.author + ": " if with_author else "",
|
||||
maybe_type=" ("+self.type+")" if with_type else "",
|
||||
content="\n".join(line + "\\" if line else "" for line in self.content.splitlines()))
|
||||
maybe_type=" (" + self.type + ")" if with_type else "",
|
||||
content="\n".join(
|
||||
line + "\\" if line else "" for line in self.content.splitlines()
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -67,11 +83,17 @@ if __name__ == "__main__":
|
||||
|
||||
handle_text = subparsers.add_parser("text", help="Handle one text")
|
||||
handle_text.add_argument("tid", help="KeinVerlag text id", type=int)
|
||||
handle_text.set_defaults(func=lambda a: print(Text(a.tid).markdown(with_type=a.type)))
|
||||
handle_text.set_defaults(
|
||||
func=lambda a: print(Text(a.tid).markdown(with_type=a.type))
|
||||
)
|
||||
|
||||
handle_author = subparsers.add_parser("author", help="Handle all texts by an author")
|
||||
handle_author = subparsers.add_parser(
|
||||
"author", help="Handle all texts by an author"
|
||||
)
|
||||
handle_author.add_argument("aid", help="KeinVerlag author id", type=str)
|
||||
handle_author.set_defaults(func=lambda a: print(Author(a.aid).markdown(with_type=a.type)))
|
||||
handle_author.set_defaults(
|
||||
func=lambda a: print(Author(a.aid).markdown(with_type=a.type))
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
args.func(args)
|
||||
|
||||
Reference in New Issue
Block a user