feat: add shell version

This commit is contained in:
Kierán Meinhardt
2019-10-01 19:22:59 +02:00
parent a5426f8e51
commit bed2a8a40d
4 changed files with 109 additions and 16 deletions

View File

@@ -6,16 +6,19 @@ import re
import requests
def soup_from(url):
return BeautifulSoup(requests.get(url).text, "lxml")
def soup_from(response):
return BeautifulSoup(response.text, "lxml")
class Author:
def __init__(self, author_id: int) -> None:
author_texts_url = "https://www.keinverlag.de/autorentexte.php?start=0&limit=1000000&sortby=tnr&autor={}".format(author_id)
soup = soup_from(author_texts_url)
response = requests.get(
"https://www.keinverlag.de/autorentexte.php",
params={"start": 0, "limit": 10000, "sortby": "tnr", "author": author_id},
)
soup = soup_from(response)
self.texts = [] # type: List[Text]
for text in soup.select("ul.textliste > li > a[href$=\".text\"]"):
for text in soup.select('ul.textliste > li > a[href$=".text"]'):
# strip off the last five characters (".text")
text_id = int(text["href"][:-5])
try:
@@ -37,16 +40,26 @@ class Author:
class Text:
def __init__(self, text_id: int) -> None:
normalization = {132: "\"", 147: "\"", 0x96: "--", 0x91: "'", 0x92: "'", 0x97: "---"}
normalization = {
132: '"',
147: '"',
0x96: "--",
0x91: "'",
0x92: "'",
0x97: "---",
}
text_url = "https://www.keinverlag.de/{}.text".format(text_id)
soup = soup_from(text_url)
try:
self.title = soup.select("h1 > span")[0].text.translate(normalization)
self.content = BeautifulSoup(re.sub(
r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
r"_\1_",
str(soup.select(".fliesstext > span")[0])
), "lxml").text.translate(normalization)
self.content = BeautifulSoup(
re.sub(
r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
r"_\1_",
str(soup.select(".fliesstext > span")[0]),
),
"lxml",
).text.translate(normalization)
self.author = soup.select("h3 > a")[2].text
self.type = soup.select("h1 ~ h3")[0].text
except IndexError:
@@ -56,8 +69,11 @@ class Text:
return "#### {maybe_author}{title}{maybe_type}\n\n{content}".format(
title=self.title,
maybe_author=self.author + ": " if with_author else "",
maybe_type=" ("+self.type+")" if with_type else "",
content="\n".join(line + "\\" if line else "" for line in self.content.splitlines()))
maybe_type=" (" + self.type + ")" if with_type else "",
content="\n".join(
line + "\\" if line else "" for line in self.content.splitlines()
),
)
if __name__ == "__main__":
@@ -67,11 +83,17 @@ if __name__ == "__main__":
handle_text = subparsers.add_parser("text", help="Handle one text")
handle_text.add_argument("tid", help="KeinVerlag text id", type=int)
handle_text.set_defaults(func=lambda a: print(Text(a.tid).markdown(with_type=a.type)))
handle_text.set_defaults(
func=lambda a: print(Text(a.tid).markdown(with_type=a.type))
)
handle_author = subparsers.add_parser("author", help="Handle all texts by an author")
handle_author = subparsers.add_parser(
"author", help="Handle all texts by an author"
)
handle_author.add_argument("aid", help="KeinVerlag author id", type=str)
handle_author.set_defaults(func=lambda a: print(Author(a.aid).markdown(with_type=a.type)))
handle_author.set_defaults(
func=lambda a: print(Author(a.aid).markdown(with_type=a.type))
)
args = parser.parse_args()
args.func(args)