Small fixes

- Remove unused import
~ Collapse text content normalization into a single expression
+ Keep empty lines empty instead of appending "\" to them
This commit is contained in:
kmein
2018-05-22 12:07:45 +02:00
parent df015db743
commit 2dcf600707

View File

@@ -1,7 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from argparse import ArgumentParser from argparse import ArgumentParser
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from datetime import datetime
from typing import List from typing import List
import re import re
import requests import requests
@@ -43,9 +42,11 @@ class Text:
soup = soup_from(text_url) soup = soup_from(text_url)
try: try:
self.title = soup.select("h1 > span")[0].text.translate(normalization) self.title = soup.select("h1 > span")[0].text.translate(normalization)
content = str(soup.select(".fliesstext > span")[0]) self.content = BeautifulSoup(re.sub(
content = re.sub(r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>', r"_\1_", content) r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
self.content = BeautifulSoup(content, "lxml").text.translate(normalization) r"_\1_",
str(soup.select(".fliesstext > span")[0])
), "lxml").text.translate(normalization)
self.author = soup.select("h3 > a")[2].text self.author = soup.select("h3 > a")[2].text
self.type = soup.select("h1 ~ h3")[0].text self.type = soup.select("h1 ~ h3")[0].text
except IndexError: except IndexError:
@@ -56,7 +57,7 @@ class Text:
title=self.title, title=self.title,
maybe_author=self.author + ": " if with_author else "", maybe_author=self.author + ": " if with_author else "",
maybe_type=" ("+self.type+")" if with_type else "", maybe_type=" ("+self.type+")" if with_type else "",
content="\n".join(line + "\\" for line in self.content.splitlines())) content="\n".join(line + "\\" if line else line for line in self.content.splitlines()))
if __name__ == "__main__": if __name__ == "__main__":