Small fixes

- Remove unused import
~ Reformat text content extraction and normalization into a single expression
+ Keep empty lines empty instead of turning them into a lone "\"
This commit is contained in:
kmein
2018-05-22 12:07:45 +02:00
parent df015db743
commit 2dcf600707

View File

@@ -1,7 +1,6 @@
#!/usr/bin/env python3
from argparse import ArgumentParser
from bs4 import BeautifulSoup
from datetime import datetime
from typing import List
import re
import requests
@@ -43,9 +42,11 @@ class Text:
soup = soup_from(text_url)
try:
self.title = soup.select("h1 > span")[0].text.translate(normalization)
content = str(soup.select(".fliesstext > span")[0])
content = re.sub(r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>', r"_\1_", content)
self.content = BeautifulSoup(content, "lxml").text.translate(normalization)
self.content = BeautifulSoup(re.sub(
r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
r"_\1_",
str(soup.select(".fliesstext > span")[0])
), "lxml").text.translate(normalization)
self.author = soup.select("h3 > a")[2].text
self.type = soup.select("h1 ~ h3")[0].text
except IndexError:
@@ -56,7 +57,7 @@ class Text:
title=self.title,
maybe_author=self.author + ": " if with_author else "",
maybe_type=" ("+self.type+")" if with_type else "",
content="\n".join(line + "\\" for line in self.content.splitlines()))
content="\n".join(line + "\\" if line else line for line in self.content.splitlines()))
if __name__ == "__main__":