Small fixes
- Remove unused import
~ Reformat text content normalization
+ Keep empty lines empty instead of replacing them with "\"
This commit is contained in:
11
kevin.py
11
kevin.py
@@ -1,7 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from datetime import datetime
|
|
||||||
from typing import List
|
from typing import List
|
||||||
import re
|
import re
|
||||||
import requests
|
import requests
|
||||||
@@ -43,9 +42,11 @@ class Text:
|
|||||||
soup = soup_from(text_url)
|
soup = soup_from(text_url)
|
||||||
try:
|
try:
|
||||||
self.title = soup.select("h1 > span")[0].text.translate(normalization)
|
self.title = soup.select("h1 > span")[0].text.translate(normalization)
|
||||||
content = str(soup.select(".fliesstext > span")[0])
|
self.content = BeautifulSoup(re.sub(
|
||||||
content = re.sub(r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>', r"_\1_", content)
|
r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
|
||||||
self.content = BeautifulSoup(content, "lxml").text.translate(normalization)
|
r"_\1_",
|
||||||
|
str(soup.select(".fliesstext > span")[0])
|
||||||
|
), "lxml").text.translate(normalization)
|
||||||
self.author = soup.select("h3 > a")[2].text
|
self.author = soup.select("h3 > a")[2].text
|
||||||
self.type = soup.select("h1 ~ h3")[0].text
|
self.type = soup.select("h1 ~ h3")[0].text
|
||||||
except IndexError:
|
except IndexError:
|
||||||
@@ -56,7 +57,7 @@ class Text:
|
|||||||
title=self.title,
|
title=self.title,
|
||||||
maybe_author=self.author + ": " if with_author else "",
|
maybe_author=self.author + ": " if with_author else "",
|
||||||
maybe_type=" ("+self.type+")" if with_type else "",
|
maybe_type=" ("+self.type+")" if with_type else "",
|
||||||
content="\n".join(line + "\\" for line in self.content.splitlines()))
|
content="\n".join(line + "\\" if line else line for line in self.content.splitlines()))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user