Small fixes
- Remove unused import
~ Reformat text content normalization
+ Keep empty lines empty instead of replacing them with "\"
This commit is contained in:
11
kevin.py
11
kevin.py
@@ -1,7 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
from argparse import ArgumentParser
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
import re
|
||||
import requests
|
||||
@@ -43,9 +42,11 @@ class Text:
|
||||
soup = soup_from(text_url)
|
||||
try:
|
||||
self.title = soup.select("h1 > span")[0].text.translate(normalization)
|
||||
content = str(soup.select(".fliesstext > span")[0])
|
||||
content = re.sub(r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>', r"_\1_", content)
|
||||
self.content = BeautifulSoup(content, "lxml").text.translate(normalization)
|
||||
self.content = BeautifulSoup(re.sub(
|
||||
r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
|
||||
r"_\1_",
|
||||
str(soup.select(".fliesstext > span")[0])
|
||||
), "lxml").text.translate(normalization)
|
||||
self.author = soup.select("h3 > a")[2].text
|
||||
self.type = soup.select("h1 ~ h3")[0].text
|
||||
except IndexError:
|
||||
@@ -56,7 +57,7 @@ class Text:
|
||||
title=self.title,
|
||||
maybe_author=self.author + ": " if with_author else "",
|
||||
maybe_type=" ("+self.type+")" if with_type else "",
|
||||
content="\n".join(line + "\\" for line in self.content.splitlines()))
|
||||
content="\n".join(line + "\\" if line else line for line in self.content.splitlines()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user