Add type annotations, and handle the case where the Wiktionary entry has no rhymes link.

This commit is contained in:
kmein
2018-05-16 16:22:33 +02:00
parent ae6cded704
commit 01f3d510bf

View File

@@ -1,7 +1,8 @@
from argparse import ArgumentParser from argparse import ArgumentParser
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import requests
import re import re
import requests
from typing import Iterator
DEFAULT_LANG = "de" DEFAULT_LANG = "de"
@@ -18,15 +19,16 @@ def rhymes_url(entry_url, lang=DEFAULT_LANG):
entry_html = requests.get(entry_url).text entry_html = requests.get(entry_url).text
soup = BeautifulSoup(entry_html, "lxml") soup = BeautifulSoup(entry_html, "lxml")
result_url = base_url(lang) result_url = base_url(lang)
if lang == "de": try:
result_url += soup.find("a", href=re.compile(r"/wiki/Reim:Deutsch:.*"))["href"] if lang == "de":
elif lang == "en": result_url += soup.find("a", href=re.compile(r"/wiki/Reim:Deutsch:.*"))["href"]
result_url += soup.find("a", href=re.compile(r"/wiki/Rhymes:English/.*"))["href"] elif lang == "en":
else: result_url += soup.find("a", href=re.compile(r"/wiki/Rhymes:English/.*"))["href"]
pass return result_url
return result_url except:
raise ValueError("Entry at {} not found.".format(entry_url))
def find_rhymes(rhymes_url): def find_rhymes(rhymes_url: str) -> Iterator[str]:
rhymes_html = requests.get(rhymes_url).text rhymes_html = requests.get(rhymes_url).text
soup = BeautifulSoup(rhymes_html, "lxml") soup = BeautifulSoup(rhymes_html, "lxml")
for li in soup.select("div#content ul > li > a"): for li in soup.select("div#content ul > li > a"):