Files
to-hen/rilke.py
2018-05-15 21:57:54 +02:00

42 lines
1.2 KiB
Python

from bs4 import BeautifulSoup
import requests
import re
DEFAULT_LANG = "de"
def cook_soup(url):
return BeautifulSoup(requests.get(url).text, "lxml")
def base_url(lang=DEFAULT_LANG):
return "https://{}.wiktionary.org".format(lang)
def entry_url(word, lang=DEFAULT_LANG):
return base_url(lang) + "/wiki/" + word
def find_rhymes_url(entry_url, lang=DEFAULT_LANG):
entry_html = requests.get(entry_url).text
soup = BeautifulSoup(entry_html, "lxml")
result_url = base_url(lang)
if lang == "de":
result_url += soup.find("a", href=re.compile(r"/wiki/Reim:Deutsch:.*"))["href"]
elif lang == "en":
result_url += soup.find("a", href=re.compile(r"/wiki/Rhymes:English/.*"))["href"]
else:
pass
return result_url
def find_rhymes(rhymes_url):
rhymes_html = requests.get(rhymes_url).text
soup = BeautifulSoup(rhymes_html, "lxml")
for li in soup.select("div#content ul > li > a"):
try:
yield li.text
except KeyError:
continue
if __name__ == "__main__":
entry_url = entry_url("stabil", lang="de")
rhymes_url = find_rhymes_url(entry_url, lang="de")
for rhyme in find_rhymes(rhymes_url):
print(rhyme)