commit 6aba9e127c1d0d2f1294b50d282aebbbbca6ccfb Author: kmein Date: Tue May 15 21:57:54 2018 +0200 Initial commit which does not work diff --git a/__pycache__/rilke.cpython-35.pyc b/__pycache__/rilke.cpython-35.pyc new file mode 100644 index 0000000..3b1a49f Binary files /dev/null and b/__pycache__/rilke.cpython-35.pyc differ diff --git a/rilke.py b/rilke.py new file mode 100644 index 0000000..b6b2ea4 --- /dev/null +++ b/rilke.py @@ -0,0 +1,41 @@ +from bs4 import BeautifulSoup +import requests +import re + +DEFAULT_LANG = "de" + +def cook_soup(url): + return BeautifulSoup(requests.get(url).text, "lxml") + +def base_url(lang=DEFAULT_LANG): + return "https://{}.wiktionary.org".format(lang) + +def entry_url(word, lang=DEFAULT_LANG): + return base_url(lang) + "/wiki/" + word + +def find_rhymes_url(entry_url, lang=DEFAULT_LANG): + entry_html = requests.get(entry_url).text + soup = BeautifulSoup(entry_html, "lxml") + result_url = base_url(lang) + if lang == "de": + result_url += soup.find("a", href=re.compile(r"/wiki/Reim:Deutsch:.*"))["href"] + elif lang == "en": + result_url += soup.find("a", href=re.compile(r"/wiki/Rhymes:English/.*"))["href"] + else: + pass + return result_url + +def find_rhymes(rhymes_url): + rhymes_html = requests.get(rhymes_url).text + soup = BeautifulSoup(rhymes_html, "lxml") + for li in soup.select("div#content ul > li > a"): + try: + yield li.text + except KeyError: + continue + +if __name__ == "__main__": + entry_url = entry_url("stabil", lang="de") + rhymes_url = find_rhymes_url(entry_url, lang="de") + for rhyme in find_rhymes(rhymes_url): + print(rhyme)