Initial commit which does not work
This commit is contained in:
41
rilke.py
Normal file
41
rilke.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import re
|
||||
|
||||
DEFAULT_LANG = "de"
|
||||
|
||||
def cook_soup(url):
|
||||
return BeautifulSoup(requests.get(url).text, "lxml")
|
||||
|
||||
def base_url(lang=DEFAULT_LANG):
|
||||
return "https://{}.wiktionary.org".format(lang)
|
||||
|
||||
def entry_url(word, lang=DEFAULT_LANG):
|
||||
return base_url(lang) + "/wiki/" + word
|
||||
|
||||
def find_rhymes_url(entry_url, lang=DEFAULT_LANG):
|
||||
entry_html = requests.get(entry_url).text
|
||||
soup = BeautifulSoup(entry_html, "lxml")
|
||||
result_url = base_url(lang)
|
||||
if lang == "de":
|
||||
result_url += soup.find("a", href=re.compile(r"/wiki/Reim:Deutsch:.*"))["href"]
|
||||
elif lang == "en":
|
||||
result_url += soup.find("a", href=re.compile(r"/wiki/Rhymes:English/.*"))["href"]
|
||||
else:
|
||||
pass
|
||||
return result_url
|
||||
|
||||
def find_rhymes(rhymes_url):
|
||||
rhymes_html = requests.get(rhymes_url).text
|
||||
soup = BeautifulSoup(rhymes_html, "lxml")
|
||||
for li in soup.select("div#content ul > li > a"):
|
||||
try:
|
||||
yield li.text
|
||||
except KeyError:
|
||||
continue
|
||||
|
||||
if __name__ == "__main__":
|
||||
entry_url = entry_url("stabil", lang="de")
|
||||
rhymes_url = find_rhymes_url(entry_url, lang="de")
|
||||
for rhyme in find_rhymes(rhymes_url):
|
||||
print(rhyme)
|
||||
Reference in New Issue
Block a user