diff --git a/onomastics/geogen b/onomastics/geogen
new file mode 100755
index 0000000..0a7ef63
--- /dev/null
+++ b/onomastics/geogen
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+"""Output SVG maps of surnames.
+Powered by Christoph Stöpel's Geogen API.
+
+Usage:
+  geogen.py relative <name>... [--color=<color>]
+  geogen.py absolute <name>... [--color=<color>]
+  geogen.py (-h | --help)
+
+Options:
+  -h --help        Show this screen.
+  --color=<color>  Diagram accent colour.
+"""
+from collections import Counter
+import locale
+import sys
+from typing import List, TypedDict
+from xml.sax.saxutils import escape
+
+from docopt import docopt
+import pandas as pd
+import requests
+import requests_cache
+
+# Locale names tried in order: a bare "de_DE" is not installed on every
+# system, so UTF-8 spellings are tried as well.
+GERMAN_LOCALES = ("de_DE", "de_DE.UTF-8", "de_DE.utf8")
+
+# Seconds to wait for the Geogen server before giving up on a request.
+REQUEST_TIMEOUT = 30
+
+
+def _use_german_locale() -> None:
+    """Switch to the first available German locale for number formatting.
+
+    If none can be set, warn on stderr and keep the current locale instead
+    of aborting with locale.Error.
+    """
+    for candidate in GERMAN_LOCALES:
+        try:
+            locale.setlocale(locale.LC_ALL, candidate)
+        except locale.Error:
+            continue
+        return
+    print(
+        "warning: no German locale available; using the current locale",
+        file=sys.stderr,
+    )
+
+
+_use_german_locale()
+
+# Cache HTTP responses so repeated lookups do not hit the server again.
+requests_cache.install_cache("stoepel_cache")
+
+
+class DistrictOrState(TypedDict):
+    """Fields read from a districts.json / states.json entry."""
+
+    name: str
+    key: str
+    population: int
+    path: str
+
+
+DISTRICTS_URL = "https://geogen.stoepel.net/content/de/districts.json"
+STATES_URL = "https://geogen.stoepel.net/content/de/states.json"
+DISTRICT_CLUSTER_URL = "https://geogen.stoepel.net/api/clusters/district"
+
+unlines = "\n".join
+
+
+def _get_json(url, params=None):
+    """GET *url* and return the decoded JSON body.
+
+    Raises requests.HTTPError for a 4xx/5xx response so an error page is
+    never parsed as data; the request is abandoned after REQUEST_TIMEOUT
+    seconds.
+    """
+    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+    return response.json()
+
+
+def get_districts() -> List[DistrictOrState]:
+    """Fetch the district list from DISTRICTS_URL."""
+    return _get_json(DISTRICTS_URL)
+
+
+def get_states() -> List[DistrictOrState]:
+    """Fetch the state list from STATES_URL."""
+    return _get_json(STATES_URL)
+
+
+def get_name_info(names: List[str]) -> Counter:
+    """Return the per-key counts summed over all *names*.
+
+    One request is made per name; the key/value pairs found under
+    clusterers -> DistrictClusterer -> Data are added up across names.
+    """
+    count: Counter = Counter()
+    for name in names:
+        response = _get_json(DISTRICT_CLUSTER_URL, params={"name": name})
+        key_value_list = response["clusterers"]["DistrictClusterer"]["Data"]
+        count += Counter(dict(key_value_list))
+    return count
+
+
+def create_data_frame(names: List[str]) -> pd.DataFrame:
+    """Build the district table with "absolute" and "relative" columns.
+
+    "absolute" is the count looked up by district key (0 when there is
+    none); "relative" is that count per million of "population".
+    """
+    df = pd.DataFrame.from_dict(get_districts())
+    df["absolute"] = df["key"].map(get_name_info(names)).fillna(0)
+    df["relative"] = 1_000_000 * (df["absolute"] / df["population"])
+    return df
+
+
+def generate_map(df, key, fill_color):
+    """Return the map document for column *key* of *df* as a string.
+
+    NOTE(review): the markup inside the string templates below (and with it
+    any use of fill_color and of the "path" values) is absent from this copy
+    of the patch; it looks stripped -- confirm against the upstream file.
+    """
+    state_paths = []
+    for state in get_states():
+        state_paths.append(
+            f''
+        )
+    district_paths = []
+    for _, district in df.iterrows():
+        # Names come from the remote API: escape &, < and > for XML.
+        label = escape(str(district["name"]))
+        district_paths.append(
+            f'{label}: {locale.str(round(district[key], 2))}'
+        )
+    return f"""
+
+
+
+
+
+
+
+
+
+{locale.str(round(df[key].max(),2))}
+0
+
+{unlines(district_paths)}
+{unlines(state_paths)}
+"""
+
+
+def main() -> None:
+    """Parse the command line and print the requested map to stdout."""
+    arguments = docopt(__doc__)
+    df = create_data_frame(arguments["<name>"])
+    color = arguments["--color"] or "navy"
+    key = "relative" if arguments["relative"] else "absolute"
+    print(generate_map(df, key, fill_color=color))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/onomastics/setup.py b/onomastics/setup.py
new file mode 100755
index 0000000..05ac58b
--- /dev/null
+++ b/onomastics/setup.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+from setuptools import setup
+
+setup(
+    name="Geogen",
+    version="1.0",
+    description="Geogen (geogen.stoepel.net) API wrapper",
+    author="Kierán Meinhardt",
+    author_email="kmein@posteo.de",
+    packages=[],
+    scripts=["geogen"],
+    # The script imports typing.TypedDict, which was added in Python 3.8.
+    python_requires=">=3.8",
+    install_requires=[
+        "requests >=2.24.0, <3.0.0",
+        "requests-cache >=0.5.2, <1.0.0",
+        "pandas >=1.1.1, <2.0.0",
+        "docopt >=0.6.2, <1.0.0",
+    ],
+)