21 lines
732 B
Bash
21 lines
732 B
Bash
|
|
#!/bin/sh
|
||
|
|
root=https://www.orte-in-deutschland.de
|
||
|
|
|
||
|
|
extract_orte() {
|
||
|
|
htmlq 'a[href*="-gemeinde-"], a[href*="-ort-"]' --attribute href
|
||
|
|
}
|
||
|
|
|
||
|
|
curl -sSL "$root/alphabetisches-ortsverzeichnis.html" \
|
||
|
|
| htmlq '#suchindex li a[href^="orte-in"]' --attribute href \
|
||
|
|
| while read -r slug; do
|
||
|
|
curl -sSL "$root/$slug" \
|
||
|
|
| htmlq '#subsuchindex li a[href^="orte-in"]' --attribute href \
|
||
|
|
| while read -r slug; do
|
||
|
|
first_page="$(curl -sSL "$root/$slug")"
|
||
|
|
echo "$first_page" | extract_orte
|
||
|
|
echo "$first_page" | htmlq 'a[href*="?seite="]' --attribute href | sort -u | while read -r page; do
|
||
|
|
curl -sSL "$root/$slug$page" | extract_orte
|
||
|
|
done
|
||
|
|
done
|
||
|
|
done
|