From df015db743e1ae6ce40ac58a79767926feaa54bd Mon Sep 17 00:00:00 2001
From: kmein <kieran.meinhardt@gmail.com>
Date: Tue, 22 May 2018 10:57:44 +0200
Subject: [PATCH 1/6] Add program and runner script

---
 epub.css |  3 +++
 kevin.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 kevin.sh |  2 ++
 3 files changed, 81 insertions(+)
 create mode 100644 epub.css
 create mode 100755 kevin.py
 create mode 100755 kevin.sh
diff --git a/epub.css b/epub.css
new file mode 100644
index 0000000..cb7e08e
--- /dev/null
+++ b/epub.css
@@ -0,0 +1,3 @@
+body{margin:40px auto;max-width:650px;line-height:1.6;font-size:18px;color:#444;padding:0}
+a{color:inherit;text-decoration:none}
+a:hover{text-decoration:underline}
diff --git a/kevin.py b/kevin.py
new file mode 100755
index 0000000..f8d3836
--- /dev/null
+++ b/kevin.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+from argparse import ArgumentParser
+from bs4 import BeautifulSoup
+from datetime import datetime
+from typing import List
+import re
+import requests
+
+
+def soup_from(url):
+    return BeautifulSoup(requests.get(url).text, "lxml")
+
+
+class Author:
+    def __init__(self, author_id: int) -> None:
+        author_texts_url = "https://www.keinverlag.de/autorentexte.php?start=0&limit=1000000&sortby=tnr&autor={}".format(author_id)
+        soup = soup_from(author_texts_url)
+        self.texts = []  # type: List[Text]
+        for text in soup.select("ul.textliste > li > a[href$=\".text\"]"):
+            # strip off the last five characters (".text")
+            text_id = int(text["href"][:-5])
+            try:
+                self.texts.append(Text(text_id))
+            except ValueError:
+                continue
+
+    def markdown(self, *, with_type: bool = False) -> str:
+        name = self.texts[0].author
+
+        def __gen():
+            yield "% {}".format(name)
+            for text in self.texts:
+                yield "\n\n* * *\n\n"
+                yield text.markdown(with_author=False, with_type=with_type)
+
+        return "\n".join(__gen())
+
+
+class Text:
+    def __init__(self, text_id: int) -> None:
+        normalization = {132: "\"", 147: "\"", 0x96: "--", 0x91: "'", 0x92: "'", 0x97: "---"}
+        text_url = "https://www.keinverlag.de/{}.text".format(text_id)
+        soup = soup_from(text_url)
+        try:
+            self.title = soup.select("h1 > span")[0].text.translate(normalization)
+            content = str(soup.select(".fliesstext > span")[0])
+            content = re.sub(r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>', r"_\1_", content)
+            self.content = BeautifulSoup(content, "lxml").text.translate(normalization)
+            self.author = soup.select("h3 > a")[2].text
+            self.type = soup.select("h1 ~ h3")[0].text
+        except IndexError:
+            raise ValueError("Text {} not available.".format(text_id))
+
+    def markdown(self, *, with_author: bool = True, with_type: bool = False) -> str:
+        return "#### {maybe_author}{title}{maybe_type}\n\n{content}".format(
+            title=self.title,
+            maybe_author=self.author + ": " if with_author else "",
+            maybe_type=" ("+self.type+")" if with_type else "",
+            content="\n".join(line + "\\" for line in self.content.splitlines()))
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("--type", help="Include text type", action="store_true")
+    subparsers = parser.add_subparsers()
+
+    handle_text = subparsers.add_parser("text", help="Handle one text")
+    handle_text.add_argument("tid", help="KeinVerlag text id", type=int)
+    handle_text.set_defaults(func=lambda a: print(Text(a.tid).markdown(with_type=a.type)))
+
+    handle_author = subparsers.add_parser("author", help="Handle all texts by an author")
+    handle_author.add_argument("aid", help="KeinVerlag author id", type=str)
+    handle_author.set_defaults(func=lambda a: print(Author(a.aid).markdown(with_type=a.type)))
+
+    args = parser.parse_args()
+    args.func(args)
diff --git a/kevin.sh b/kevin.sh
new file mode 100755
index 0000000..474f091
--- /dev/null
+++ b/kevin.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+python3 kevin.py author "$1" | pandoc -f markdown+smart --table-of-contents --toc-depth=6 --standalone --css=epub.css -o "$2"

From 2dcf6007075ed14f03c38c7ec6676616db781d90 Mon Sep 17 00:00:00 2001
From: kmein <kieran.meinhardt@gmail.com>
Date: Tue, 22 May 2018 12:07:45 +0200
Subject: [PATCH 2/6] Small fixes

- Remove unused import
~ Reformat text content normalization
+ Keep empty lines empty instead of replacing them with "\"
---
 kevin.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/kevin.py b/kevin.py
index f8d3836..9fac409 100755
--- a/kevin.py
+++ b/kevin.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 from argparse import ArgumentParser
 from bs4 import BeautifulSoup
-from datetime import datetime
 from typing import List
 import re
 import requests
@@ -43,9 +42,11 @@ class Text:
         soup = soup_from(text_url)
         try:
             self.title = soup.select("h1 > span")[0].text.translate(normalization)
-            content = str(soup.select(".fliesstext > span")[0])
-            content = re.sub(r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>', r"_\1_", content)
-            self.content = BeautifulSoup(content, "lxml").text.translate(normalization)
+            self.content = BeautifulSoup(re.sub(
+                r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
+                r"_\1_",
+                str(soup.select(".fliesstext > span")[0])
+                ), "lxml").text.translate(normalization)
             self.author = soup.select("h3 > a")[2].text
             self.type = soup.select("h1 ~ h3")[0].text
         except IndexError:
@@ -56,7 +57,7 @@ class Text:
             title=self.title,
             maybe_author=self.author + ": " if with_author else "",
             maybe_type=" ("+self.type+")" if with_type else "",
-            content="\n".join(line + "\\" for line in self.content.splitlines()))
+            content="\n".join(line + "\\" if line else line for line in self.content.splitlines()))
 
 
 if __name__ == "__main__":

From a5426f8e51d8f06ef7b446a53f39343ab60e7145 Mon Sep 17 00:00:00 2001
From: kmein <kieran.meinhardt@gmail.com>
Date: Tue, 22 May 2018 12:10:30 +0200
Subject: [PATCH 3/6] "Explicit is better than implicit."

~ Markdown generation for empty lines
---
 kevin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kevin.py b/kevin.py
index 9fac409..8b1d9ad 100755
--- a/kevin.py
+++ b/kevin.py
@@ -57,7 +57,7 @@ class Text:
             title=self.title,
             maybe_author=self.author + ": " if with_author else "",
             maybe_type=" ("+self.type+")" if with_type else "",
-            content="\n".join(line + "\\" if line else line for line in self.content.splitlines()))
+            content="\n".join(line + "\\" if line else "" for line in self.content.splitlines()))
 
 
 if __name__ == "__main__":

From bed2a8a40d4377585a68bbcf5fc20c76e290149b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kier=C3=A1n=20Meinhardt?= <kieran.meinhardt@gmail.com>
Date: Tue, 1 Oct 2019 19:22:59 +0200
Subject: [PATCH 4/6] feat: add shell version

---
 .gitignore |  3 +++
 keinverlag | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 kevin.py   | 54 +++++++++++++++++++++++++++++++++++---------------
 shell.nix  | 10 ++++++++++
 4 files changed, 109 insertions(+), 16 deletions(-)
 create mode 100644 .gitignore
 create mode 100755 keinverlag
 create mode 100644 shell.nix

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..806fbfb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.direnv
+.envrc
+.history
diff --git a/keinverlag b/keinverlag
new file mode 100755
index 0000000..90b1972
--- /dev/null
+++ b/keinverlag
@@ -0,0 +1,58 @@
+#!/bin/sh
+
+kv_GET () {
+  route=$1
+  shift
+  curl -s "https://www.keinverlag.de$route" "$@" | iconv -f latin1 -t utf8
+}
+
+kv_author_id () {
+  if [ $# -ne 1 ]; then
+    echo Please call kv_author_id with an author name. >/dev/stderr
+    exit 1
+  fi
+
+  author_name=$1
+
+  kv_GET "/$author_name.kv" \
+    | sed -n 's/.*autor=\([0-9]\+\).*/\1/p' \
+    | head -1
+}
+
+kv_text () {
+  if [ $# -ne 1 ]; then
+    echo Please call kv_text with a text ID. >/dev/stderr
+    exit 1
+  fi
+
+  text_id=$1
+
+  kv_GET "/$text_id.text" \
+    | sed -n '/<h1>/,/<!-- Kommentarbox -->/p' \
+    | pandoc -f html -t plain
+}
+
+kv_author_texts () {
+  if [ $# -ne 1 ]; then
+    echo Please call kv_author_texts with an author ID. >/dev/stderr
+    exit 1
+  fi
+
+  author_id=$1
+
+  kv_GET '/autorentexte.php' -d sortby=datum -d start=0 -d limit=10000 -d autor="$author_id" \
+    | sed -n 's/.*<a href="\([0-9]\+\)\.text">.*/\1/p'
+}
+
+case $1 in
+  text)
+    shift
+    kv_text "$@";;
+  author)
+    shift
+    for text_id in $(kv_author_texts "$(kv_author_id "$@")"); do
+      kv_text "$text_id"
+    done ;;
+  *)
+    echo >/dev/stderr "Usage: $0 text|author ID"
+esac
diff --git a/kevin.py b/kevin.py
index 8b1d9ad..9b4cbe1 100755
--- a/kevin.py
+++ b/kevin.py
@@ -6,16 +6,19 @@ import re
 import requests
 
 
-def soup_from(url):
-    return BeautifulSoup(requests.get(url).text, "lxml")
+def soup_from(response):
+    return BeautifulSoup(response.text, "lxml")
 
 
 class Author:
     def __init__(self, author_id: int) -> None:
-        author_texts_url = "https://www.keinverlag.de/autorentexte.php?start=0&limit=1000000&sortby=tnr&autor={}".format(author_id)
-        soup = soup_from(author_texts_url)
+        response = requests.get(
+            "https://www.keinverlag.de/autorentexte.php",
+            params={"start": 0, "limit": 10000, "sortby": "tnr", "autor": author_id},
+        )
+        soup = soup_from(response)
         self.texts = []  # type: List[Text]
-        for text in soup.select("ul.textliste > li > a[href$=\".text\"]"):
+        for text in soup.select('ul.textliste > li > a[href$=".text"]'):
             # strip off the last five characters (".text")
             text_id = int(text["href"][:-5])
             try:
@@ -37,16 +40,26 @@ class Author:
 
 class Text:
     def __init__(self, text_id: int) -> None:
-        normalization = {132: "\"", 147: "\"", 0x96: "--", 0x91: "'", 0x92: "'", 0x97: "---"}
+        normalization = {
+            132: '"',
+            147: '"',
+            0x96: "--",
+            0x91: "'",
+            0x92: "'",
+            0x97: "---",
+        }
         text_url = "https://www.keinverlag.de/{}.text".format(text_id)
-        soup = soup_from(text_url)
+        soup = soup_from(requests.get(text_url))  # soup_from takes a response, not a URL
         try:
             self.title = soup.select("h1 > span")[0].text.translate(normalization)
-            self.content = BeautifulSoup(re.sub(
-                r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
-                r"_\1_",
-                str(soup.select(".fliesstext > span")[0])
-                ), "lxml").text.translate(normalization)
+            self.content = BeautifulSoup(
+                re.sub(
+                    r'<span style="font-style: italic;">(([\n\r]|.)*?)</span>',
+                    r"_\1_",
+                    str(soup.select(".fliesstext > span")[0]),
+                ),
+                "lxml",
+            ).text.translate(normalization)
             self.author = soup.select("h3 > a")[2].text
             self.type = soup.select("h1 ~ h3")[0].text
         except IndexError:
@@ -56,8 +69,11 @@ class Text:
         return "#### {maybe_author}{title}{maybe_type}\n\n{content}".format(
             title=self.title,
             maybe_author=self.author + ": " if with_author else "",
-            maybe_type=" ("+self.type+")" if with_type else "",
-            content="\n".join(line + "\\" if line else "" for line in self.content.splitlines()))
+            maybe_type=" (" + self.type + ")" if with_type else "",
+            content="\n".join(
+                line + "\\" if line else "" for line in self.content.splitlines()
+            ),
+        )
 
 
 if __name__ == "__main__":
@@ -67,11 +83,17 @@ if __name__ == "__main__":
 
     handle_text = subparsers.add_parser("text", help="Handle one text")
     handle_text.add_argument("tid", help="KeinVerlag text id", type=int)
-    handle_text.set_defaults(func=lambda a: print(Text(a.tid).markdown(with_type=a.type)))
+    handle_text.set_defaults(
+        func=lambda a: print(Text(a.tid).markdown(with_type=a.type))
+    )
 
-    handle_author = subparsers.add_parser("author", help="Handle all texts by an author")
+    handle_author = subparsers.add_parser(
+        "author", help="Handle all texts by an author"
+    )
     handle_author.add_argument("aid", help="KeinVerlag author id", type=str)
-    handle_author.set_defaults(func=lambda a: print(Author(a.aid).markdown(with_type=a.type)))
+    handle_author.set_defaults(
+        func=lambda a: print(Author(a.aid).markdown(with_type=a.type))
+    )
 
     args = parser.parse_args()
     args.func(args)
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..59c35ed
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,10 @@
+{ pkgs ? import <nixpkgs> {} }:
+pkgs.mkShell {
+  buildInputs = with pkgs; [
+    pandoc
+    python3Packages.beautifulsoup4
+    python3Packages.requests
+    python3Packages.lxml
+  ];
+  shellHook = "export HISTFILE=${toString ./.history}";
+}

From 9d755bb4202c7032442c6bdc81483656b34af4c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kier=C3=A1n=20Meinhardt?= <kieran.meinhardt@gmail.com>
Date: Tue, 1 Oct 2019 21:02:16 +0200
Subject: [PATCH 5/6] feat: cut out h3s so only the text is left

---
 keinverlag | 1 +
 1 file changed, 1 insertion(+)

diff --git a/keinverlag b/keinverlag
index 90b1972..5efe721 100755
--- a/keinverlag
+++ b/keinverlag
@@ -29,6 +29,7 @@ kv_text () {
 
   kv_GET "/$text_id.text" \
     | sed -n '/<h1>/,/<!-- Kommentarbox -->/p' \
+    | sed 's/<h3>.\+<\/h3>//g' \
     | pandoc -f html -t plain
 }
 

From e8409354e4ba381744e2ef98c952402901f3fb92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kier=C3=A1n=20Meinhardt?= <kieran.meinhardt@gmail.com>
Date: Tue, 1 Oct 2019 21:34:50 +0200
Subject: [PATCH 6/6] fix: use aliases to correctly and uniquely get all texts

---
 keinverlag | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/keinverlag b/keinverlag
index 5efe721..cde6f39 100755
--- a/keinverlag
+++ b/keinverlag
@@ -1,10 +1,9 @@
 #!/bin/sh
 
-kv_GET () {
-  route=$1
-  shift
-  curl -s "https://www.keinverlag.de$route" "$@" | iconv -f latin1 -t utf8
-}
+alias to_utf8='iconv -f latin1 -t utf8'
+alias curl_GET='curl -X GET -s -G'
+
+BASE_URL=https://www.keinverlag.de
 
 kv_author_id () {
   if [ $# -ne 1 ]; then
@@ -14,7 +13,8 @@ kv_author_id () {
 
   author_name=$1
 
-  kv_GET "/$author_name.kv" \
+  curl_GET "$BASE_URL/$author_name.kv" \
+    | to_utf8 \
     | sed -n 's/.*autor=\([0-9]\+\).*/\1/p' \
     | head -1
 }
@@ -27,7 +27,8 @@ kv_text () {
 
   text_id=$1
 
-  kv_GET "/$text_id.text" \
+  curl_GET "$BASE_URL/$text_id.text" \
+    | to_utf8 \
     | sed -n '/<h1>/,/<!-- Kommentarbox -->/p' \
     | sed 's/<h3>.\+<\/h3>//g' \
     | pandoc -f html -t plain
@@ -41,8 +42,9 @@ kv_author_texts () {
 
   author_id=$1
 
-  kv_GET '/autorentexte.php' -d sortby=datum -d start=0 -d limit=10000 -d autor="$author_id" \
-    | sed -n 's/.*<a href="\([0-9]\+\)\.text">.*/\1/p'
+  curl_GET "$BASE_URL/autorentexte.php" -d sortby=tnr -d start=0 -d limit=10000 -d autor="$author_id" \
+    | to_utf8 \
+    | sed -n 's/.*<li><a href="\([0-9]\+\)\.text">.*/\1/p'
 }
 
 case $1 in
@@ -51,7 +53,7 @@ case $1 in
     kv_text "$@";;
   author)
     shift
-    for text_id in $(kv_author_texts "$(kv_author_id "$@")"); do
+    for text_id in $(kv_author_texts "$(kv_author_id "$@")" | uniq); do
       kv_text "$text_id"
     done ;;
   *)