Compare commits

...

27 Commits

Author SHA1 Message Date
9d7543ec8b streaming-link-bot: import 2026-03-14 07:30:46 +01:00
b193946a1a temperature-development: import 2026-03-14 07:29:36 +01:00
0d64c4dfb3 statistics.racket: import 2026-03-14 07:28:30 +01:00
ca469fff65 roff: import 2026-03-14 07:27:43 +01:00
7b44f211d3 scan2pdf: import 2026-03-14 07:26:44 +01:00
0e3291b086 pronouncer: import 2026-03-14 07:26:20 +01:00
61d6c9488b gitignore 2026-03-14 07:25:47 +01:00
72f4a6de80 print-remote: import 2026-03-14 07:25:41 +01:00
110166d988 obsidian-tasks: import 2026-03-14 07:24:15 +01:00
b9522bfc89 name-eugenics: import 2026-03-14 07:23:00 +01:00
ec9fadb5f9 marcov-chain-monte-carlo: import 2026-03-14 07:22:31 +01:00
30a6debb7b hijri: import 2026-03-14 07:21:42 +01:00
a182f694c0 rusty-jeep: debug → release 2026-03-14 07:19:49 +01:00
d369c86982 subsequence-repetition: document 2026-03-14 07:19:13 +01:00
63141e1f81 group-expense: import 2026-03-14 07:18:32 +01:00
832b13a694 quantitative-literaturwissenschaft: import 2026-03-14 07:18:15 +01:00
9c57ea69f2 ocaml 2026-03-14 07:05:56 +01:00
c8bb7585ee make fenix overridable 2025-12-29 17:49:00 +01:00
305ae463a5 use naersk 2025-12-29 17:14:25 +01:00
b8c7dd98be rust-overlay should follow nixpkgs 2025-12-29 16:37:35 +01:00
047016aff8 pun-sort: add frontend 2025-12-28 22:33:05 +01:00
367499d380 pun-sort: make port configurable 2025-12-28 22:15:24 +01:00
5f0b8d90a8 pun-sort: package api 2025-12-28 22:12:51 +01:00
950805bc9d pun-sort: add api 2025-12-28 21:58:59 +01:00
dfd03dd376 pun-sort: add more language examples 2025-12-28 21:42:55 +01:00
c50502225e pun-sort: make language-gnostic 2025-12-28 21:22:13 +01:00
612acab4fc pun-sort: init 2025-12-28 21:06:24 +01:00
44 changed files with 23405 additions and 85 deletions

8
.gitignore vendored
View File

@@ -10,3 +10,11 @@ input.txt
greek.csv
node_modules
target
_build/
dist/
dist-newstyle/
*.pdf
*.ps
*~
*.zip
.ipynb_checkpoints/

95
flake.lock generated
View File

@@ -1,27 +1,70 @@
{
"nodes": {
"flake-utils": {
"fenix": {
"inputs": {
"nixpkgs": "nixpkgs",
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1659877975,
"narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
"lastModified": 1766990808,
"narHash": "sha256-6RfhhB6fzxY0p6eE6UhBXoWSaEAcjCgDG9UaQ6ge1tQ=",
"owner": "nix-community",
"repo": "fenix",
"rev": "dfa623c0a6682e6bd4269cc6192f965beb69aa03",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"owner": "nix-community",
"repo": "fenix",
"type": "github"
}
},
"naersk": {
"inputs": {
"fenix": [
"fenix"
],
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1763384566,
"narHash": "sha256-r+wgI+WvNaSdxQmqaM58lVNvJYJ16zoq+tKN20cLst4=",
"owner": "nix-community",
"repo": "naersk",
"rev": "d4155d6ebb70fbe2314959842f744aa7cabbbf6a",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "naersk",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1665296151,
"narHash": "sha256-uOB0oxqxN9K7XGF1hcnY+PQnlQJ+3bP2vCn/+Ru/bbc=",
"lastModified": 1766902085,
"narHash": "sha256-coBu0ONtFzlwwVBzmjacUQwj3G+lybcZ1oeNSQkgC0M=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "c0b0e0fddf73fd517c3471e546c0df87a42d53f4",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1766870016,
"narHash": "sha256-fHmxAesa6XNqnIkcS6+nIHuEmgd/iZSP/VXxweiEuQw=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "14ccaaedd95a488dd7ae142757884d8e125b3363",
"rev": "5c2bc52fb9f8c264ed6c93bd20afa2ff5e763dce",
"type": "github"
},
"original": {
@@ -33,29 +76,25 @@
},
"root": {
"inputs": {
"nixpkgs": [
"rust-overlay",
"nixpkgs"
],
"rust-overlay": "rust-overlay"
"fenix": "fenix",
"naersk": "naersk",
"nixpkgs": "nixpkgs_2"
}
},
"rust-overlay": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
},
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1677119371,
"narHash": "sha256-L0Da4eKzDZrsy8ysOS1lhgDjAgEqGvYGf/lXaRd5/YQ=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "c67c79ea25664d66e74ae91a6fa0d6c65d12d3a7",
"lastModified": 1766928643,
"narHash": "sha256-gq96i+4i2QEK94stPLzMeDdpKPOTOvw4Zicy+qLe7p8=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "be6975f8f90d33a3b205265a0a858ee29fabae13",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"owner": "rust-lang",
"ref": "nightly",
"repo": "rust-analyzer",
"type": "github"
}
}

View File

@@ -2,24 +2,27 @@
description = "All kinds of stuff";
inputs = {
rust-overlay.url = "github:oxalica/rust-overlay";
nixpkgs.follows = "rust-overlay/nixpkgs";
nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
naersk.url = "github:nix-community/naersk";
fenix.url = "github:nix-community/fenix";
naersk.inputs.nixpkgs.follows = "nixpkgs";
naersk.inputs.fenix.follows = "fenix";
};
outputs =
{
self,
fenix,
nixpkgs,
rust-overlay,
naersk,
}:
let
pkgsFor =
system:
import nixpkgs {
inherit system;
overlays = [
rust-overlay.overlays.default
];
overlays = [];
config = {};
};
lib = nixpkgs.lib;
eachSupportedSystem = lib.genAttrs lib.systems.flakeExposed;
@@ -29,6 +32,7 @@
system:
let
pkgs = pkgsFor system;
naersk' = pkgs.callPackage naersk {};
in
{
bvg =
@@ -52,6 +56,7 @@
};
onomap = pkgs.haskellPackages.callCabal2nix "onomap" ./onomastics-ng { };
hesychius = hesychius/hesychius.txt;
pun-sort-api = pkgs.callPackage ./pun-sort {};
alarm = pkgs.writers.writeDashBin "alarm" ''
set -efu
export PATH=${
@@ -74,7 +79,7 @@
echo $(echo "($(od -tu -An -N 2 /dev/urandom)%1000)+500"|bc) $(echo "($(od -tu -An -N 2 /dev/urandom)%500)+100"|bc)
done | rusty-jeep 1
'';
rusty-jeep = pkgs.rustPlatform.buildRustPackage rec {
rusty-jeep = naersk'.buildPackage {
name = "rusty-jeep";
version = "1.0.0";
src = ./rusty-jeep;

View File

@@ -5,4 +5,5 @@
(lisa 10.45 (laura)) ; honig + nüsse
(lisa 5.99 (lisa laura johannes kieran)) ; vegane würste + nüsse
(johannes 1.89 (lisa laura johannes kieran)) ; reeses
(laura 20 (lisa laura johannes kieran)) ; sprit
)

View File

@@ -5,6 +5,7 @@
(srfi srfi-69)
(ice-9 pretty-print))
(define (balance expenses)
(define (add-expense! totals expense)
(let* ((payer (car expense))
@@ -22,7 +23,7 @@
participants)
;; update payer
(hash-table-update! totals payer
(lambda (x) (+ x amount))
(lambda (x) (+ x amount (- share)))
(lambda () 0))))
(define totals (make-hash-table))
(for-each (lambda (expense)

15
hijri/hijri.go Normal file
View File

@@ -0,0 +1,15 @@
package main
import (
"fmt"
"time"
"github.com/kiaderouiche/go-hijricalendar"
)
func main() {
// Get a new instance of hijri.Time representing the current time
ht := hijri.Now(hijri.UmmAlQura())
// Get year, month, day
fmt.Println(ht.Date())
fmt.Println(ht.Year(), ht.Month(), ht.Day())
}

View File

@@ -0,0 +1,8 @@
# Dev shell providing GHC with the Haskell `mcmc` package in scope
# (used by the marcov-chain-monte-carlo experiments in this directory).
{ pkgs ? import <nixpkgs> {} }:
pkgs.mkShell {
  packages = [
    (pkgs.ghc.withPackages (hs: [
      hs.mcmc
    ]))
  ];
}

View File

@@ -0,0 +1,48 @@
;; Generate one random pronounceable "name": pick 1-5 random syllables,
;; each built from a consonant/vowel template, and print the result.

;; Seed the PRNG from the clock so every run produces a new name.
(random-seed (time-nanosecond (current-time)))

;; Return a uniformly random element of a (non-empty) list.
(define (sample elements)
  (let* ((len (length elements))
         (random-index (random len)))
    (list-ref elements random-index)))

;; Flatten arbitrarily nested lists into a single flat list.
(define (flatten lst)
  (cond
    ((null? lst) '())
    ((list? (car lst)) (append (flatten (car lst)) (flatten (cdr lst))))
    (else (cons (car lst) (flatten (cdr lst))))))

;; Concatenate a list of symbols into one string.
(define (symbols-to-string symbol-list)
  (apply string-append (map symbol->string symbol-list)))

;; Phoneme inventories: vowels (V) and consonants (C).
(define V '(a e i o u))
(define C '(p t k b d g f s h v z m n r l y w))

;; Allowed syllable shapes; C/V are slots filled from the inventories.
(define syllable-types
  '((C V)
    (V)
    (C V C)
    (V C)))

;; Draw one syllable: pick a shape, then fill each slot with a random
;; phoneme from the matching inventory.
(define (random-syllable)
  (let* ((syllable-type (sample syllable-types)))
    (map
      (lambda (cover-symbol)
        (sample (case cover-symbol
                  ((C) C)
                  ((V) V)
                  (else (error #f "Invalid symbol")))))
      syllable-type)))

;; Build a list of n results of calling the thunk f.
(define (multiple n f)
  (if (= n 0) '()
      (cons (f)
            (multiple (- n 1) f))))

;; Entry point: 1-5 syllables, printed as one word on its own line.
(let*
    ((max-syllables 5)
     (syllable-count (+ 1 (random max-syllables)))
     (syllables (multiple syllable-count random-syllable)))
  (begin
    (display (symbols-to-string (flatten syllables)))
    (newline)))

4
name-eugenics/shell.nix Normal file
View File

@@ -0,0 +1,4 @@
# Dev shell with Chez Scheme, used to run the name-eugenics script
# in this directory.
{ pkgs ? import <nixpkgs> {} }:
pkgs.mkShell {
  packages = [ pkgs.chez ];
}

View File

@@ -0,0 +1,23 @@
#!/bin/sh
# List open Obsidian tasks: search the vault for unchecked checkbox
# lines ("- [ ]") and pipe them through awk to group them by tag.
# NOTE(review): the awk program below is a skeleton — every action
# block contains only comments, so the pipeline currently produces no
# output beyond running rg.
VAULT=~/cloud/syncthing/obsidian
rg --no-filename '^\s*- \[ ]' "$VAULT" \
| awk '
BEGIN {
# initialize "last-tag"
# initialize tags assoc array
}
{
# find out tag
# remove tag from task text
# add task text to assoc array
# save last-tag
}
/^\s+/ {
# save under last-tag in assoc array if no tag
}
END {
# group by tag
}
'

3
ocaml/README.md Normal file
View File

@@ -0,0 +1,3 @@
_Climbing on the camel's back._ 🐫
Experiments with OCaml, inspired by [Bobkonf](https://bobkonf.de) 2026.

5
ocaml/dune Normal file
View File

@@ -0,0 +1,5 @@
(executable
(name main)
(modes js)
(libraries js_of_ocaml)
(preprocess (pps js_of_ocaml-ppx)))

2
ocaml/dune-project Normal file
View File

@@ -0,0 +1,2 @@
(lang dune 3.10)
(name ocaml_js_demo)

27
ocaml/flake.lock generated Normal file
View File

@@ -0,0 +1,27 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1773282481,
"narHash": "sha256-b/GV2ysM8mKHhinse2wz+uP37epUrSE+sAKXy/xvBY4=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "fe416aaedd397cacb33a610b33d60ff2b431b127",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

73
ocaml/flake.nix Normal file
View File

@@ -0,0 +1,73 @@
{
  description = "OCaml -> JavaScript demo using js_of_ocaml";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
  };

  outputs =
    { self, nixpkgs }:
    let
      # Single-system flake; extend if more platforms are needed.
      system = "x86_64-linux";
      pkgs = import nixpkgs { inherit system; };
    in
    {
      devShells.${system}.default = pkgs.mkShell {
        packages = with pkgs; [
          ocaml
          dune_3
          ocamlPackages.js_of_ocaml
          ocamlPackages.js_of_ocaml-compiler
          nodejs
          tmux
          ocamlPackages.ocaml-lsp
          ocamlPackages.ocamlformat
          # Convenience launcher: one tmux window rebuilding on change,
          # one serving the output, then attach.
          (pkgs.writeShellApplication {
            name = "dev";
            # dune_3 to match the toolchain used everywhere else in
            # this flake (previously the legacy `dune` attribute).
            runtimeInputs = [ dune_3 nodejs tmux ];
            text = ''
              SESSION=ocaml-js
              # Name the first window at creation and address windows
              # by name.  Fixed: the previous version sent the build
              # command to the unnamed window 0 and the server command
              # to the "build" window, leaving "server" empty.
              tmux new-session -d -s "$SESSION" -n build
              tmux send-keys -t "$SESSION:build" "dune build main.bc.js --watch" C-m
              tmux new-window -t "$SESSION" -n server
              # NOTE(review): serves ./result (the nix build symlink),
              # not _build — confirm this is the intended workflow.
              tmux send-keys -t "$SESSION:server" "npx live-server result" C-m
              tmux attach -t "$SESSION"
            '';
          })
        ];
        shellHook = ''
          echo "Run 'dev' to start tmux dev environment."
        '';
      };

      packages.${system}.default = pkgs.stdenv.mkDerivation {
        pname = "ocaml-js-demo";
        version = "0.1";
        src = ./.;
        nativeBuildInputs = with pkgs; [
          ocaml
          dune_3
          ocamlPackages.js_of_ocaml
          ocamlPackages.js_of_ocaml-compiler
          ocamlPackages.js_of_ocaml-ppx
        ];
        buildPhase = ''
          dune build main.bc.js
        '';
        installPhase = ''
          mkdir -p $out
          cp _build/default/main.bc.js $out/
          cp index.html $out/
        '';
      };
    };
}

15
ocaml/index.html Normal file
View File

@@ -0,0 +1,15 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>OCaml DOM demo</title>
</head>
<body>
<h1 id="title">OCaml DOM Demo</h1>
<button id="btn">Click me</button>
<p id="output"></p>
<script src="main.bc.js"></script>
</body>
</html>

23
ocaml/main.ml Normal file
View File

@@ -0,0 +1,23 @@
open Js_of_ocaml
(* open Js_of_ocaml.Dom_html *)

(* Shorthand for the page's document object. *)
let document = Dom_html.document

(* Look up a DOM element by id; raises Failure if it is missing. *)
let by_id id =
  Js.Opt.get
    (document##getElementById (Js.string id))
    (fun () -> failwith ("Missing element: " ^ id))

(* Entry point: count clicks on #btn and mirror the running count in
   #output.  NOTE(review): returning Js._false from the handler is the
   usual way to cancel the default click action — confirm intended. *)
let () =
  let button = by_id "btn" in
  let output = by_id "output" in
  let clicks = ref 0 in
  button##.onclick :=
    Dom_html.handler (fun _ ->
      incr clicks;
      output##.textContent :=
        Js.some (Js.string ("Clicked " ^ string_of_int !clicks ^ " times"));
      Js._false
    )

18
print-remote/print.sh Normal file
View File

@@ -0,0 +1,18 @@
#!/bin/sh
# Print files on a printer reachable only through an SSH proxy host:
# copy each file into a temp dir on the proxy, submit it with lpr,
# then show the queue.  The remote temp dir is removed on exit.
printer=OfficeJet
proxy=zaatar

remote_temporary_directory=$(ssh "$proxy" 'mktemp -d')

clean() {
    # Single-quote the path on the remote side so an unusual mktemp
    # result cannot be word-split by the remote shell.
    ssh "$proxy" "rm -rf '$remote_temporary_directory'"
}
trap clean EXIT

# Fixed: "$@" must be quoted — the unquoted $@ split filenames
# containing whitespace into multiple bogus arguments.
for file in "$@"; do
    if [ -f "$file" ]; then
        rsync "$file" "$proxy:$remote_temporary_directory"
        ssh "$proxy" lpr -P "$printer" "$remote_temporary_directory/$(basename "$file")"
    else
        echo Skipping non-file "$file" >&2
    fi
done

ssh "$proxy" lpq -P "$printer"

11
pronouncer/bereshit Normal file
View File

@@ -0,0 +1,11 @@
בְּרֵאשִׁ֖ית בָּרָ֣א אֱלֹהִ֑ים אֵ֥ת הַשָּׁמַ֖יִם וְאֵ֥ת הָאָֽרֶץ׃ ב וְהָאָ֗רֶץ הָיְתָ֥ה תֹ֙הוּ֙ וָבֹ֔הוּ וְחֹ֖שֶׁךְ עַל־פְּנֵ֣י תְה֑וֹם וְר֣וּחַ אֱלֹהִ֔ים מְרַחֶ֖פֶת עַל־פְּנֵ֥י הַמָּֽיִם׃ ג וַיֹּ֥אמֶר אֱלֹהִ֖ים יְהִ֣י א֑וֹר וַֽיְהִי־אֽוֹר׃ ד וַיַּ֧רְא אֱלֹהִ֛ים אֶת־הָא֖וֹר כִּי־ט֑וֹב וַיַּבְדֵּ֣ל אֱלֹהִ֔ים בֵּ֥ין הָא֖וֹר וּבֵ֥ין הַחֹֽשֶׁךְ׃ ה וַיִּקְרָ֨א אֱלֹהִ֤ים ׀ לָאוֹר֙ י֔וֹם וְלַחֹ֖שֶׁךְ קָ֣רָא לָ֑יְלָה וַֽיְהִי־עֶ֥רֶב וַֽיְהִי־בֹ֖קֶר י֥וֹם אֶחָֽד׃
  
[לוי]ו וַיֹּ֣אמֶר אֱלֹהִ֔ים יְהִ֥י רָקִ֖יעַ בְּת֣וֹךְ הַמָּ֑יִם וִיהִ֣י מַבְדִּ֔יל בֵּ֥ין מַ֖יִם לָמָֽיִם׃ ז וַיַּ֣עַשׂ אֱלֹהִים֮ אֶת־הָרָקִ֒יעַ֒ וַיַּבְדֵּ֗ל בֵּ֤ין הַמַּ֙יִם֙ אֲשֶׁר֙ מִתַּ֣חַת לָרָקִ֔יעַ וּבֵ֣ין הַמַּ֔יִם אֲשֶׁ֖ר מֵעַ֣ל לָרָקִ֑יעַ וַֽיְהִי־כֵֽן׃ ח וַיִּקְרָ֧א אֱלֹהִ֛ים לָֽרָקִ֖יעַ שָׁמָ֑יִם וַֽיְהִי־עֶ֥רֶב וַֽיְהִי־בֹ֖קֶר י֥וֹם שֵׁנִֽי׃
  
[ישראל]ט וַיֹּ֣אמֶר אֱלֹהִ֗ים יִקָּו֨וּ הַמַּ֜יִם מִתַּ֤חַת הַשָּׁמַ֙יִם֙ אֶל־מָק֣וֹם אֶחָ֔ד וְתֵרָאֶ֖ה הַיַּבָּשָׁ֑ה וַֽיְהִי־כֵֽן׃ י וַיִּקְרָ֨א אֱלֹהִ֤ים ׀ לַיַּבָּשָׁה֙ אֶ֔רֶץ וּלְמִקְוֵ֥ה הַמַּ֖יִם קָרָ֣א יַמִּ֑ים וַיַּ֥רְא אֱלֹהִ֖ים כִּי־טֽוֹב׃ יא וַיֹּ֣אמֶר אֱלֹהִ֗ים תַּֽדְשֵׁ֤א הָאָ֙רֶץ֙ דֶּ֗שֶׁא עֵ֚שֶׂב מַזְרִ֣יעַ זֶ֔רַע עֵ֣ץ פְּרִ֞י עֹ֤שֶׂה פְּרִי֙ לְמִינ֔וֹ אֲשֶׁ֥ר זַרְעוֹ־ב֖וֹ עַל־הָאָ֑רֶץ וַֽיְהִי־כֵֽן׃ יב וַתּוֹצֵ֨א הָאָ֜רֶץ דֶּ֠שֶׁא עֵ֣שֶׂב מַזְרִ֤יעַ זֶ֙רַע֙ לְמִינֵ֔הוּ וְעֵ֧ץ עֹֽשֶׂה־פְּרִ֛י אֲשֶׁ֥ר זַרְעוֹ־ב֖וֹ לְמִינֵ֑הוּ וַיַּ֥רְא אֱלֹהִ֖ים כִּי־טֽוֹב׃ יג וַֽיְהִי־עֶ֥רֶב וַֽיְהִי־בֹ֖קֶר י֥וֹם שְׁלִישִֽׁי׃
  
[ע"כ ישראל]יד וַיֹּ֣אמֶר אֱלֹהִ֗ים יְהִ֤י מְאֹרֹת֙ בִּרְקִ֣יעַ הַשָּׁמַ֔יִם לְהַבְדִּ֕יל בֵּ֥ין הַיּ֖וֹם וּבֵ֣ין הַלָּ֑יְלָה וְהָי֤וּ לְאֹתֹת֙ וּלְמ֣וֹעֲדִ֔ים וּלְיָמִ֖ים וְשָׁנִֽים׃ טו וְהָי֤וּ לִמְאוֹרֹת֙ בִּרְקִ֣יעַ הַשָּׁמַ֔יִם לְהָאִ֖יר עַל־הָאָ֑רֶץ וַֽיְהִי־כֵֽן׃ טז וַיַּ֣עַשׂ אֱלֹהִ֔ים אֶת־שְׁנֵ֥י הַמְּאֹרֹ֖ת הַגְּדֹלִ֑ים אֶת־הַמָּא֤וֹר הַגָּדֹל֙ לְמֶמְשֶׁ֣לֶת הַיּ֔וֹם וְאֶת־הַמָּא֤וֹר הַקָּטֹן֙ לְמֶמְשֶׁ֣לֶת הַלַּ֔יְלָה וְאֵ֖ת הַכּוֹכָבִֽים׃ יז וַיִּתֵּ֥ן אֹתָ֛ם אֱלֹהִ֖ים בִּרְקִ֣יעַ הַשָּׁמָ֑יִם לְהָאִ֖יר עַל־הָאָֽרֶץ׃ יח וְלִמְשֹׁל֙ בַּיּ֣וֹם וּבַלַּ֔יְלָה וּֽלְﬞהַבְדִּ֔יל בֵּ֥ין הָא֖וֹר וּבֵ֣ין הַחֹ֑שֶׁךְ וַיַּ֥רְא אֱלֹהִ֖ים כִּי־טֽוֹב׃ יט וַֽיְהִי־עֶ֥רֶב וַֽיְהִי־בֹ֖קֶר י֥וֹם רְבִיעִֽי׃
  
כ וַיֹּ֣אמֶר אֱלֹהִ֔ים יִשְׁרְצ֣וּ הַמַּ֔יִם שֶׁ֖רֶץ נֶ֣פֶשׁ חַיָּ֑ה וְעוֹף֙ יְעוֹפֵ֣ף עַל־הָאָ֔רֶץ עַל־פְּנֵ֖י רְקִ֥יעַ הַשָּׁמָֽיִם׃ כא וַיִּבְרָ֣א אֱלֹהִ֔ים אֶת־הַתַּנִּינִ֖ם הַגְּדֹלִ֑ים וְאֵ֣ת כׇּל־נֶ֣פֶשׁ הַֽחַיָּ֣ה ׀ הָֽרֹמֶ֡שֶׂת אֲשֶׁר֩ שָׁרְצ֨וּ הַמַּ֜יִם לְמִֽינֵהֶ֗ם וְאֵ֨ת כׇּל־ע֤וֹף כָּנָף֙ לְמִינֵ֔הוּ וַיַּ֥רְא אֱלֹהִ֖ים כִּי־טֽוֹב׃ כב וַיְבָ֧רֶךְ אֹתָ֛ם אֱלֹהִ֖ים לֵאמֹ֑ר פְּר֣וּ וּרְב֗וּ וּמִלְא֤וּ אֶת־הַמַּ֙יִם֙ בַּיַּמִּ֔ים וְהָע֖וֹף יִ֥רֶב בָּאָֽרֶץ׃ כג וַֽיְהִי־עֶ֥רֶב וַֽיְהִי־בֹ֖קֶר י֥וֹם חֲמִישִֽׁי׃
  
כד וַיֹּ֣אמֶר אֱלֹהִ֗ים תּוֹצֵ֨א הָאָ֜רֶץ נֶ֤פֶשׁ חַיָּה֙ לְמִינָ֔הּ בְּהֵמָ֥ה וָרֶ֛מֶשׂ וְחַֽיְתוֹ־אֶ֖רֶץ לְמִינָ֑הּ וַֽיְהִי־כֵֽן׃ כה וַיַּ֣עַשׂ אֱלֹהִים֩ אֶת־חַיַּ֨ת הָאָ֜רֶץ לְמִינָ֗הּ וְאֶת־הַבְּהֵמָה֙ לְמִינָ֔הּ וְאֵ֛ת כׇּל־רֶ֥מֶשׂ הָֽאֲדָמָ֖ה לְמִינֵ֑הוּ וַיַּ֥רְא אֱלֹהִ֖ים כִּי־טֽוֹב׃ כו וַיֹּ֣אמֶר אֱלֹהִ֔ים נַֽעֲשֶׂ֥ה אָדָ֛ם בְּצַלְמֵ֖נוּ כִּדְמוּתֵ֑נוּ וְיִרְדּוּ֩ בִדְגַ֨ת הַיָּ֜ם וּבְע֣וֹף הַשָּׁמַ֗יִם וּבַבְּהֵמָה֙ וּבְכׇל־הָאָ֔רֶץ וּבְכׇל־הָרֶ֖מֶשׂ הָֽרֹמֵ֥שׂ עַל־הָאָֽרֶץ׃ כז וַיִּבְרָ֨א אֱלֹהִ֤ים ׀ אֶת־הָֽאָדָם֙ בְּצַלְמ֔וֹ בְּצֶ֥לֶם אֱלֹהִ֖ים בָּרָ֣א אֹת֑וֹ זָכָ֥ר וּנְקֵבָ֖ה בָּרָ֥א אֹתָֽם׃ כח וַיְבָ֣רֶךְ אֹתָם֮ אֱלֹהִים֒ וַיֹּ֨אמֶר לָהֶ֜ם אֱלֹהִ֗ים פְּר֥וּ וּרְב֛וּ וּמִלְא֥וּ אֶת־הָאָ֖רֶץ וְכִבְשֻׁ֑הָ וּרְד֞וּ בִּדְגַ֤ת הַיָּם֙ וּבְע֣וֹף הַשָּׁמַ֔יִם וּבְכׇל־חַיָּ֖ה הָֽרֹמֶ֥שֶׂת עַל־הָאָֽרֶץ׃ כט וַיֹּ֣אמֶר אֱלֹהִ֗ים הִנֵּה֩ נָתַ֨תִּי לָכֶ֜ם אֶת־כׇּל־עֵ֣שֶׂב ׀ זֹרֵ֣עַ זֶ֗רַע אֲשֶׁר֙ עַל־פְּנֵ֣י כׇל־הָאָ֔רֶץ וְאֶת־כׇּל־הָעֵ֛ץ אֲשֶׁר־בּ֥וֹ פְרִי־עֵ֖ץ זֹרֵ֣עַ זָ֑רַע לָכֶ֥ם יִֽהְיֶ֖ה לְאׇכְלָֽה׃ ל וּֽלְכׇל־חַיַּ֣ת הָ֠אָ֠רֶץ וּלְכׇל־ע֨וֹף הַשָּׁמַ֜יִם וּלְכֹ֣ל ׀ רוֹמֵ֣שׂ עַל־הָאָ֗רֶץ אֲשֶׁר־בּוֹ֙ נֶ֣פֶשׁ חַיָּ֔ה אֶת־כׇּל־יֶ֥רֶק עֵ֖שֶׂב לְאׇכְלָ֑ה וַֽיְהִי־כֵֽן׃ לא וַיַּ֤רְא אֱלֹהִים֙ אֶת־כׇּל־אֲשֶׁ֣ר עָשָׂ֔ה וְהִנֵּה־ט֖וֹב מְאֹ֑ד וַֽיְהִי־עֶ֥רֶב וַֽיְהִי־בֹ֖קֶר י֥וֹם הַשִּׁשִּֽׁי׃

8
pronouncer/input Normal file
View File

@@ -0,0 +1,8 @@
בְּרֵאשִׁית בָּרָא אֱלֹהִים אֵת הַשָּׁמַיִם וְאֵת הָאָֽרֶץ׃
וְהָאָרֶץ הָיְתָה תֹהוּ וָבֹהוּ וְחֹשֶׁךְ עַל־פְּנֵי תְהוֹם וְרוּחַ אֱלֹהִים מְרַחֶפֶת עַל־פְּנֵי הַמָּֽיִם׃
וַיֹּאמֶר אֱלֹהִים יְהִי אוֹר וַֽיְהִי־אֽוֹר׃
וַיַּרְא אֱלֹהִים אֶת־הָאוֹר כִּי־טוֹב וַיַּבְדֵּל אֱלֹהִים בֵּין הָאוֹר וּבֵין הַחֹֽשֶׁךְ׃
וַיִּקְרָא אֱלֹהִים ׀ לָאוֹר יוֹם וְלַחֹשֶׁךְ קָרָא לָיְלָה וַֽיְהִי־עֶרֶב וַֽיְהִי־בֹקֶר יוֹם אֶחָֽד׃ פ
וַיֹּאמֶר אֱלֹהִים יְהִי רָקִיעַ בְּתוֹךְ הַמָּיִם וִיהִי מַבְדִּיל בֵּין מַיִם לָמָֽיִם׃
וַיַּעַשׂ אֱלֹהִים אֶת־הָרָקִיעַ וַיַּבְדֵּל בֵּין הַמַּיִם אֲשֶׁר מִתַּחַת לָרָקִיעַ וּבֵין הַמַּיִם אֲשֶׁר מֵעַל לָרָקִיעַ וַֽיְהִי־כֵֽן׃
וַיִּקְרָא אֱלֹהִים לָֽרָקִיעַ שָׁמָיִם וַֽיְהִי־עֶרֶב וַֽיְהִי־בֹקֶר יוֹם שֵׁנִֽי׃ פ

90
pronouncer/process.sed Executable file
View File

@@ -0,0 +1,90 @@
#!/usr/bin/env -S sed -f
# Transliterate pointed (vocalized) Hebrew text into a Latin scholarly
# transcription.  Overall pipeline (see inline comments below):
#   1. strip the meteg, normalize vowel/dagesh/shin-dot mark order,
#      and convert cantillation accents to a stress mark (')
#   2. map consonants — begadkefat letters with dagesh become stops
#      (b d g p t k), without dagesh fricatives (Greek letters as
#      intermediate symbols, resolved at the end)
#   3. map vowels and matres lectionis (û ê î ô ...)
#   4. post-rules: dagesh forte doubling, patah furtivum, schwa
#      handling, and accent diacritics on stressed vowels.
s/ֽ//g # meteg
s/\([ְֱִֵֶַָֹֹ]\?\)\(ּ\?\)\([ׁׂ]\?\)/\3\2\1/g
s/[֛֖֥֑֣֤֧֮֒֗֙֨֠֔֩֡]/'/g
s/ךְ/ך/g
s/שׁ/š/g
s/שׂ/ś/g
s/בּ/b/g
s/דּ/d/g
s/גּ/g/g
s/[פף]ּ/p/g
s/תּ/t/g
s/[כך]ּ/k/g
s/ב/β/g
s/ד/δ/g
s/ג/γ/g
s/[פף]/φ/g
s/ת/θ/g
s/[כך]/χ/g
s/ח//g
s/[ןנ]/n/g
s/ר/r/g
s/א/ʾ/g
s/[מם]/m/g
s/[צץ]//g
s/ל/l/g
s/ז/z/g
s/ק//g
s/ט//g
s/ה/h/g
s/ע/ʿ/g
s/וּ/û/g
s/ֵי/ê/g
s/ִי/î/g
s/ְ/ə/g
s/ֱ/ĕ/g
s/ֶ/e/g
s/ֹ/ō/g
s/ַ/a/g
s/ֲ/ă/g
s/ֵ/ē/g
s/ִ/i/g
s/ֻ/u/g
s/ָ/ā/g
s/ׇ/o/g
s/ו/v/g
s/י/y/g
s//ô/g
s/\(.\)ּ/\1\1/g
s/\([ḥʿ]\)a\b/a\1/g # patah furtivum
s/\([ai]\)\([rḇḳy]\)ə/\1\2/g
s/־/-/g
s/׃/./g
s/\([aeiuāēōêîû]\)\('\?\)\([ptk]\)\('\?\)\([aeiuāēōêîû]\)/\1\2\3\3\4\5/g # dagesch forte
s/\([aeiuoá]\)\(.\)ə/\1\2/g # schwa quiescens
s/\bφ\b//g
s/a'/á/g
s/ā\(.\?\)'/ā́\1/g
s/ē\(.\?\)'/ḗ\1/g
s/e\(.\?\)'/é\1/g
s/ō\(.\?\)'/ṓ\1/g
s/ô\(.\?\)'/ố\1/g
s/i\(.\?\)'/i\1/g
s/î\(.\?\)'/î́\1/g
s/i'y\|î'/î́/g
s/iy/î/g
s/ḗy/ế/g
s//ố/g
s/ō'/ṓ/g
s/û'\|'û/û́/g
s/i'/í/g
s/φ//g
s/θ//g
s/χ//g
s/β//g
s/δ//g
s/γ//g

21
pun-sort/default.nix Normal file
View File

@@ -0,0 +1,21 @@
{ writers, python3Packages }:
# Wrap sort_api.py as an executable Python script with its runtime
# dependencies available; writePython3Bin runs flake8 at build time,
# so stylistic checks that would fail the build are ignored.
writers.writePython3Bin "pun_sort_api.py" {
  # Fixed: "E203" was listed twice; duplicate removed.
  flakeIgnore = [
    "E203"
    "E226"
    "E265"
    "E302"
    "E305"
    "E501"
    "F401"
    "F841"
    "W503"
  ];
  libraries = [
    python3Packages.fastapi
    python3Packages.uvicorn
    python3Packages.pydantic
    python3Packages.pydantic-core
  ];
} ./sort_api.py

25
pun-sort/example.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/bin/sh
# Demo driver for sort.py: feeds a sample paragraph in each of six
# languages (English, German, Hindi, Danish, Dutch, French) through
# the phonetic sorter, passing the matching espeak-ng voice via -l.
set -efu
text_en="Once upon a time, in a quiet village nestled between rolling hills and sparkling rivers, there lived a clever fox named Felix. Felix was known throughout the village for his cunning tricks and playful antics. Every morning, he would sneak through the meadows, darting past rabbits and chickens, always careful to avoid the farmer's watchful eyes. Despite his mischievous ways, Felix had a kind heart and often shared his clever solutions with friends in need. One day, a heavy storm swept through the valley, leaving many paths muddy and rivers swollen. Felix saw his chance to help: he guided lost ducklings back to their pond, and showed the frightened kittens how to find shelter under the sturdy oak trees. The villagers watched in amazement as the fox moved gracefully through the rain-soaked fields, his orange fur glistening and his sharp eyes alert. By the time the storm passed, the village had gained a newfound respect for Felix. Tales of his bravery spread far and wide, carried by wandering merchants and whispered by children as they played near the cobblestone streets. Nights in the village were quiet once more, but the story of Felix, the fox who danced through storm and shadow, continued to inspire laughter, cleverness, and courage in the hearts of all who heard it."
echo "$text_en" | python3 sort.py
text_de="In einem kleinen Dorf, versteckt zwischen sanften Hügeln und klaren Bächen, lebte ein listiger Fuchs namens Fritz. Fritz war bekannt für seine cleveren Streiche und seine verspielte Natur. Jeden Morgen schlich er durch die Wiesen, huschte an Hasen und Hühnern vorbei und achtete dabei genau auf die wachsamen Augen des Bauern. Trotz seiner schelmischen Art hatte Fritz ein gutes Herz und half oft Freunden in Not. Eines Tages fegte ein heftiger Sturm durch das Tal, die Wege wurden schlammig und die Flüsse traten über die Ufer. Fritz sah seine Chance, zu helfen: Er führte verlorene Entenküken zurück zu ihrem Teich und zeigte den ängstlichen Kätzchen, wie sie Schutz unter den starken Eichen finden konnten. Die Dorfbewohner beobachteten erstaunt, wie der Fuchs anmutig durch die regengetränkten Felder sprang, sein orangefarbenes Fell glänzte und seine scharfen Augen waren stets wachsam. Nachdem der Sturm vorübergezogen war, gewann das Dorf großen Respekt für Fritz. Geschichten über seine Tapferkeit verbreiteten sich weit und breit, getragen von reisenden Händlern und von Kindern, die beim Spielen auf den Kopfsteinpflasterstraßen flüsterten. Die Nächte im Dorf waren wieder ruhig, aber die Erzählung von Fritz, dem Fuchs, der durch Sturm und Schatten tanzte, inspirierte weiterhin Lachen, Cleverness und Mut in den Herzen aller, die davon hörten."
echo "$text_de" | python3 sort.py -l de
text_hi="एक समय की बात है, एक शांत गाँव में, जो हरे-भरे पहाड़ों और बहती नदियों के बीच बसा था, एक चालाक लोमड़ी रहती थी जिसका नाम फेलेक्स था। फेलेक्स अपनी चतुराई और खेल-कूद के लिए पूरे गाँव में प्रसिद्ध थी। हर सुबह वह घास के मैदानों में दौड़ती, खरगोशों और मुर्गियों के पास से निकलती और हमेशा किसान की सतर्क निगाहों से बचती। अपनी शरारतों के बावजूद, फेलेक्स का दिल बहुत अच्छा था और वह अक्सर अपने दोस्तों की मदद करती थी। एक दिन, एक तेज तूफ़ान ने घाटी में कहर ढाया, रास्ते कीचड़ से भर गए और नदियाँ उफन गईं। फेलेक्स ने मदद करने का अवसर देखा: उसने खोए हुए बतख के बच्चों को उनके तालाब तक पहुँचाया और डरी हुई बिल्लियों को मजबूत ओक के पेड़ों के नीचे आश्रय दिखाया। गाँव वाले आश्चर्यचकित होकर देखते रहे कि कैसे लोमड़ी बारिश भरे खेतों में चपलता से घूम रही थी, उसकी नारंगी फर चमक रही थी और उसकी तेज आँखें सतर्क थीं। जब तूफ़ान खत्म हुआ, गाँव वालों ने फेलेक्स के लिए सम्मान बढ़ाया। उसकी बहादुरी की कहानियाँ दूर-दूर तक फैल गईं, घूमते व्यापारियों द्वारा सुनाई गई और बच्चों द्वारा जो पत्थर की सड़कों पर खेलते समय फुसफुसाते। गाँव में रातें फिर से शांत हो गईं, लेकिन फेलेक्स की कहानी, वह लोमड़ी जो तूफ़ान और छाया में नाचे, सभी के दिलों में हँसी, बुद्धिमानी और साहस भरती रही।
"
echo "$text_hi" | python3 sort.py -l hi
text_dk="Engang for længe siden, i en lille landsby mellem bløde bakker og glitrende floder, boede en klog ræv ved navn Felix. Felix var kendt i hele landsbyen for sine snedige påfund og legende natur. Hver morgen sneg han sig gennem markerne, løb forbi kaniner og høns, altid opmærksom på bondens vågne øjne. På trods af sine drilske vaner havde Felix et godt hjerte og hjalp ofte venner i nød. En dag fejede en kraftig storm gennem dalen, og stierne blev mudrede, mens floderne steg over bredden. Felix så sin chance for at hjælpe: han guidede fortabte ællinger tilbage til deres dam og viste de skræmte kattekillinger, hvordan de kunne finde ly under de stærke egetræer. Landsbyboerne så forbløffet på, mens ræven bevægede sig yndefuldt gennem de regnvåde marker, dens orange pels glimtede, og dens skarpe øjne var altid årvågne. Da stormen havde lagt sig, havde landsbyen fået en ny respekt for Felix. Historier om hans mod spredte sig vidt og bredt, fortalt af rejsende handlende og hvisket af børn, der legede på de brostensbelagte gader. Nætterne i landsbyen blev stille igen, men fortællingen om Felix, ræven der dansede gennem storm og skygge, fortsatte med at inspirere latter, snilde og mod i hjertet af alle, der hørte den."
echo "$text_dk" | python3 sort.py -l da
text_nl="Er was eens, in een klein dorpje tussen glooiende heuvels en kabbelende rivieren, een slimme vos genaamd Felix. Felix stond bekend in het hele dorp om zijn sluwe streken en speelse karakter. Elke ochtend sloop hij door de velden, snelde langs konijnen en kippen, altijd oplettend voor de waakzame ogen van de boer. Ondanks zijn ondeugende aard had Felix een goed hart en hielp vaak vrienden in nood. Op een dag raasde een hevige storm door de vallei, de paden werden modderig en de rivieren traden buiten hun oevers. Felix zag zijn kans om te helpen: hij leidde verdwaalde eendjes terug naar hun vijver en liet de angstige kittens zien waar ze beschutting konden vinden onder de stevige eikenbomen. De dorpelingen keken vol verbazing toe terwijl de vos sierlijk door de regenachtige velden bewoog, zijn oranje vacht glanzend en zijn scherpe ogen altijd alert. Toen de storm voorbij was, had het dorp nieuw respect voor Felix gekregen. Verhalen over zijn moed verspreidden zich wijd en zijd, verteld door rondtrekkende handelaren en gefluisterd door kinderen die speelden op de kinderkopjesstraatjes. De nachten in het dorp werden weer rustig, maar het verhaal van Felix, de vos die danste door storm en schaduw, bleef lachen, slimheid en moed inspireren in de harten van iedereen die het hoorde."
echo "$text_nl" | python3 sort.py -l nl
text_fr="Il était une fois, dans un village paisible niché entre des collines ondulantes et des rivières scintillantes, un renard rusé nommé Félix. Félix était connu dans tout le village pour ses astuces ingénieuses et son esprit joueur. Chaque matin, il se faufilait à travers les prairies, filant devant les lapins et les poules, toujours vigilant aux yeux attentifs du fermier. Malgré ses manières espiègles, Félix avait un bon cœur et aidait souvent ses amis dans le besoin. Un jour, une violente tempête balaya la vallée, rendant les chemins boueux et les rivières gonflées. Félix vit là une occasion d'aider : il guida des canetons perdus jusqu'à leur étang et montra aux chatons effrayés comment trouver refuge sous les robustes chênes. Les villageois regardaient avec étonnement le renard se déplacer gracieusement à travers les champs trempés de pluie, son pelage orange brillant et ses yeux perçants toujours alertes. Une fois la tempête passée, le village avait acquis un nouveau respect pour Félix. Des récits de son courage se répandirent loin et large, portés par des marchands itinérants et chuchotés par les enfants jouant près des rues pavées. Les nuits dans le village redevinrent calmes, mais l'histoire de Félix, le renard qui dansait à travers la tempête et l'ombre, continua d'inspirer rire, ingéniosité et courage dans le cœur de tous ceux qui l'entendaient."
echo "$text_fr" | python3 sort.py -l fr

137
pun-sort/sort.py Executable file
View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
import sys
import string
import subprocess
from functools import lru_cache
import argparse
# -------------------------
# IPA helpers
# -------------------------
def get_ipa(word, lang="en"):
    """Transcribe a single word to IPA using the espeak-ng CLI.

    Returns the transcription with surrounding whitespace and '/'
    delimiters stripped, or "" if espeak-ng is missing or fails.
    """
    command = ["espeak-ng", "-v", lang, "-q", "--ipa=3", word]
    try:
        raw = subprocess.check_output(
            command,
            stderr=subprocess.DEVNULL,
            text=True
        )
    except Exception:
        return ""
    return raw.strip().strip("/")
def ipa_tokenize(ipa):
    """Split an IPA string into a list of phoneme tokens.

    Stress marks (ˈ, ˌ) are discarded; recognized two-character
    diphthongs stay together as one token; every other character is
    its own token.
    """
    # NOTE(review): two set entries render as empty strings here —
    # likely mangled diphthongs (aʊ, oʊ?); confirm original encoding.
    diphthongs = {"aɪ", "", "eɪ", "", "ɔɪ"}
    tokens = []
    pos = 0
    end = len(ipa)
    while pos < end:
        if ipa[pos] in "ˈˌ":
            pos += 1
        elif ipa[pos:pos + 2] in diphthongs:
            tokens.append(ipa[pos:pos + 2])
            pos += 2
        else:
            tokens.append(ipa[pos])
            pos += 1
    return tokens
# -------------------------
# Distance
# -------------------------
VOWELS = set("aeiouəɪʊɔɛɜɑæ")

def sub_cost(a, b):
    """Substitution cost between two phoneme tokens.

    Identical tokens cost 0, vowel-for-vowel swaps are cheap (0.6),
    mixed vowel/consonant swaps are penalized (2.0), and
    consonant-for-consonant swaps cost 1.0.
    """
    if a == b:
        return 0.0
    a_is_vowel = a in VOWELS
    b_is_vowel = b in VOWELS
    if a_is_vowel and b_is_vowel:
        return 0.6
    if a_is_vowel or b_is_vowel:
        return 2.0
    return 1.0
@lru_cache(maxsize=None)
def phonetic_distance(a, b):
    """Weighted Levenshtein distance between two IPA token tuples.

    Insertions and deletions cost 1; substitutions are priced by
    sub_cost (vowel-vowel cheap, vowel-consonant expensive).

    Fixed: the old `a = tuple(a); b = tuple(b)` re-wrapping was dead
    code — lru_cache already requires hashable (tuple) arguments, so
    any non-tuple call would have failed before reaching the body.
    """
    n, m = len(a), len(b)
    # dp[i][j] = distance between the prefixes a[:i] and b[:j]
    dp = [[0] * (m + 1) for _ in range(n + 1)]
    for i in range(n + 1):
        dp[i][0] = i
    for j in range(m + 1):
        dp[0][j] = j
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            dp[i][j] = min(
                dp[i - 1][j] + 1,      # deletion
                dp[i][j - 1] + 1,      # insertion
                dp[i - 1][j - 1] + sub_cost(a[i - 1], b[j - 1])
            )
    return dp[n][m]
# -------------------------
# Seriation
# -------------------------
def seriate(words, ipas):
    """Order words greedily by phonetic nearest-neighbour chaining.

    Starts from the first word and repeatedly appends the unused word
    whose length-normalized phonetic distance to the chain's current
    tail is smallest.

    Fixed: an empty word list no longer raises IndexError on words[0];
    empty and single-element inputs are returned unchanged, matching
    the guard already present in the sort_api.py variant.
    """
    if len(words) <= 1:
        return words
    unused = set(words)
    path = [words[0]]
    unused.remove(words[0])
    while unused:
        cur = path[-1]
        # Normalize by the longer transcription; max(..., 1) guards
        # against two empty transcriptions.
        nxt = min(
            unused,
            key=lambda w: phonetic_distance(ipas[cur], ipas[w]) / max(len(ipas[cur]), len(ipas[w]), 1)
        )
        path.append(nxt)
        unused.remove(nxt)
    return path
# -------------------------
# Main
# -------------------------
def tokenize_stdin():
    """Read all of stdin and return whitespace-separated words with
    ASCII punctuation stripped.

    Words keep their original case — the old docstring claimed
    lowercasing that the code never performed.  Unicode letters
    (ä, ö, ü, ß, é, ñ, ...) pass through untouched; only characters
    in string.punctuation are removed.
    """
    text = sys.stdin.read()
    tokens = text.translate(str.maketrans('', '', string.punctuation)).split()
    return tokens
def main():
    """CLI entry point: read text from stdin, deduplicate the words
    (first occurrence wins), transcribe them via espeak-ng, and print
    them in phonetic-similarity order alongside their IPA."""
    parser = argparse.ArgumentParser(description="Pun-sort words by phonetic similarity")
    parser.add_argument(
        "--lang", "-l",
        type=str,
        default="en",
        help="Language code for espeak-ng (default: en)"
    )
    args = parser.parse_args()
    lang = args.lang
    unique_words = list(dict.fromkeys(tokenize_stdin()))
    transcriptions = {
        word: tuple(ipa_tokenize(get_ipa(word, lang=lang)))
        for word in unique_words
    }
    for word in seriate(unique_words, transcriptions):
        print(f"{word}\t/{''.join(transcriptions[word])}/")

if __name__ == "__main__":
    main()

454
pun-sort/sort_api.py Executable file
View File

@@ -0,0 +1,454 @@
#!/usr/bin/env python3
"""
FastAPI backend for phonetic word sorting
Sorts words by their phonetic similarity using espeak-ng IPA transcription
"""
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from typing import List, Dict, Any
import string
import subprocess
from functools import lru_cache
app = FastAPI(
title="Phonetic Word Sorter API",
description="Sort words by phonetic similarity using IPA transcription",
version="1.0.0"
)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# -------------------------
# IPA helpers
# -------------------------
def get_ipa(word: str, lang: str = "en") -> str:
    """Transcribe a word to IPA via the espeak-ng command-line tool.

    Maps a timeout to HTTP 504 and a missing espeak-ng binary to
    HTTP 500; any other failure yields "".
    """
    command = ["espeak-ng", "-v", lang, "-q", "--ipa=3", word]
    try:
        raw = subprocess.check_output(
            command,
            stderr=subprocess.DEVNULL,
            text=True,
            timeout=5
        )
    except subprocess.TimeoutExpired:
        raise HTTPException(status_code=504, detail="espeak-ng timeout")
    except FileNotFoundError:
        raise HTTPException(
            status_code=500,
            detail="espeak-ng not found. Please install it: apt-get install espeak-ng"
        )
    except Exception:
        return ""
    return raw.strip().strip("/")
def ipa_tokenize(ipa: str) -> List[str]:
    """Tokenize an IPA transcription into a list of phonemes.

    Drops stress marks (ˈ, ˌ), keeps recognized two-character
    diphthongs together, and emits all other characters singly.
    """
    # NOTE(review): two set entries render as empty strings here —
    # likely mangled diphthongs (aʊ, oʊ?); confirm original encoding.
    diphthongs = {"aɪ", "", "eɪ", "", "ɔɪ"}
    phonemes: List[str] = []
    pos = 0
    while pos < len(ipa):
        if ipa[pos] in "ˈˌ":
            pos += 1
            continue
        pair = ipa[pos:pos + 2]
        if len(pair) == 2 and pair in diphthongs:
            phonemes.append(pair)
            pos += 2
        else:
            phonemes.append(ipa[pos])
            pos += 1
    return phonemes
# -------------------------
# Distance calculation
# -------------------------
VOWELS = set("aeiouəɪʊɔɛɜɑæ")

def sub_cost(a: str, b: str) -> float:
    """Substitution cost between two phonemes: 0 for identity, 0.6
    for vowel-vowel swaps, 2.0 for mixed vowel/consonant swaps, and
    1.0 for consonant-consonant swaps."""
    if a == b:
        return 0.0
    vowel_count = (a in VOWELS) + (b in VOWELS)
    if vowel_count == 2:
        return 0.6
    if vowel_count == 1:
        return 2.0
    return 1.0
@lru_cache(maxsize=None)
def phonetic_distance(a: tuple, b: tuple) -> float:
    """Weighted Levenshtein distance between two IPA token tuples.

    Insertions and deletions cost 1; substitution cost comes from
    sub_cost.  Results are memoized, so both arguments must be
    hashable tuples.
    """
    rows, cols = len(a), len(b)
    # table[i][j] = distance between the prefixes a[:i] and b[:j]
    table = [[0] * (cols + 1) for _ in range(rows + 1)]
    for i in range(rows + 1):
        table[i][0] = i
    for j in range(cols + 1):
        table[0][j] = j
    for i in range(1, rows + 1):
        for j in range(1, cols + 1):
            deletion = table[i - 1][j] + 1
            insertion = table[i][j - 1] + 1
            substitution = table[i - 1][j - 1] + sub_cost(a[i - 1], b[j - 1])
            table[i][j] = min(deletion, insertion, substitution)
    return table[rows][cols]
def tokenize_text(text: str) -> List[str]:
    """Split text into words on whitespace, dropping ASCII punctuation.

    Unicode letters (ä, ö, ü, ß, é, ñ, ...) are preserved; only the
    characters in string.punctuation are removed before splitting.
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)
    return text.translate(strip_punctuation).split()
# -------------------------
# Seriation algorithm
# -------------------------
def seriate(words: List[str], ipas: dict) -> List[str]:
    """
    Sort words by phonetic similarity using nearest-neighbor seriation:
    start from the first word and greedily chain each word to its
    phonetically closest unused neighbor, normalizing distance by the
    longer of the two transcriptions (min length 1).
    """
    if len(words) <= 1:
        return words
    remaining = set(words)
    chain = [words[0]]
    remaining.remove(words[0])

    def closeness(candidate):
        # Length-normalized distance from the current chain tail.
        tail = chain[-1]
        norm = max(len(ipas[tail]), len(ipas[candidate]), 1)
        return phonetic_distance(ipas[tail], ipas[candidate]) / norm

    while remaining:
        nearest = min(remaining, key=closeness)
        chain.append(nearest)
        remaining.remove(nearest)
    return chain
# -------------------------
# API Endpoints
# -------------------------
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the single-page web interface.

    The embedded page posts the textarea contents as JSON ({text, lang})
    to POST /sort and renders the returned word/IPA pairs inline.
    Ctrl+Enter in the textarea also triggers the sort.
    """
    return """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Phonetic Word Sorter</title>
        <style>
            body {
                font-family: Georgia, serif;
                max-width: 650px;
                margin: 40px auto;
                padding: 0 20px;
                line-height: 1.6;
                color: #222;
            }
            h1 {
                font-size: 1.8em;
                margin-bottom: 0.3em;
                font-weight: normal;
            }
            .subtitle {
                color: #666;
                margin-bottom: 2em;
                font-style: italic;
            }
            label {
                display: block;
                margin-top: 1.5em;
                margin-bottom: 0.3em;
            }
            textarea {
                width: 100%;
                padding: 8px;
                border: 1px solid #ccc;
                font-family: inherit;
                font-size: 1em;
                resize: vertical;
                min-height: 100px;
            }
            select {
                padding: 6px;
                border: 1px solid #ccc;
                font-family: inherit;
                font-size: 1em;
            }
            button {
                margin-top: 1em;
                padding: 8px 16px;
                border: 1px solid #333;
                background: white;
                cursor: pointer;
                font-family: inherit;
                font-size: 1em;
            }
            button:hover {
                background: #f5f5f5;
            }
            button:disabled {
                opacity: 0.5;
                cursor: not-allowed;
            }
            #results {
                margin-top: 2em;
                padding-top: 2em;
                border-top: 1px solid #ddd;
            }
            .result-header {
                margin-bottom: 1em;
                font-weight: normal;
            }
            .stats {
                color: #666;
                font-size: 0.9em;
                margin-bottom: 1.5em;
            }
            .word-item {
                padding: 0.5em 0;
                border-bottom: 1px dotted #ddd;
            }
            .word {
                font-weight: bold;
            }
            .ipa {
                color: #666;
                font-family: monospace;
                margin-left: 1em;
            }
            .error {
                color: #c00;
                margin-top: 1em;
                padding: 1em;
                border-left: 3px solid #c00;
                background: #fff5f5;
            }
        </style>
    </head>
    <body>
        <h1>Phonetic Word Sorter</h1>
        <p class="subtitle">Sort words by their phonetic similarity using IPA transcription</p>
        <label for="text">Enter your text:</label>
        <textarea id="text" placeholder="night knight kite kit bit bite byte">night knight kite kit bit bite byte</textarea>
        <label for="lang">Language:</label>
        <select id="lang">
            <option value="en">English</option>
            <option value="de">German</option>
            <option value="es">Spanish</option>
            <option value="fr">French</option>
            <option value="it">Italian</option>
            <option value="pt">Portuguese</option>
            <option value="nl">Dutch</option>
            <option value="sv">Swedish</option>
            <option value="no">Norwegian</option>
            <option value="da">Danish</option>
        </select>
        <button id="sortBtn" onclick="sortWords()">Sort Words</button>
        <div id="results"></div>
        <script>
            async function sortWords() {
                const text = document.getElementById('text').value;
                const lang = document.getElementById('lang').value;
                const resultsDiv = document.getElementById('results');
                const sortBtn = document.getElementById('sortBtn');
                if (!text.trim()) {
                    resultsDiv.innerHTML = '<div class="error">Please enter some text</div>';
                    return;
                }
                sortBtn.disabled = true;
                sortBtn.textContent = 'Sorting...';
                resultsDiv.innerHTML = '<p>Processing...</p>';
                try {
                    const response = await fetch('/sort', {
                        method: 'POST',
                        headers: {
                            'Content-Type': 'application/json',
                        },
                        body: JSON.stringify({ text, lang })
                    });
                    if (!response.ok) {
                        const error = await response.json();
                        throw new Error(error.detail || 'Request failed');
                    }
                    const data = await response.json();
                    let html = '<h2 class="result-header">Sorted Results</h2>';
                    html += `<div class="stats">${data.original_count} words (${data.unique_count} unique)</div>`;
                    data.sorted_words.forEach(item => {
                        html += `<div class="word-item"><span class="word">${item.word}</span><span class="ipa">/${item.ipa}/</span></div>`;
                    });
                    resultsDiv.innerHTML = html;
                } catch (error) {
                    resultsDiv.innerHTML = `<div class="error">Error: ${error.message}</div>`;
                } finally {
                    sortBtn.disabled = false;
                    sortBtn.textContent = 'Sort Words';
                }
            }
            // Allow Enter key in textarea
            document.getElementById('text').addEventListener('keydown', function(e) {
                if (e.key === 'Enter' && e.ctrlKey) {
                    sortWords();
                }
            });
        </script>
    </body>
    </html>
    """
@app.get("/api")
async def api_info():
    """Return machine-readable API metadata as JSON.

    Bug fix: the route previously declared response_class=HTMLResponse
    while returning a dict; Starlette's HTMLResponse cannot render a
    dict body, so the endpoint failed at request time. Dropping the
    override lets FastAPI's default JSON response serialise the dict.
    """
    return {
        "name": "Phonetic Word Sorter API",
        "version": "1.0.0",
        "endpoints": {
            "GET /": "Web interface",
            "POST /sort": "Sort words by phonetic similarity",
            "POST /ipa": "Get IPA transcription for a single word",
            "GET /health": "Health check"
        }
    }
@app.get("/health")
async def health_check():
    """Health probe: report whether the espeak-ng binary is runnable.

    Returns {"status": "healthy", ...} only when `espeak-ng --version`
    starts, finishes within 2 s, and exits 0. Any failure (missing
    binary, timeout, non-zero exit) yields an "unhealthy" payload
    instead of raising, so the endpoint itself never 500s.
    """
    try:
        result = subprocess.run(
            ["espeak-ng", "--version"],
            capture_output=True,
            timeout=2
        )
        # Previously a non-zero exit status was still reported as healthy;
        # check the return code explicitly.
        if result.returncode != 0:
            return {
                "status": "unhealthy",
                "error": f"espeak-ng exited with code {result.returncode}",
            }
        return {"status": "healthy", "espeak_ng": "available"}
    except Exception as e:
        return {"status": "unhealthy", "error": str(e)}
@app.post("/ipa")
async def get_word_ipa(request: Request):
    """Return the IPA transcription and phoneme tokens for one word.

    Expects a JSON body of the form {"word": "hello", "lang": "en"};
    "lang" defaults to "en". Responds 400 when "word" is missing or no
    transcription can be produced.
    """
    payload = await request.json()
    word = payload.get("word")
    if not word:
        raise HTTPException(status_code=400, detail="'word' field is required")
    language = payload.get("lang", "en")
    transcription = get_ipa(word, language)
    if not transcription:
        raise HTTPException(
            status_code=400,
            detail=f"Could not get IPA for word '{word}'"
        )
    return {
        "word": word,
        "ipa": transcription,
        "tokens": ipa_tokenize(transcription),
    }
@app.post("/sort")
async def sort_words(request: Request):
    """Sort the words of a text by phonetic similarity.

    Expects a JSON body {"text": "...", "lang": "en"}; "lang" defaults
    to "en". Words are de-duplicated (order preserved), transcribed to
    IPA, and ordered by the greedy seriation. Responds 400 for empty
    text, no words, or when no word could be transcribed.
    """
    payload = await request.json()
    text = payload.get("text")
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="'text' field is required")
    language = payload.get("lang", "en")
    words = tokenize_text(text)
    if not words:
        raise HTTPException(status_code=400, detail="No valid words found in text")
    # Order-preserving de-duplication.
    unique_words = list(dict.fromkeys(words))
    # Map every unique word to its phoneme tuple; empty tuple on failure.
    ipas = {
        word: tuple(ipa_tokenize(get_ipa(word, language) or ""))
        for word in unique_words
    }
    valid_words = [w for w in unique_words if ipas[w]]
    if not valid_words:
        raise HTTPException(
            status_code=400,
            detail="Could not get IPA transcription for any words"
        )
    ordered = seriate(valid_words, ipas)
    return {
        "sorted_words": [{"word": w, "ipa": "".join(ipas[w])} for w in ordered],
        "original_count": len(words),
        "unique_count": len(unique_words),
    }
if __name__ == "__main__":
    # Development entry point: port comes from $PORT (default 8000).
    import os
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8000)))

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,76 @@
import pandas as pd
import numpy as np
from typing import List, Tuple, Literal, Generator
from numpy.typing import NDArray
import itertools
import math
import random
type Binary = Literal[0] | Literal[1]
def get_transitions[A](series: List[A], step=1) -> List[Tuple[A, A]]:
cycled_series = series[step:] + series[:step]
return list(zip(series, cycled_series))
def transition_matrix[A](transitions: List[Tuple[A, A]]) -> NDArray[np.integer]:
element, next_element = zip(*transitions)
crosstab = pd.crosstab(element, next_element)
return np.matrix(crosstab)
def correlation(matrix: NDArray[np.integer]) -> float:
    """Phi coefficient of a 2x2 contingency table.

    phi = (ad - bc) / sqrt((a+b)(c+d)(a+c)(b+d)); raises ValueError for
    any shape other than (2, 2).
    """
    if matrix.shape != (2, 2):
        raise ValueError("The input matrix must be 2x2")
    ad = matrix[0, 0] * matrix[1, 1]
    bc = matrix[0, 1] * matrix[1, 0]
    # Product of all four marginal totals (row sums times column sums).
    marginal_product = np.prod(matrix.sum(axis=1)) * np.prod(matrix.sum(axis=0))
    return (ad - bc) / np.sqrt(marginal_product)
def correlation_ranges[A](series: List[A]) -> Generator[float, None, None]:
step = 0
while True:
transitions = get_transitions(series, step=step)
matrix = transition_matrix(transitions)
current_correlation = correlation(matrix)
yield current_correlation
step += 1
type MetricalSyllable = Literal["-"] | Literal["u"]
# Sonnet as modelled here: 14 lines of iambic pentameter, each with a
# trailing unstressed syllable (5 x "u-" + "u"), flattened to one sequence.
sonett = list(14 * ((5 * "u-") + "u"))
# Limerick stress pattern: two long anapaestic lines, two short lines,
# one final long line — NOTE(review): simplified scheme, confirm intent.
limerick = list(2 * "u-uu-uu-u" + 2 * "u-uu-" + "u-uu-uu-u")
def get_hexameter(k: int = 1):
result = []
for _ in range(k):
hexameter = list("".join(random.choices(["-uu", "--"], k=5)) + random.choice(["--", "-u"]))
result.extend(hexameter)
return result
# --- Demo: phi correlation of syllable-transition counts per metre ---
# Sonnet (strictly alternating stress pattern).
mat = transition_matrix(get_transitions(sonett))
print(mat)
print(correlation(mat))
# Limerick pattern.
mat = transition_matrix(get_transitions(limerick))
print(mat)
print(correlation(mat))
# 2000 randomly generated hexameter lines.
mat = transition_matrix(get_transitions(get_hexameter(k=2000)))
print(mat)
print(correlation(mat))
# Baseline: 20000 uniformly random syllables.
mat = transition_matrix(get_transitions(random.choices(["-", "u"], k=20000)))
print(mat)
print(correlation(mat))
# First 40 lag-correlations of a fresh hexameter sample.
print(list(itertools.islice(correlation_ranges(get_hexameter(k=2000)), 40)))

132
roff/letter.txt Normal file
View File

@@ -0,0 +1,132 @@
Title of my paper
Jim Hall
Class
Instructor
Date
left center right
1. Introduction
One cool thing I like about the history of Unix is how
Unix developed into a document processing system. The short
history of that is the Unix team wanted to purchase a new
computer to keep working on Unix. But management said "No."
Around that same time, the Patents team wanted to buy a
new computer to write patent applications, but the vendor
hadn't finished the software yet. The Unix team went to
them and said:
Hey, if you buy us a new computer, we'll update the
existing roff typesetting system so you can write
patent applications.
That's how Unix created the first nroff program, new
roff. Later, they updated nroff to become troff, the type-
setter roff, and even later ditroff, the device independent
version of troff.
left 1

2
roff/margins.me Normal file
View File

@@ -0,0 +1,2 @@
.ll 60
.po 10

465
roff/paper-ps Normal file
View File

@@ -0,0 +1,465 @@
x T ps
x res 72000 1 1
x init
p1
V168000
H100000
DFd
x font 38 TB
f38
s10000
h263010
md
tT
H369500
title
wh2500
tof
wh2500
tmy
wh2500
tpaper
n12000 0
x font 5 TR
f5
V204000
H382915
tJim
wh2500
tHall
n12000 0
V228000
H389165
tClass
n12000 0
V252000
H380560
tInstructor
n12000 0
V276000
H390560
tDate
n12000 0
V792000
p1
x font 38 TB
f38
s10000
V60000
H100000
tleft
h272795
tcenter
h265565
tright
n12000 0
V96000
H100000
t1.
wh5000
tIntr
H129540
toduction
h5000
n12000 0
x font 5 TR
f5
V124200
H125000
tOne
wh2574
tcool
wh2574
tthing
wh2573
tI
wh2573
tlik
H203524
te
wh2573
tabout
wh2573
tthe
wh2573
thistory
wh2573
tof
wh2573
tUnix
wh2573
tis
wh2573
tho
H332945
tw
wh2573
tUnix
wh2573
tde
H374501
tv
H379351
teloped
wh2573
tinto
wh2573
ta
wh2573
tdocument
wh2573
tprocessing
wh2573
tsystem.
wh5073
tThe
wh2573
tshort
wh2573
thistory
wh2573
tof
wh2573
tthat
wh2573
tis
wh2573
tthe
wh2573
tUnix
n12000 0
V148200
H100000
tteam
wh2500
tw
H129060
tanted
wh2500
tto
wh2500
tpurchase
wh2500
ta
wh2500
tne
H217670
tw
wh2500
tcomputer
wh2500
tto
wh2500
tk
H282840
teep
wh2500
tw
H306340
torking
wh2500
ton
wh2500
tUnix.
wh5000
tBut
wh2500
tmanagement
wh2500
tsaid
wh2500
Clq
h4440
tNo.
H482570
Crq
h4440
n12000 0
V176400
H125000
tAround
wh2549
tthat
wh2549
tsame
wh2549
ttime,
wh2549
tthe
wh2549
tP
H241755
tatents
wh2549
tteam
wh2549
tw
H296743
tanted
wh2549
tto
wh2549
tb
H336081
tuy
wh2549
ta
wh2549
tne
H364809
tw
wh2549
tcomputer
wh2549
tto
wh2549
twrite
wh2549
tpatent
wh2549
tapplications,
wh2549
tb
H533493
tut
wh2549
tthe
wh2549
tv
H563441
tendor
wh2549
thadn'
H611350
tt
wh2550
Cfi
h5560
tnished
wh2550
tthe
wh2550
tsoftw
H687790
tare
n12000 0
V200400
H100000
tyet.
wh5000
tThe
wh2500
tUnix
wh2500
tteam
wh2500
twent
wh2500
tto
wh2500
tthem
wh2500
tand
wh2500
tsaid:
n12000 0
s9000
V227400
H120000
tHe
H130359
ty
H134274
t,
wh2250
tif
wh2250
tyou
wh2250
tb
H166593
tuy
wh2250
tus
wh2250
ta
wh2250
tne
H202611
tw
wh2250
tcomputer
H244992
t,
wh2250
twe'
H262893
tll
wh2250
tupdate
wh2250
tthe
wh2250
te
H313500
txisting
wf38
h2250
tr
H344091
toff
wf5
h2250
ttypesetting
wh2250
tsystem
wh2250
tso
wh2250
tyou
wh2250
tcan
wh2250
twrite
wh2250
tpatent
wh2250
tapplications.
n10800 0
s10000
V259800
H125000
tThat'
H146110
ts
wh3963
tho
H163713
tw
wh3963
tUnix
wh3963
tcreated
wh3963
tthe
wh3963
Cfi
h5560
trst
wf38
h3963
tnr
H277218
toff
wf5
h3963
tprogram,
wx font 40 TI
f40
h4323
tne
H342834
tw
wh3963
tr
H356907
tof
H364507
tf
f5
h1960
t.
wh6463
tLater
H398910
t,
wh3963
tthe
H417443
ty
wh3963
tupdated
wf38
h3963
tnr
H471849
toff
wf5
h3963
tto
wh3963
tbecome
wf38
h3963
ttr
H541868
toff
f5
t,
wh3962
tthe
wf40
h4092
ttypesetter
wh3962
tr
H622584
tof
H630184
tf
f5
h1960
t,
wh3962
tand
wh3962
te
H663978
tv
H668828
ten
wh3962
tlater
n12000 0
f38
V283800
H100000
tditr
H115930
toff
f5
t,
wh2500
tthe
wf40
h2850
tde
H156950
tvice
wh2500
tidenependent
wf5
h3180
tv
H236900
tersion
wh2500
tof
wf38
h2500
ttr
H282260
toff
f5
t.
n12000 0
f38
V756000
H100000
tleft
h581120
t1
n12000 0
x trailer
V792000
x stop

53
roff/paper.me Normal file
View File

@@ -0,0 +1,53 @@
.tp
.he 'left'center'right'
.fo 'left''%'
.sp 6
.ce 5
.b "Title of my paper"
.sp 2
Jim Hall
.sp
Class
.sp
Instructor
.sp
Date
.bp
.\" .ls 2
.sh 1 Introduction
.lp
One cool thing I like about the history of Unix is how
Unix developed into a document processing system.
The short history of that is the Unix team wanted to
purchase a new computer to keep working on Unix.
But management said
.q No.
.pp
Around that same time, the Patents team wanted to buy a
new computer to write patent applications, but the
vendor hadn't finished the software yet.
The Unix team went to them and said:
.(q
Hey, if you buy us a new computer,
we'll update the existing
.b roff
typesetting system so you can write patent applications.
.)q
.pp
That's how Unix created the first
.b nroff
program,
.i "new roff" .
Later, they updated
.b nroff
to become
.b troff ,
the
.i "typesetter roff" ,
and even later
.b ditroff ,
the
.i "device independent"
version of
.b troff .
.sh 2 Introduction

4
roff/shell.nix Normal file
View File

@@ -0,0 +1,4 @@
# Dev shell providing groff (GNU troff) for the roff examples here.
{ pkgs ? import <nixpkgs> {} }:
pkgs.mkShell {
  packages = [pkgs.groff];
}

View File

@@ -10,7 +10,7 @@ exec >&2
for i in `seq 8000 1000 10000`; do
echo $i 100
done | ./target/debug/rusty-jeep 1
done | ./target/release/rusty-jeep 1
echo 'if you heard that sound, then goto sleep..^_^'
echo sleep "$@"
@@ -19,4 +19,4 @@ sleep "$@"
echo 'wake up!'
while :; do
echo $(echo "($(od -tu -An -N 2 /dev/urandom)%1000)+500"|bc) $(echo "($(od -tu -An -N 2 /dev/urandom)%500)+100"|bc)
done | ./target/debug/rusty-jeep 1
done | ./target/release/rusty-jeep 1

48
scan2pdf/shell.nix Normal file
View File

@@ -0,0 +1,48 @@
# Dev shell for scan2pdf: crops scanned pages and repackages them as PDFs.
{ pkgs ? import <nixpkgs> {} }:
let
  # crop_ppm: for each PPM path on argv, bound the largest bright region
  # and crop the image to that bounding box in place.
  crop_ppm = (pkgs.writers.writePython3Bin "crop_ppm" {libraries = [pkgs.python3Packages.opencv4];} ''
    import cv2
    import sys
    for path in sys.argv[1:]:
        img = cv2.imread(path)[30:-30, 30:]
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
        contours, hierarchy = cv2.findContours(
            thresh,
            cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_SIMPLE
        )
        cnt = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(cnt)
        print(path, x, y, w, h)
        crop = img[y:y + h, x:x + w]
        cv2.imwrite(path, crop)
  '');
in
pkgs.mkShell {
  packages = [
    pkgs.poppler_utils
    pkgs.mupdf
    crop_ppm
    # generate: explode a PDF to PPMs, crop each page, rebuild a cropped
    # PDF, then split every page in two columns with mutool poster.
    (pkgs.writers.writeDashBin "generate" ''
      set -x
      pwd=$(pwd)
      tmpdir=$(mktemp -d)
      cd "$tmpdir"
      [ $# -eq 1 ] || {
        echo Please provide one PDF file as argument. >&2
        exit 1
      }
      [ -f "$1" ] || {
        echo "$1" does not exist. >&2
        exit 1
      }
      basename=$(basename "$1" .pdf)
      ${pkgs.poppler_utils}/bin/pdftoppm "$1" exploded
      ${crop_ppm}/bin/crop_ppm exploded*.ppm
      ${pkgs.imagemagick}/bin/convert exploded*.ppm "$pwd/cropped-$basename.pdf"
      ${pkgs.mupdf}/bin/mutool poster -x 2 "$pwd/cropped-$basename.pdf" "$pwd/split-cropped-$basename.pdf"
    '')
  ];
}

View File

@@ -0,0 +1,4 @@
# Dev shell for the Racket statistics experiments.
{ pkgs ? import <nixpkgs> {} }:
pkgs.mkShell {
  packages = [ pkgs.racket ];
}

View File

@@ -0,0 +1,15 @@
#lang typed/racket
;; Sum of a list of numbers (right fold over +, identity 0).
(: sum (-> (Listof Number) Number))
(define (sum list) (foldr + 0 list))
;; Arithmetic mean of a sample.
;; NOTE(review): divides by zero on an empty sample — confirm callers
;; never pass an empty list.
(: mean (-> (Listof Number) Number))
(define (mean sample)
  (/ (sum sample) (length sample)))
;; Sample standard deviation with Bessel's correction (divides by n-1).
;; NOTE(review): a single-element sample divides by zero — confirm inputs
;; always have at least two values.
(: standard-deviation (-> (Listof Number) Number))
(define (standard-deviation sample)
  (let* ((degrees-of-freedom (- (length sample) 1))
         (sample-mean (mean sample))
         (sum-of-squared-errors (sum (map (lambda (xi) (expt (- xi sample-mean) 2)) sample))))
    (sqrt (/ sum-of-squared-errors degrees-of-freedom))))

51
streaming-link-bot/bot.py Executable file
View File

@@ -0,0 +1,51 @@
#!/usr/bin/env python
from telegram import Update, ForceReply
from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler, filters
import os
import re
import requests
import sys
def convert_link(link: str) -> str:
    """Convert a Spotify track link to YouTube Music, or vice versa.

    Delegates the actual conversion to the ytm2spotify.com web service
    and returns the first result URL. Raises ValueError for links from
    any other service; network/JSON errors propagate to the caller.
    """
    from urllib.parse import quote

    if "spotify" in link:
        to_service = "youtube_music"
    elif "youtube" in link:
        to_service = "spotify"
        # The service expects music.youtube.com links. Escape the dot —
        # the previous pattern "youtube.com" would match any character
        # in place of the dot.
        link = re.sub(r"//(www\.)?youtube\.com", "//music.youtube.com", link)
    else:
        raise ValueError("Invalid streaming service: " + link)
    print(f"Trying to convert {link}", file=sys.stderr)
    # URL-encode the link so its own "?" and "&" don't corrupt the query
    # string of the conversion request.
    r = requests.get(f'https://ytm2spotify.com/convert?url={quote(link, safe="")}&to_service={to_service}')
    print(r.text, file=sys.stderr)
    json = r.json()
    print(json, file=sys.stderr)
    return json["results"][0]["url"]
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle the /start command: greet the user and explain usage."""
    user = update.effective_user
    await update.message.reply_html(
        rf"Hi {user.mention_html()}! You can send me links to Spotify songs and I'll give you the link on YouTube Music—and vice versa.",
        reply_markup=ForceReply(selective=True),
    )
async def streaming_link(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Try to convert the message text as a streaming link and reply.

    Any failure (unsupported service, network error, unexpected response
    shape) is logged to stderr and answered with a generic message, so
    the bot never crashes on bad input.
    """
    try:
        converted = convert_link(update.message.text)
        await update.message.reply_text(converted)
    except Exception as e:
        print(e, file=sys.stderr)
        await update.message.reply_text("Cannot convert this.")
def main(token: str) -> None:
    """Build the Telegram bot application and run it with long polling.

    Registers the /start handler plus a catch-all handler that attempts
    link conversion on every non-command text message.
    """
    application = Application.builder().token(token).build()
    application.add_handler(CommandHandler("start", start))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, streaming_link))
    application.run_polling(allowed_updates=Update.ALL_TYPES)
if __name__ == "__main__":
    # The bot token must be supplied via the environment.
    token = os.getenv("TELEGRAM_TOKEN")
    if token:
        main(token)
    else:
        print("Missing TELEGRAM_TOKEN environment variable", file=sys.stderr)
        # Exit non-zero so process supervisors detect the misconfiguration
        # (previously the script exited 0 despite doing nothing).
        sys.exit(1)

View File

@@ -0,0 +1,11 @@
# Dev shell for the streaming-link Telegram bot.
# NOTE(review): bot.py imports `requests`, which is not listed below —
# confirm it arrives transitively or add py.requests.
{ pkgs ? import <nixpkgs> {} }:
pkgs.mkShell {
  packages = [
    (pkgs.python3.withPackages (py: [
      py.yt-dlp
      py.spotipy
      py.ytmusicapi
      py.python-telegram-bot
    ]))
  ];
}

View File

@@ -1,3 +1,4 @@
-- | Module to find and print all repetitions of substrings in a given string.
module Main where
import Control.Monad (forM_)
@@ -6,6 +7,7 @@ import Data.List (sortOn)
import Data.Map qualified as Map
import Data.Set qualified as Set
-- | Creates a sliding window of a given size over a list.
slidingWindow :: Int -> [a] -> [[a]]
slidingWindow size list
| size > 0 && length list >= size =
@@ -29,6 +31,7 @@ pairwiseComparison xs = [(x1, i, j) | (i, x1) <- ixs, (j, x2) <- ixs, x1 == x2,
allPairwiseComparisons :: (Eq a) => [a] -> [[([a], Int, Int)]]
allPairwiseComparisons xs = map (\n -> pairwiseComparison (slidingWindow n xs)) [0 .. length xs]
-- | Finds all repetitions of substrings of a given list.
allRepetitions :: (Ord a) => [a] -> [Map.Map [a] (Set.Set Int)]
allRepetitions xs = map (\n -> findRepetitions (slidingWindow n xs)) $ reverse [0 .. length xs]
@@ -40,11 +43,15 @@ printAllRepetitions str substrRepetitions = do
zipWith
(const $ \x -> if x then '*' else ' ')
str
[i `elem` concatMap (\x -> [x .. x + length key - 1]) (Set.toList value) | i <- [0 ..]]
(isInRange (length key) (Set.toList value))
putStrLn $ "\t" ++ show key
where
score (substring, occurrences) = negate $ length substring ^ 2 * Set.size occurrences
isInRange len indices = [i `elem` concatMap (\x -> [x .. x + len - 1]) indices | i <- [0 ..]]
-- | Prints a pairwise comparison of the substrings in a grid format.
-- The grid will have 'x' for matching pairs and '_' for non-matching pairs.
-- The first row and column will show the indices of the substrings.
printPairwiseComparison :: [(Int, Int)] -> IO ()
printPairwiseComparison [] = return ()
printPairwiseComparison xys =
@@ -64,6 +71,7 @@ printPairwiseComparison xys =
putChar '\n'
putChar '\n'
main :: IO ()
main = do
let str = map toLower $ (" " ++) $ "nachts im dichten fichtendickicht da sind dicke fichten wichtig"
printAllRepetitions str (allRepetitions str)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
# Dev shell for data-analysis notebooks: Jupyter plus the plotting and
# dataframe stack (seaborn/matplotlib/pandas/numpy).
{ pkgs ? import <nixpkgs> {} }:
pkgs.mkShell {
  packages = [
    (pkgs.python3.withPackages (py: [
      py.jupyter
      py.seaborn
      py.matplotlib
      py.pandas
      py.numpy
    ]))
  ];
}
}