From 612acab4fcb0ea4bb8e76cd228399e87550e3376 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kier=C3=A1n=20Meinhardt?=
Date: Sun, 28 Dec 2025 21:06:24 +0100
Subject: [PATCH] pun-sort: init

---
 pun-sort/example.sh |  10 +++
 pun-sort/sort.py    | 175 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 185 insertions(+)
 create mode 100755 pun-sort/example.sh
 create mode 100755 pun-sort/sort.py

diff --git a/pun-sort/example.sh b/pun-sort/example.sh
new file mode 100755
--- /dev/null
+++ b/pun-sort/example.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+set -efu
+
+tokenize() {
+  tr -cs '[:alpha:]' '\n' | tr '[:upper:]' '[:lower:]'
+}
+
+text="Once upon a time, in a quiet village nestled between rolling hills and sparkling rivers, there lived a clever fox named Felix. Felix was known throughout the village for his cunning tricks and playful antics. Every morning, he would sneak through the meadows, darting past rabbits and chickens, always careful to avoid the farmer's watchful eyes. Despite his mischievous ways, Felix had a kind heart and often shared his clever solutions with friends in need. One day, a heavy storm swept through the valley, leaving many paths muddy and rivers swollen. Felix saw his chance to help: he guided lost ducklings back to their pond, and showed the frightened kittens how to find shelter under the sturdy oak trees. The villagers watched in amazement as the fox moved gracefully through the rain-soaked fields, his orange fur glistening and his sharp eyes alert. By the time the storm passed, the village had gained a newfound respect for Felix. Tales of his bravery spread far and wide, carried by wandering merchants and whispered by children as they played near the cobblestone streets. Nights in the village were quiet once more, but the story of Felix, the fox who danced through storm and shadow, continued to inspire laughter, cleverness, and courage in the hearts of all who heard it."
+
+echo "$text" | tokenize | python3 "$(dirname "$0")/sort.py"
diff --git a/pun-sort/sort.py b/pun-sort/sort.py
new file mode 100755
--- /dev/null
+++ b/pun-sort/sort.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+"""Order words so that similar-sounding words end up next to each other.
+
+Reads one word per line from stdin, transcribes every distinct word to IPA
+with espeak-ng, chains the words greedily by phonetic distance and prints
+one "word<TAB>/ipa/" line per distinct word.
+"""
+
+import shutil
+import subprocess
+import sys
+from functools import lru_cache
+
+# -------------------------
+# IPA helpers
+# -------------------------
+
+# Symbols dropped before tokenizing because they are not segments: stress
+# marks, tie bars (U+0361, U+035C), the zero-width joiner (U+200D) and "_".
+# NOTE(review): espeak-ng seems to put one of the joiners/separators between
+# phoneme letters depending on the --ipa level; confirm against its docs.
+IGNORED = frozenset("ˈˌ\u0361\u035c\u200d_")
+
+DIPHTHONGS = frozenset({"aɪ", "aʊ", "eɪ", "oʊ", "ɔɪ"})
+
+
+def get_ipa(word):
+    """Return espeak-ng's IPA transcription of *word*, or "" on failure.
+
+    A failure is reported on stderr instead of being swallowed silently.
+    """
+    try:
+        out = subprocess.check_output(
+            # "--" ends option parsing, so a word such as "-v" is treated
+            # as text to transcribe rather than as a command-line flag.
+            ["espeak-ng", "-q", "--ipa=3", "--", word],
+            stderr=subprocess.DEVNULL,
+            encoding="utf-8",
+        )
+    except (OSError, subprocess.SubprocessError, UnicodeError) as err:
+        print(f"warning: no IPA for {word!r}: {err}", file=sys.stderr)
+        return ""
+    return out.strip().strip("/")
+
+
+def ipa_tokenize(ipa):
+    """Split an IPA string into a list of phoneme tokens.
+
+    Characters in IGNORED are removed first; each entry of DIPHTHONGS
+    becomes a single two-character token, every other character its own.
+    """
+    cleaned = "".join(ch for ch in ipa if ch not in IGNORED)
+    tokens = []
+    i = 0
+    while i < len(cleaned):
+        pair = cleaned[i:i + 2]
+        if pair in DIPHTHONGS:
+            tokens.append(pair)
+            i += 2
+        else:
+            tokens.append(cleaned[i])
+            i += 1
+    return tokens
+
+
+# -------------------------
+# Distance
+# -------------------------
+
+VOWELS = frozenset("aeiouəɪʊɔɛɜɑæʌɒɐɚɝᵻ")
+
+
+def _is_vowel(token):
+    """Return True if *token* starts with a vowel symbol.
+
+    Looking at the first character makes two-character diphthong tokens
+    count as vowels; testing the whole token against VOWELS would not.
+    """
+    return token[:1] in VOWELS
+
+
+def sub_cost(a, b):
+    """Return the cost of substituting phoneme token *a* with *b*.
+
+    0.0 for identical tokens, 0.6 for vowel/vowel, 2.0 for vowel/non-vowel
+    and 1.0 for two different non-vowels.
+    """
+    if a == b:
+        return 0.0
+    a_vowel, b_vowel = _is_vowel(a), _is_vowel(b)
+    if a_vowel and b_vowel:
+        return 0.6
+    if a_vowel or b_vowel:
+        return 2.0
+    return 1.0
+
+
+@lru_cache(maxsize=None)
+def phonetic_distance(a, b):
+    """Return the weighted edit distance between token sequences *a*, *b*.
+
+    Insertions and deletions cost 1, substitutions cost sub_cost().  Both
+    arguments must be hashable (tuples or strings) because results are
+    memoized.
+    """
+    a = tuple(a)
+    b = tuple(b)
+    # prev[j] is the distance between the first i - 1 tokens of a and the
+    # first j tokens of b; only two rows of the table are kept.
+    prev = list(range(len(b) + 1))
+    for i, x in enumerate(a, 1):
+        cur = [i]
+        for j, y in enumerate(b, 1):
+            cur.append(min(
+                prev[j] + 1,
+                cur[j - 1] + 1,
+                prev[j - 1] + sub_cost(x, y),
+            ))
+        prev = cur
+    return prev[-1]
+
+
+# -------------------------
+# Seriation
+# -------------------------
+
+def seriate(words, ipas):
+    """Chain the distinct *words* greedily by length-normalized distance.
+
+    Starts at words[0] and repeatedly appends the unused word closest to
+    the last one.  *ipas* maps each word to its token tuple.  Returns the
+    ordered list; an empty input yields an empty list.
+    """
+    if not words:
+        return []
+
+    # A dict keeps first-seen order, so ties in min() are broken the same
+    # way on every run (set iteration order changes with hash seeding).
+    unused = dict.fromkeys(words)
+    path = [words[0]]
+    del unused[words[0]]
+
+    while unused:
+        cur = ipas[path[-1]]
+        nxt = min(
+            unused,
+            key=lambda w: phonetic_distance(cur, ipas[w])
+            / max(len(cur), len(ipas[w]), 1),
+        )
+        path.append(nxt)
+        del unused[nxt]
+
+    return path
+
+
+# -------------------------
+# Main
+# -------------------------
+
+def main():
+    """Read words from stdin and print them in seriated order with IPA."""
+    if shutil.which("espeak-ng") is None:
+        sys.exit("sort.py: espeak-ng not found in PATH")
+
+    stripped = (line.strip() for line in sys.stdin)
+    # Deduplicate up front so espeak-ng is spawned once per distinct word.
+    words = list(dict.fromkeys(w for w in stripped if w))
+    ipas = {w: tuple(ipa_tokenize(get_ipa(w))) for w in words}
+
+    for w in seriate(words, ipas):
+        print(f"{w}\t/{''.join(ipas[w])}/")
+
+
+if __name__ == "__main__":
+    main()