1
0
mirror of https://github.com/kmein/niveum synced 2026-03-23 13:21:06 +01:00

2 Commits

Author SHA1 Message Date
2067944151 tlg-wotd: use cltk for transcription 2024-09-22 17:33:04 +02:00
c34708b989 cltk: init at 1.3.0 2024-09-22 17:32:49 +02:00
3 changed files with 77 additions and 6 deletions

View File

@@ -2,6 +2,7 @@
pkgs,
lib,
config,
niveumPackages,
...
}: {
systemd.services.telegram-bot-tlg-wotd = {
@@ -12,7 +13,7 @@
path = [ pkgs.jq pkgs.curl pkgs.recode pkgs.deno pkgs.imagemagick pkgs.gawk pkgs.gnugrep pkgs.coreutils ];
environment = {
NPM_CONFIG_CACHE = "/tmp";
DENO_DIR = "/tmp";
CLTK_DATA = "/tmp";
};
script = ''
set -efux
@@ -29,9 +30,17 @@
first_occurrence=$(echo "$json_data" | jq -r '.firstOccurrence')
total_occurrences=$(echo "$json_data" | jq -r '.totalOccurrences')
transliteration=$(deno run ${pkgs.writeText "translit.ts" ''
import grc from "npm:greek-transliteration";
console.log(grc.transliterate(Deno.args.join(" ")));
transliteration=$(${pkgs.writers.writePython3 "translit.py" {
libraries = [ niveumPackages.cltk ];
} ''
import sys
from cltk.phonology.grc.transcription import Transcriber
probert = Transcriber("Attic", "Probert")
text = " ".join(sys.argv[1:])
ipa = probert.transcribe(text)
print(ipa)
''} "$compact_word")
@@ -87,9 +96,9 @@
-font "${pkgs.gentium}/share/fonts/truetype/GentiumBookPlus-Regular.ttf" \
-fill "$color2" \
-pointsize 60 -gravity west \
-annotate +100+00 "/$transliteration/" \
-annotate +100+00 "$transliteration" \
-fill "$color1" \
-annotate +100+100 "$definition" \
-annotate +100+120 "$definition" \
-fill "$color2" \
-pointsize 40 -gravity southwest \
-annotate +100+60 "t.me/TLGWotD" \

View File

@@ -331,6 +331,7 @@
brassica = pkgs.callPackage packages/brassica.nix {};
cheat-sh = pkgs.callPackage packages/cheat-sh.nix {};
closest = pkgs.callPackage packages/closest {};
cltk = pkgs.python3Packages.callPackage packages/cltk.nix {};
cro = pkgs.callPackage packages/cro.nix {};
cyberlocker-tools = pkgs.callPackage packages/cyberlocker-tools.nix {};
default-gateway = pkgs.callPackage packages/default-gateway.nix {};

61
packages/cltk.nix Normal file
View File

@@ -0,0 +1,61 @@
# Packages cltk 1.3.0 from PyPI. Its greek-accentuation dependency is built
# locally in the `let` binding below and propagated alongside the other
# Python dependencies.
{
  lib,
  buildPythonPackage,
  fetchPypi,
  poetry-core,
  gitpython,
  gensim,
  boltons,
  pyyaml,
  nltk,
  rapidfuzz,
  requests,
  scikit-learn,
  scipy,
  spacy,
  stanza,
  stringcase,
  torch,
  tqdm,
}:
let
  # greek-accentuation 1.2.0, fetched from PyPI and built with
  # buildPythonPackage's default build format.
  greek-accentuation = buildPythonPackage rec {
    pname = "greek-accentuation";
    version = "1.2.0";

    src = fetchPypi {
      inherit pname version;
      hash = "sha256-l2HZXdqlLubvy2bWhhZVYGMpF0DXVKTDFehkcGF5xdk=";
    };

    meta = {
      description = "Python 3 library for accenting (and analyzing the accentuation of) Ancient Greek words";
      homepage = "https://github.com/jtauber/greek-accentuation";
      license = lib.licenses.mit;
      maintainers = [ lib.maintainers.kmein ];
    };
  };
in
buildPythonPackage rec {
  pname = "cltk";
  version = "1.3.0";
  format = "pyproject";

  src = fetchPypi {
    inherit pname version;
    hash = "sha256-jAxvToUIo333HSVQDYVyUBY3YP+m1RnlNGelcvktp6s=";
  };

  # Build backend; postPatch below rewrites pyproject.toml to request it.
  nativeBuildInputs = [ poetry-core ];

  # Python dependencies propagated to consumers of this package.
  propagatedBuildInputs = [
    gitpython
    gensim
    boltons
    greek-accentuation
    pyyaml
    nltk
    rapidfuzz
    requests
    scikit-learn
    scipy
    spacy
    stanza
    stringcase
    torch
    tqdm
  ];

  # Rewrites pyproject.toml in two ways:
  #   * the build requirement and backend are switched from poetry to
  #     poetry-core;
  #   * the scipy ("<1.13.0") and boltons ("^21.0.0") constraints are
  #     replaced with "^1" and "^24.0.0" respectively.
  # NOTE(review): presumably the new constraints match the versions provided
  # by the Python package set in use -- confirm against the pinned nixpkgs.
  postPatch = ''
    substituteInPlace pyproject.toml \
    --replace "poetry>=1.1.13" poetry-core \
    --replace "poetry.masonry.api" "poetry.core.masonry.api" \
    --replace 'scipy = "<1.13.0"' 'scipy = "^1"' \
    --replace 'boltons = "^21.0.0"' 'boltons = "^24.0.0"'
  '';

  meta = {
    description = "Natural language processing (NLP) framework for pre-modern languages";
    homepage = "https://cltk.org";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.kmein ];
  };
}