mirror of
https://github.com/kmein/niveum
synced 2026-03-19 03:21:10 +01:00
feat: closest word script
This commit is contained in:
@@ -157,6 +157,7 @@ in {
|
|||||||
scripts.wttr
|
scripts.wttr
|
||||||
scripts.sanskrit-dictionary
|
scripts.sanskrit-dictionary
|
||||||
scripts.unicodmenu
|
scripts.unicodmenu
|
||||||
|
scripts.closest
|
||||||
scripts.trans
|
scripts.trans
|
||||||
scripts.liddel-scott-jones
|
scripts.liddel-scott-jones
|
||||||
scripts.mpv-radio
|
scripts.mpv-radio
|
||||||
|
|||||||
@@ -174,6 +174,16 @@ in rec {
|
|||||||
script = ./favicon.sh;
|
script = ./favicon.sh;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# `closest`: dash wrapper around the program compiled from ./distance.hs.
# It runs the binary with `+RTS -N4` against a pinned German word list and
# forwards all command-line arguments ("$@") to it.
closest = let
  # The Haskell executable, built from the source text of ./distance.hs.
  closestBin = pkgs.writers.writeHaskellBin "closest" {
    libraries = with pkgs.haskellPackages; [ parallel optparse-applicative edit-distance ];
    ghcArgs = ["-O3" "-threaded" ];
  } (builtins.readFile ./distance.hs);

  # Word list passed to the binary via --dictionary; pinned by hash.
  germanWordlist = pkgs.fetchurl {
    url = "https://gist.github.com/MarvinJWendt/2f4f4154b8ae218600eb091a5706b5f4/raw/36b70dd6be330aa61cd4d4cdfda6234dcb0b8784/wordlist-german.txt";
    sha256 = "0vr4lmlckgvj4s8sk502sknq9pf3297rvasj5sqqm05zzbdgpppj";
  };
in pkgs.writers.writeDashBin "closest" ''
  ${closestBin}/bin/closest +RTS -N4 -RTS --dictionary ${germanWordlist} "$@"
'';
|
||||||
|
|
||||||
genius = wrapScript {
|
genius = wrapScript {
|
||||||
packages = [ pkgs.curl pkgs.gnused pkgs.pandoc ];
|
packages = [ pkgs.curl pkgs.gnused pkgs.pandoc ];
|
||||||
name = "genius";
|
name = "genius";
|
||||||
|
|||||||
36
packages/scripts/distance.hs
Normal file
36
packages/scripts/distance.hs
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
{-# LANGUAGE ApplicativeDo #-}
|
||||||
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
|
import Control.Arrow ((&&&))
|
||||||
|
import Control.Monad (forM_)
|
||||||
|
import Control.Parallel.Strategies (using, parList, rdeepseq)
|
||||||
|
import Data.Char (toLower)
|
||||||
|
import Data.List (sortOn)
|
||||||
|
import Options.Applicative
|
||||||
|
import Text.EditDistance (levenshteinDistance, defaultEditCosts)
|
||||||
|
|
||||||
|
-- | Command-line configuration, as produced by the option parser.
data Options = Options
  { limit :: Int
    -- ^ Maximum edit distance a word may have to be listed.
  , word :: String
    -- ^ The word to match against the dictionary.
  , dictionary :: FilePath
    -- ^ Path of the dictionary file to search.
  }
|
||||||
|
|
||||||
|
-- | Command-line parser for 'Options'.
--
-- The sub-parsers are combined in the order limit, word, dictionary, which
-- is also the field order of the 'Options' constructor.
optionsParser :: Parser Options
optionsParser = Options <$> limitParser <*> wordParser <*> dictionaryParser
  where
    -- Optional @--limit@ / @-l@; falls back to 3 when not given.
    limitParser :: Parser Int
    limitParser =
      option auto $
        long "limit"
          <> short 'l'
          <> help "maximum edit distance to list"
          <> value 3
          <> metavar "N"

    -- Positional argument: the query word.
    wordParser :: Parser String
    wordParser = strArgument $ help "the word to match" <> metavar "WORD"

    -- @--dictionary@ / @-d@; has no default value.
    dictionaryParser :: Parser FilePath
    dictionaryParser =
      strOption $
        long "dictionary"
          <> short 'd'
          <> help "the dictionary to search"
|
||||||
|
|
||||||
|
-- | Read a dictionary file and return its entries, one per line, lowercased.
--
-- 'lines' only splits on @\\n@, so a word list saved with CRLF line endings
-- would otherwise yield entries ending in @\\r@, and every edit distance
-- computed against them would be one too high. Trailing carriage returns are
-- therefore stripped from each entry.
--
-- The file is read with 'readFile', i.e. lazily and in the locale's text
-- encoding.
readDictionary :: FilePath -> IO [String]
readDictionary path = map stripTrailingCR . lines . map toLower <$> readFile path
  where
    -- Drop any run of '\r' at the very end of a line, leaving the rest as is.
    stripTrailingCR :: String -> String
    stripTrailingCR = foldr keep []
      where
        keep c rest
          | c == '\r' && null rest = []
          | otherwise = c : rest
|
||||||
|
|
||||||
|
-- | Entry point: parse the command line, compute the Levenshtein distance
-- between the query word and every dictionary entry, and print the entries
-- whose distance is at most the limit, closest first, one
-- @DISTANCE WORD@ pair per line.
main :: IO ()
main = do
  let options = info (optionsParser <**> helper) (fullDesc <> progDesc "Find close words")
  Options {..} <- execParser options
  -- readDictionary lowercases the entries, so lowercase the query as well.
  let word' = map toLower word
  allWords <- readDictionary dictionary
  let distances = map (levenshteinDistance defaultEditCosts word' &&& id) allWords
      -- Fully evaluate each (distance, word) pair in parallel.
      distances' = distances `using` parList rdeepseq
      -- Filter before sorting: only the matches need ordering, not the whole
      -- dictionary. 'sortOn' is stable and 'filter' keeps the input order, so
      -- the result equals sorting everything and taking the leading matches.
      ranking = sortOn fst $ filter ((<= limit) . fst) distances'
  forM_ ranking $ \(theDistance, theWord) -> putStrLn (show theDistance ++ " " ++ theWord)
|
||||||
Reference in New Issue
Block a user