diff --git a/configs/packages/default.nix b/configs/packages/default.nix
index e97735e..89eda61 100644
--- a/configs/packages/default.nix
+++ b/configs/packages/default.nix
@@ -157,6 +157,7 @@ in {
     scripts.wttr
     scripts.sanskrit-dictionary
     scripts.unicodmenu
+    scripts.closest
     scripts.trans
     scripts.liddel-scott-jones
     scripts.mpv-radio
diff --git a/packages/scripts/default.nix b/packages/scripts/default.nix
index dee06d6..e902d25 100644
--- a/packages/scripts/default.nix
+++ b/packages/scripts/default.nix
@@ -174,6 +174,16 @@ in rec {
     script = ./favicon.sh;
   };
 
+  closest = pkgs.writers.writeDashBin "closest" ''
+    ${pkgs.writers.writeHaskellBin "closest" {
+      libraries = with pkgs.haskellPackages; [ parallel optparse-applicative edit-distance ];
+      ghcArgs = ["-O3" "-threaded" ];
+    } (builtins.readFile ./distance.hs)}/bin/closest +RTS -N4 -RTS --dictionary ${pkgs.fetchurl {
+      url = "https://gist.github.com/MarvinJWendt/2f4f4154b8ae218600eb091a5706b5f4/raw/36b70dd6be330aa61cd4d4cdfda6234dcb0b8784/wordlist-german.txt";
+      sha256 = "0vr4lmlckgvj4s8sk502sknq9pf3297rvasj5sqqm05zzbdgpppj";
+    }} "$@"
+  '';
+
   genius = wrapScript {
     packages = [ pkgs.curl pkgs.gnused pkgs.pandoc ];
     name = "genius";
diff --git a/packages/scripts/distance.hs b/packages/scripts/distance.hs
new file mode 100644
index 0000000..f5c39dd
--- /dev/null
+++ b/packages/scripts/distance.hs
@@ -0,0 +1,51 @@
+{-# LANGUAGE ApplicativeDo #-}
+{-# LANGUAGE RecordWildCards #-}
+-- closest: list the dictionary words that lie within a maximum Levenshtein
+-- distance of a given word, nearest first.
+import Control.Arrow ((&&&))
+import Control.Monad (forM_)
+import Control.Parallel.Strategies (using, parListChunk, rdeepseq)
+import Data.Char (toLower)
+import Data.List (sortOn)
+import Options.Applicative
+import Text.EditDistance (levenshteinDistance, defaultEditCosts)
+
+-- | Settings collected from the command line.
+data Options = Options
+  { limit :: Int -- ^ largest edit distance that is still listed
+  , word :: String -- ^ the word to look up
+  , dictionary :: FilePath -- ^ word list file, read one entry per line
+  }
+
+-- | Command-line parser: @--limit@/@-l@ (default 3), a positional @WORD@ and
+-- @--dictionary@/@-d@. The @do@ block is written with ApplicativeDo.
+optionsParser :: Parser Options
+optionsParser = do
+  limit <- option auto (long "limit" <> short 'l' <> help "maximum edit distance to list" <> value 3 <> metavar "N")
+  word <- strArgument (help "the word to match" <> metavar "WORD")
+  dictionary <- strOption (long "dictionary" <> short 'd' <> help "the dictionary to search")
+  pure Options {..}
+
+-- | Read the word list at the given path, lower-cased, one entry per line.
+readDictionary :: FilePath -> IO [String]
+readDictionary path = lines . map toLower <$> readFile path
+
+-- | Number of dictionary words scored by a single spark. Batching avoids
+-- creating one spark for every word of a large word list.
+chunkSize :: Int
+chunkSize = 1024
+
+-- | Score every dictionary word against the lower-cased query and print the
+-- words within the limit as @DISTANCE WORD@, smallest distance first.
+main :: IO ()
+main = do
+  let options = info (optionsParser <**> helper) (fullDesc <> progDesc "Find close words")
+  Options {..} <- execParser options
+  let word' = map toLower word
+  allWords <- readDictionary dictionary
+  let distances = map (levenshteinDistance defaultEditCosts word' &&& id) allWords
+      distances' = distances `using` parListChunk chunkSize rdeepseq
+      -- Discard far-away words before sorting: sortOn is stable, so the output
+      -- equals sorting everything first, but only the hits have to be sorted.
+      ranking = sortOn fst $ filter ((<= limit) . fst) distances'
+  forM_ ranking $ \(theDistance, theWord) -> putStrLn (show theDistance ++ " " ++ theWord)