1
0
mirror of https://github.com/kmein/niveum synced 2026-03-16 18:21:07 +01:00

feat: closest word script

This commit is contained in:
2021-11-15 18:43:34 +01:00
parent 01e3cef34f
commit 683bd2f3be
3 changed files with 47 additions and 0 deletions

View File

@@ -174,6 +174,16 @@ in rec {
script = ./favicon.sh;
};
# `closest`: shell wrapper around the binary compiled from ./distance.hs.
# It always passes a pinned German word list as --dictionary and forwards the
# caller's arguments ("$@") unchanged.
closest = pkgs.writers.writeDashBin "closest" ''
  ${pkgs.writers.writeHaskellBin "closest" {
    libraries = with pkgs.haskellPackages; [ parallel optparse-applicative edit-distance ];
    # -rtsopts is required for the `+RTS -N4 -RTS` below: with GHC's default
    # (-rtsopts=some) the program refuses every RTS option except -? and --info
    # and exits at startup. -O2 is GHC's highest level; higher numbers are
    # treated as -O2 anyway.
    ghcArgs = [ "-O2" "-threaded" "-rtsopts" ];
  } (builtins.readFile ./distance.hs)}/bin/closest +RTS -N4 -RTS --dictionary ${pkgs.fetchurl {
    url = "https://gist.github.com/MarvinJWendt/2f4f4154b8ae218600eb091a5706b5f4/raw/36b70dd6be330aa61cd4d4cdfda6234dcb0b8784/wordlist-german.txt";
    sha256 = "0vr4lmlckgvj4s8sk502sknq9pf3297rvasj5sqqm05zzbdgpppj";
  }} "$@"
'';
genius = wrapScript {
packages = [ pkgs.curl pkgs.gnused pkgs.pandoc ];
name = "genius";

View File

@@ -0,0 +1,36 @@
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE RecordWildCards #-}
import Control.Arrow ((&&&))
import Control.Monad (forM_)
import Control.Parallel.Strategies (using, parList, rdeepseq)
import Data.Char (toLower)
import Data.List (sortOn)
import Options.Applicative
import Text.EditDistance (levenshteinDistance, defaultEditCosts)
-- | Settings collected from the command line.
data Options = Options
  { limit :: Int
    -- ^ Largest edit distance that is still listed.
  , word :: String
    -- ^ The word to match against the dictionary.
  , dictionary :: FilePath
    -- ^ Path of the dictionary file to search.
  }
-- | Command-line parser for 'Options'.
--
-- The sub-parsers are combined in the order limit, word, dictionary, which is
-- also the field order of 'Options'.
optionsParser :: Parser Options
optionsParser = Options <$> limitOption <*> wordArgument <*> dictionaryOption
  where
    -- Optional @--limit@ / @-l@; falls back to 3 when not given.
    limitOption =
      option auto $
        long "limit"
          <> short 'l'
          <> help "maximum edit distance to list"
          <> value 3
          <> metavar "N"
    -- Positional argument: the word being looked up.
    wordArgument =
      strArgument $
        help "the word to match"
          <> metavar "WORD"
    -- @--dictionary@ / @-d@: path of the word list.
    dictionaryOption =
      strOption $
        long "dictionary"
          <> short 'd'
          <> help "the dictionary to search"
-- | Read the file at the given path, lower-case every character and split the
-- result into lines.
readDictionary :: FilePath -> IO [String]
readDictionary path = do
  contents <- readFile path
  pure (lines (map toLower contents))
-- | Entry point: parse the command line, score every dictionary word against
-- the lower-cased query word, and print each word whose Levenshtein distance
-- is at most @limit@, closest first, as one @<distance> <word>@ line each.
main :: IO ()
main = do
  let options = info (optionsParser <**> helper) (fullDesc <> progDesc "Find close words")
  Options {..} <- execParser options
  let word' = map toLower word
  allWords <- readDictionary dictionary
  let distances = map (levenshteinDistance defaultEditCosts word' &&& id) allWords
      -- Evaluate the (distance, word) pairs in parallel, forcing each pair fully.
      distances' = distances `using` parList rdeepseq
      -- Discard everything beyond the limit *before* sorting, so only the
      -- surviving candidates are sorted instead of the whole dictionary.
      -- 'sortOn' is stable, so ties keep their dictionary order and the output
      -- equals that of sorting first and cutting off afterwards.
      ranking = sortOn fst $ filter ((<= limit) . fst) distances'
  forM_ ranking $ \(theDistance, theWord) -> putStrLn (show theDistance ++ " " ++ theWord)