1
0
mirror of https://github.com/kmein/niveum synced 2026-03-16 10:11:08 +01:00
Files
niveum/packages/closest/distance.hs

38 lines
1.4 KiB
Haskell
Raw Normal View History

2021-11-15 18:43:34 +01:00
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE RecordWildCards #-}
2025-12-28 13:39:42 +01:00
2021-11-15 18:43:34 +01:00
import Control.Arrow ((&&&))
import Control.Monad (forM_)
2025-12-28 13:39:42 +01:00
import Control.Parallel.Strategies (parList, rdeepseq, using)
2021-11-15 18:43:34 +01:00
import Data.Char (toLower)
import Data.List (sortOn)
import Options.Applicative
2025-12-28 13:39:42 +01:00
import Text.EditDistance (defaultEditCosts, levenshteinDistance)
2021-11-15 18:43:34 +01:00
-- | Command-line configuration for the closest-word search.
data Options = Options
  { -- | Largest edit distance a dictionary word may have and still be listed.
    limit :: Int,
    -- | The word that dictionary entries are compared against.
    word :: String,
    -- | Path of the dictionary file to search.
    dictionary :: FilePath
  }
-- | Command-line parser producing an 'Options' value.
--
-- The sub-parsers are combined in the order of the 'Options' fields:
-- the @--limit@ / @-l@ option (defaulting to 3), the positional @WORD@
-- argument, and the @--dictionary@ / @-d@ option.
optionsParser :: Parser Options
optionsParser = Options <$> limitOption <*> wordArgument <*> dictionaryOption
  where
    limitOption :: Parser Int
    limitOption =
      option auto $
        mconcat
          [ long "limit",
            short 'l',
            help "maximum edit distance to list",
            value 3,
            metavar "N"
          ]

    wordArgument :: Parser String
    wordArgument =
      strArgument $
        mconcat
          [ help "the word to match",
            metavar "WORD"
          ]

    dictionaryOption :: Parser FilePath
    dictionaryOption =
      strOption $
        mconcat
          [ long "dictionary",
            short 'd',
            help "the dictionary to search"
          ]
-- | Read the file at @path@, lower-case every character of its contents and
-- split the result into lines.
readDictionary :: FilePath -> IO [String]
readDictionary path = do
  contents <- readFile path
  pure (lines (map toLower contents))
-- | Program entry point.
--
-- Parses the command line, lower-cases the requested word, reads the
-- dictionary, and pairs every dictionary word with its Levenshtein distance
-- to the requested word (evaluated with the @parList rdeepseq@ strategy).
-- The pairs are sorted by ascending distance, cut off at the first entry
-- whose distance exceeds @limit@, and printed one per line as
-- @<distance> <word>@.
main :: IO ()
main = do
  Options {..} <- execParser cli
  candidates <- readDictionary dictionary
  let needle = map toLower word
      score = levenshteinDistance defaultEditCosts needle
      scored = map (score &&& id) candidates `using` parList rdeepseq
      withinLimit = (<= limit) . fst
      -- Sorting first means takeWhile can stop at the first distance above the limit.
      closest = takeWhile withinLimit (sortOn fst scored)
  forM_ closest $ \(d, w) ->
    putStrLn (unwords [show d, w])
  where
    cli = info (optionsParser <**> helper) (fullDesc <> progDesc "Find close words")