From b8d7b530070d45482756a27be367bef5c1b735e0 Mon Sep 17 00:00:00 2001
From: Tom Smeding
Date: Sun, 7 Jul 2024 16:53:56 +0200
Subject: Kanji completion basically working

---
 process-database.hs | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 process-database.hs

(limited to 'process-database.hs')

diff --git a/process-database.hs b/process-database.hs
old mode 100644
new mode 100755
index 1fa9813..8ce60fd
--- a/process-database.hs
+++ b/process-database.hs
@@ -36,6 +36,10 @@ blockBy n l = case splitAt n l of
 hasDuplicates :: Ord a => [a] -> Bool
 hasDuplicates (sort -> l) = any (uncurry (==)) (zip l (drop 1 l))
 
+dedup :: Eq a => [a] -> [a]
+dedup [] = []
+dedup (x:xs) = x : dedup (filter (/= x) xs)
+
 data Row i = Row
   { rLexeme :: String
   , rOrtho :: String
@@ -54,9 +58,9 @@ readCSV = map (toRow . splitOn ',') . lines
 -- (hira->kata, kata->ID, num IDs)
 readTable :: String -> (Map Char Char, Map Char Int, Int)
 readTable input =
-  let list = [(h, k) | [[h], [k]] <- map words (lines input)]
-  in (Map.fromList list
-     ,Map.fromList (zip (map snd list) [0..])
+  let list = [(h, k) | [h, k] <- map words (lines input)]
+  in (Map.fromList [(h, k) | ([h], [k]) <- list]
+     ,Map.fromList (zip [k | (_, [k]) <- list] [0..])
      ,length list)
 
 normalise :: Map Char Int -> Row Char -> [Row Int]
@@ -88,7 +92,7 @@ makeTrie prefixlen rows =
       longer = Map.fromListWith (++)
                  [(rReading row !! prefixlen, [row])
                  | row <- rows, length (rReading row) > prefixlen]
-  in Node [(rLexeme r, rFreq r) | r <- here]
+  in Node [(target, rFreq r) | r <- here, target <- dedup [rLexeme r, rOrtho r]]
           (Map.assocs (makeTrie (prefixlen + 1) <$> longer))
 
 -- Serialised format of the trie:
-- 
cgit v1.2.3-70-g09d2