From b8d7b530070d45482756a27be367bef5c1b735e0 Mon Sep 17 00:00:00 2001
From: Tom Smeding <tom@tomsmeding.com>
Date: Sun, 7 Jul 2024 16:53:56 +0200
Subject: Kanji completion basically working

---
 process-database.hs | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 process-database.hs

(limited to 'process-database.hs')

diff --git a/process-database.hs b/process-database.hs
old mode 100644
new mode 100755
index 1fa9813..8ce60fd
--- a/process-database.hs
+++ b/process-database.hs
@@ -36,6 +36,10 @@ blockBy n l = case splitAt n l of
 hasDuplicates :: Ord a => [a] -> Bool
 hasDuplicates (sort -> l) = any (uncurry (==)) (zip l (drop 1 l))
 
+dedup :: Eq a => [a] -> [a]  -- remove repeated elements, keeping the first occurrence of each in its original order
+dedup [] = []  -- an empty list has nothing to deduplicate
+dedup (x:xs) = x : dedup (filter (/= x) xs)  -- keep x, drop every later element equal to x, then recurse on what is left (worst case quadratic in the list length)
+
 data Row i = Row
   { rLexeme :: String
   , rOrtho :: String
@@ -54,9 +58,9 @@ readCSV = map (toRow . splitOn ',') . lines
 -- (hira->kata, kata->ID, num IDs)
 readTable :: String -> (Map Char Char, Map Char Int, Int)
 readTable input =
-  let list = [(h, k) | [[h], [k]] <- map words (lines input)]
-  in (Map.fromList list
-     ,Map.fromList (zip (map snd list) [0..])
+  let list = [(h, k) | [h, k] <- map words (lines input)]  -- every input line that splits into exactly two words, each of any length; other lines are skipped
+  in (Map.fromList [(h, k) | ([h], [k]) <- list]  -- first map: only the pairs in which both words are a single character
+     ,Map.fromList (zip [k | (_, [k]) <- list] [0..])  -- second map: single-character second words paired with 0, 1, 2, ... in input order; NOTE(review): `length list` below also counts pairs whose second word is longer and so gets no number here -- confirm that is intended
      ,length list)
 
 normalise :: Map Char Int -> Row Char -> [Row Int]
@@ -88,7 +92,7 @@ makeTrie prefixlen rows =
       longer = Map.fromListWith (++)
                  [(rReading row !! prefixlen, [row])
                  | row <- rows, length (rReading row) > prefixlen]
-  in Node [(rLexeme r, rFreq r) | r <- here]
+  in Node [(target, rFreq r) | r <- here, target <- dedup [rLexeme r, rOrtho r]]
           (Map.assocs (makeTrie (prefixlen + 1) <$> longer))
 
 -- Serialised format of the trie:
-- 
cgit v1.2.3-70-g09d2