1 files changed, 8 insertions, 4 deletions
diff --git a/process-database.hs b/process-database.hs
index 1fa9813..8ce60fd 100644..100755
--- a/process-database.hs
+++ b/process-database.hs
@@ -36,6 +36,10 @@ blockBy n l = case splitAt n l of
 hasDuplicates :: Ord a => [a] -> Bool
 hasDuplicates (sort -> l) = any (uncurry (==)) (zip l (drop 1 l))
 
+-- Keep only the first occurrence of each element, preserving input order.
+dedup :: Eq a => [a] -> [a]
+dedup l = case l of { [] -> []; y : ys -> y : dedup [z | z <- ys, z /= y] }
+
 data Row i = Row
   { rLexeme :: String
   , rOrtho :: String
@@ -54,9 +58,9 @@ readCSV = map (toRow . splitOn ',') . lines
 -- (hira->kata, kata->ID, num IDs)
 readTable :: String -> (Map Char Char, Map Char Int, Int)
 readTable input =
-  let list = [(h, k) | [[h], [k]] <- map words (lines input)]
-  in (Map.fromList list
-     ,Map.fromList (zip (map snd list) [0..])
+  let list = [(h, k) | [h, [k]] <- map words (lines input)] -- keep rows whose 2nd word is one Char
+  in (Map.fromList [(c, k) | ([c], k) <- list] -- only single-Char first words fit a Char key
+     ,Map.fromList (zip (map snd list) [0..]) -- one ID slot per kept row, so the count below matches
      ,length list)
 
 normalise :: Map Char Int -> Row Char -> [Row Int]
@@ -88,7 +92,7 @@ makeTrie prefixlen rows =
       longer = Map.fromListWith (++)
                  [(rReading row !! prefixlen, [row])
                  | row <- rows, length (rReading row) > prefixlen]
-  in Node [(rLexeme r, rFreq r) | r <- here]
+  in Node [(target, rFreq r) | r <- here, target <- dedup [rLexeme r, rOrtho r]]
           (Map.assocs (makeTrie (prefixlen + 1) <$> longer))
 
 -- Serialised format of the trie: