aboutsummaryrefslogtreecommitdiff
path: root/CommandHash.hs
diff options
context:
space:
mode:
Diffstat (limited to 'CommandHash.hs')
-rw-r--r--CommandHash.hs89
1 files changed, 89 insertions, 0 deletions
diff --git a/CommandHash.hs b/CommandHash.hs
new file mode 100644
index 0000000..42f0a99
--- /dev/null
+++ b/CommandHash.hs
@@ -0,0 +1,89 @@
+{-| Perfect hash generator for the commands in command.c.
+
+To re-generate the hash after a new command is added, write the list of all
+commands (one per line) to the stdin of this program. If a perfect hash can be
+found with the current maximum values, the hash expression will be printed,
+along with the hash values of all commands to show that they are really
+distinct.
+
+The general hash form considered here is an integer linear combination of the
+length of the command and first N characters of the command (where N <= the
+minimum command length), modulo some small integer.
+
+The possible parameters are generated in the function paramSpace. If no
+possible hash is found, perhaps the limits in paramSpace need to be raised.
+
+Compile this program using:
+ $ ghc -O3 CommandHash.hs
+-}
+module Main where
+
+import Data.Char
+import Data.Ord
+import Data.List
+import System.Exit
+
+
+data Param = Param { pCharCoefs :: [Int]
+ , pLenCoef :: Int
+ , pModulus :: Int }
+ deriving (Show)
+
+showParam :: Param -> String
+showParam par =
+ "(" ++ intercalate " + " [show c ++ "*s[" ++ show i ++ "]"
+ | (i, c) <- zip [0..] (pCharCoefs par)]
+ ++ " + " ++ show (pLenCoef par) ++ "*strlen(s)) % "
+ ++ show (pModulus par)
+
+hash :: Param -> String -> Int
+hash par str =
+ let chars = zipWith (*) (map ord str) (pCharCoefs par)
+ in (sum chars + pLenCoef par * length str) `mod` pModulus par
+
+cartesianProduct :: [[a]] -> [[a]]
+cartesianProduct [] = [[]]
+cartesianProduct (l:ls) = [x : rest | x <- l, rest <- cartesianProduct ls]
+
+allDistinct :: Ord a => [a] -> Bool
+allDistinct l = let s = sort l in all (uncurry (/=)) (zip s (tail s))
+
+paramSpace :: [String] -> [Param]
+paramSpace commands =
+ let -- These are limits on the maximum coefficient and the maximum modulus,
+ -- respectively. If no perfect hashes are found at all, these may need
+ -- to be raised.
+ maxCoef = 10
+ maxMod = 64
+
+ minCmdLen = minimum (map length commands)
+ numCommands = length commands
+ in [Param coefs lenCoef modulus
+ -- Earlier variables are chosen first: an earlier variable is only
+ -- increased if no perfect hash can be found for any of the possible
+ -- values for the later variables.
+ -- Currently, the number of coefficients (the command prefix inspected)
+ -- is considered most important, then the modulus, then the coefficients
+ -- themselves.
+ | numCoefs <- [1..minCmdLen]
+ , modulus <- [numCommands .. maxMod]
+ , coefs <- cartesianProduct (replicate numCoefs [0..maxCoef])
+ , lenCoef <- [0..maxCoef]
+ ]
+
+main :: IO ()
+main = do
+ -- Read in the commands
+ commands <- lines <$> getContents
+
+ -- Compute (lazily) all parameters that produce perfect hashes for this set
+ -- of commands.
+ let answers = filter (\par -> allDistinct (map (hash par) commands))
+ (paramSpace commands)
+
+ case answers of
+ [] -> die "No perfect hash found, perhaps raise limits"
+ param : _ -> do
+ putStrLn (showParam param)
+ mapM_ (\(cmd, h) -> putStrLn (show cmd ++ " -> " ++ show h))
+ (sortBy (comparing snd) [(cmd, hash param cmd) | cmd <- commands])