{-| Perfect hash generator for the commands in command.c. To re-generate the hash after a new command is added, write the list of all commands (one per line) to the stdin of this program. If a perfect hash can be found with the current maximum values, the hash expression will be printed, along with the hash values of all commands to show that they are really distinct. The general hash form considered here is an integer linear combination of the length of the command and first N characters of the command (where N <= the minimum command length), modulo some small integer. The possible parameters are generated in the function paramSpace. If no possible hash is found, perhaps the limits in paramSpace need to be raised. Compile this program using: $ ghc -O3 CommandHash.hs -} module Main where import Data.Char import Data.Ord import Data.List import System.Exit data Param = Param { pCharCoefs :: [Int] , pLenCoef :: Int , pModulus :: Int } deriving (Show) showParam :: Param -> String showParam par = "(" ++ intercalate " + " [show c ++ "*s[" ++ show i ++ "]" | (i, c) <- zip [0..] (pCharCoefs par)] ++ " + " ++ show (pLenCoef par) ++ "*strlen(s)) % " ++ show (pModulus par) hash :: Param -> String -> Int hash par str = let chars = zipWith (*) (map ord str) (pCharCoefs par) in (sum chars + pLenCoef par * length str) `mod` pModulus par cartesianProduct :: [[a]] -> [[a]] cartesianProduct [] = [[]] cartesianProduct (l:ls) = [x : rest | x <- l, rest <- cartesianProduct ls] allDistinct :: Ord a => [a] -> Bool allDistinct l = let s = sort l in all (uncurry (/=)) (zip s (tail s)) paramSpace :: [String] -> [Param] paramSpace commands = let -- These are limits on the maximum coefficient and the maximum modulus, -- respectively. If no perfect hashes are found at all, these may need -- to be raised. maxCoef = 10 maxMod = 64 minCmdLen = minimum (map length commands) numCommands = length commands in [Param coefs lenCoef modulus -- Earlier variables are chosen first: an earlier variable is only -- increased if no perfect hash can be found for any of the possible -- values for the later variables. -- Currently, the number of coefficients (the command prefix inspected) -- is considered most important, then the modulus, then the coefficients -- themselves. | numCoefs <- [1..minCmdLen] , modulus <- [numCommands .. maxMod] , coefs <- cartesianProduct (replicate numCoefs [0..maxCoef]) , lenCoef <- [0..maxCoef] ] main :: IO () main = do -- Read in the commands commands <- lines <$> getContents -- Compute (lazily) all parameters that produce perfect hashes for this set -- of commands. let answers = filter (\par -> allDistinct (map (hash par) commands)) (paramSpace commands) case answers of [] -> die "No perfect hash found, perhaps raise limits" param : _ -> do putStrLn (showParam param) mapM_ (\(cmd, h) -> putStrLn (show cmd ++ " -> " ++ show h)) (sortBy (comparing snd) [(cmd, hash param cmd) | cmd <- commands])