diff options
Diffstat (limited to 'CommandHash.hs')
-rw-r--r-- | CommandHash.hs | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/CommandHash.hs b/CommandHash.hs new file mode 100644 index 0000000..42f0a99 --- /dev/null +++ b/CommandHash.hs @@ -0,0 +1,89 @@ +{-| Perfect hash generator for the commands in command.c. + +To re-generate the hash after a new command is added, write the list of all +commands (one per line) to the stdin of this program. If a perfect hash can be +found with the current maximum values, the hash expression will be printed, +along with the hash values of all commands to show that they are really +distinct. + +The general hash form considered here is an integer linear combination of the +length of the command and first N characters of the command (where N <= the +minimum command length), modulo some small integer. + +The possible parameters are generated in the function paramSpace. If no +possible hash is found, perhaps the limits in paramSpace need to be raised. + +Compile this program using: + $ ghc -O3 CommandHash.hs +-} +module Main where + +import Data.Char +import Data.Ord +import Data.List +import System.Exit + + +data Param = Param { pCharCoefs :: [Int] + , pLenCoef :: Int + , pModulus :: Int } + deriving (Show) + +showParam :: Param -> String +showParam par = + "(" ++ intercalate " + " [show c ++ "*s[" ++ show i ++ "]" + | (i, c) <- zip [0..] (pCharCoefs par)] + ++ " + " ++ show (pLenCoef par) ++ "*strlen(s)) % " + ++ show (pModulus par) + +hash :: Param -> String -> Int +hash par str = + let chars = zipWith (*) (map ord str) (pCharCoefs par) + in (sum chars + pLenCoef par * length str) `mod` pModulus par + +cartesianProduct :: [[a]] -> [[a]] +cartesianProduct [] = [[]] +cartesianProduct (l:ls) = [x : rest | x <- l, rest <- cartesianProduct ls] + +allDistinct :: Ord a => [a] -> Bool +allDistinct l = let s = sort l in all (uncurry (/=)) (zip s (tail s)) + +paramSpace :: [String] -> [Param] +paramSpace commands = + let -- These are limits on the maximum coefficient and the maximum modulus, + -- respectively. If no perfect hashes are found at all, these may need + -- to be raised. + maxCoef = 10 + maxMod = 64 + + minCmdLen = minimum (map length commands) + numCommands = length commands + in [Param coefs lenCoef modulus + -- Earlier variables are chosen first: an earlier variable is only + -- increased if no perfect hash can be found for any of the possible + -- values for the later variables. + -- Currently, the number of coefficients (the command prefix inspected) + -- is considered most important, then the modulus, then the coefficients + -- themselves. + | numCoefs <- [1..minCmdLen] + , modulus <- [numCommands .. maxMod] + , coefs <- cartesianProduct (replicate numCoefs [0..maxCoef]) + , lenCoef <- [0..maxCoef] + ] + +main :: IO () +main = do + -- Read in the commands + commands <- lines <$> getContents + + -- Compute (lazily) all parameters that produce perfect hashes for this set + -- of commands. + let answers = filter (\par -> allDistinct (map (hash par) commands)) + (paramSpace commands) + + case answers of + [] -> die "No perfect hash found, perhaps raise limits" + param : _ -> do + putStrLn (showParam param) + mapM_ (\(cmd, h) -> putStrLn (show cmd ++ " -> " ++ show h)) + (sortBy (comparing snd) [(cmd, hash param cmd) | cmd <- commands]) |