aboutsummaryrefslogtreecommitdiff
path: root/CommandHash.hs
blob: 42f0a992196d5ce8774e2f105d7781869ca8ed08 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
{-| Perfect hash generator for the commands in command.c.

To re-generate the hash after a new command is added, write the list of all
commands (one per line) to the stdin of this program. If a perfect hash can be
found with the current maximum values, the hash expression will be printed,
along with the hash values of all commands to show that they are really
distinct.

The general hash form considered here is an integer linear combination of the
length of the command and first N characters of the command (where N <= the
minimum command length), modulo some small integer.

The possible parameters are generated in the function paramSpace. If no
possible hash is found, perhaps the limits in paramSpace need to be raised.

Compile this program using:
  $ ghc -O3 CommandHash.hs
-}
module Main where

import Data.Char
import Data.Ord
import Data.List
import System.Exit


data Param = Param { pCharCoefs :: [Int]
                   , pLenCoef :: Int
                   , pModulus :: Int }
  deriving (Show)

showParam :: Param -> String
showParam par =
    "(" ++ intercalate " + " [show c ++ "*s[" ++ show i ++ "]"
                             | (i, c) <- zip [0..] (pCharCoefs par)]
        ++ " + " ++ show (pLenCoef par) ++ "*strlen(s)) % "
        ++ show (pModulus par)

hash :: Param -> String -> Int
hash par str =
    let chars = zipWith (*) (map ord str) (pCharCoefs par)
    in (sum chars + pLenCoef par * length str) `mod` pModulus par

cartesianProduct :: [[a]] -> [[a]]
cartesianProduct [] = [[]]
cartesianProduct (l:ls) = [x : rest | x <- l, rest <- cartesianProduct ls]

allDistinct :: Ord a => [a] -> Bool
allDistinct l = let s = sort l in all (uncurry (/=)) (zip s (tail s))

paramSpace :: [String] -> [Param]
paramSpace commands =
    let -- These are limits on the maximum coefficient and the maximum modulus,
        -- respectively. If no perfect hashes are found at all, these may need
        -- to be raised.
        maxCoef = 10
        maxMod = 64

        minCmdLen = minimum (map length commands)
        numCommands = length commands
    in [Param coefs lenCoef modulus
       -- Earlier variables are chosen first: an earlier variable is only
       -- increased if no perfect hash can be found for any of the possible
       -- values for the later variables.
       -- Currently, the number of coefficients (the command prefix inspected)
       -- is considered most important, then the modulus, then the coefficients
       -- themselves.
       | numCoefs <- [1..minCmdLen]
       , modulus <- [numCommands .. maxMod]
       , coefs <- cartesianProduct (replicate numCoefs [0..maxCoef])
       , lenCoef <- [0..maxCoef]
       ]

main :: IO ()
main = do
    -- Read in the commands
    commands <- lines <$> getContents

    -- Compute (lazily) all parameters that produce perfect hashes for this set
    -- of commands.
    let answers = filter (\par -> allDistinct (map (hash par) commands))
                         (paramSpace commands)

    case answers of
        [] -> die "No perfect hash found, perhaps raise limits"
        param : _ -> do
            putStrLn (showParam param)
            mapM_ (\(cmd, h) -> putStrLn (show cmd ++ " -> " ++ show h))
                  (sortBy (comparing snd) [(cmd, hash param cmd) | cmd <- commands])