diff options
| author | Tom Smeding <tom.smeding@gmail.com> | 2020-06-29 20:32:42 +0200 | 
|---|---|---|
| committer | Tom Smeding <tom.smeding@gmail.com> | 2020-06-29 20:32:42 +0200 | 
| commit | daca7b9549b8426af1678d039af2f854588a5edd (patch) | |
| tree | b58f7553fc34c7ea2e8ef515c0302cb1de81e515 | |
| parent | 27b2baf8423dda719ddac4926c43bdddb427ee1f (diff) | |
server: Perfect hashing of commands
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | CommandHash.hs | 89 | ||||
| -rw-r--r-- | TODO.txt | 1 | ||||
| -rw-r--r-- | command.c | 74 | 
4 files changed, 130 insertions, 36 deletions
@@ -7,3 +7,5 @@ db.db  *.so  firebaseServiceAccountKey.json  compile_commands.json +CommandHash +CommandHash.hi diff --git a/CommandHash.hs b/CommandHash.hs new file mode 100644 index 0000000..42f0a99 --- /dev/null +++ b/CommandHash.hs @@ -0,0 +1,89 @@ +{-| Perfect hash generator for the commands in command.c. + +To re-generate the hash after a new command is added, write the list of all +commands (one per line) to the stdin of this program. If a perfect hash can be +found with the current maximum values, the hash expression will be printed, +along with the hash values of all commands to show that they are really +distinct. + +The general hash form considered here is an integer linear combination of the +length of the command and first N characters of the command (where N <= the +minimum command length), modulo some small integer. + +The possible parameters are generated in the function paramSpace. If no +possible hash is found, perhaps the limits in paramSpace need to be raised. + +Compile this program using: +  $ ghc -O3 CommandHash.hs +-} +module Main where + +import Data.Char +import Data.Ord +import Data.List +import System.Exit + + +data Param = Param { pCharCoefs :: [Int] +                   , pLenCoef :: Int +                   , pModulus :: Int } +  deriving (Show) + +showParam :: Param -> String +showParam par = +    "(" ++ intercalate " + " [show c ++ "*s[" ++ show i ++ "]" +                             | (i, c) <- zip [0..] (pCharCoefs par)] +        ++ " + " ++ show (pLenCoef par) ++ "*strlen(s)) % " +        ++ show (pModulus par) + +hash :: Param -> String -> Int +hash par str = +    let chars = zipWith (*) (map ord str) (pCharCoefs par) +    in (sum chars + pLenCoef par * length str) `mod` pModulus par + +cartesianProduct :: [[a]] -> [[a]] +cartesianProduct [] = [[]] +cartesianProduct (l:ls) = [x : rest | x <- l, rest <- cartesianProduct ls] + +allDistinct :: Ord a => [a] -> Bool +allDistinct l = let s = sort l in all (uncurry (/=)) (zip s (tail s)) + +paramSpace :: [String] -> [Param] +paramSpace commands = +    let -- These are limits on the maximum coefficient and the maximum modulus, +        -- respectively. If no perfect hashes are found at all, these may need +        -- to be raised. +        maxCoef = 10 +        maxMod = 64 + +        minCmdLen = minimum (map length commands) +        numCommands = length commands +    in [Param coefs lenCoef modulus +       -- Earlier variables are chosen first: an earlier variable is only +       -- increased if no perfect hash can be found for any of the possible +       -- values for the later variables. +       -- Currently, the number of coefficients (the command prefix inspected) +       -- is considered most important, then the modulus, then the coefficients +       -- themselves. +       | numCoefs <- [1..minCmdLen] +       , modulus <- [numCommands .. maxMod] +       , coefs <- cartesianProduct (replicate numCoefs [0..maxCoef]) +       , lenCoef <- [0..maxCoef] +       ] + +main :: IO () +main = do +    -- Read in the commands +    commands <- lines <$> getContents + +    -- Compute (lazily) all parameters that produce perfect hashes for this set +    -- of commands. +    let answers = filter (\par -> allDistinct (map (hash par) commands)) +                         (paramSpace commands) + +    case answers of +        [] -> die "No perfect hash found, perhaps raise limits" +        param : _ -> do +            putStrLn (showParam param) +            mapM_ (\(cmd, h) -> putStrLn (show cmd ++ " -> " ++ show h)) +                  (sortBy (comparing snd) [(cmd, hash param cmd) | cmd <- commands]) @@ -6,4 +6,3 @@  - Use poll(2), not select(2)  - Fix OOM dos vector  - Sub-linear hash table of conn_data's in main.c -- Constant-time lookup of commands in command.c @@ -390,25 +390,34 @@ struct cmd_info{  	struct cmd_retval (*handler)(struct conn_data *data,const char *tag,const char **args);  }; -static const struct cmd_info commands[]={ -	{"version",1,false,cmd_version}, -	{"register",2,true,cmd_register}, -	{"login",2,true,cmd_login}, -	{"logout",0,false,cmd_logout}, -	{"list_rooms",0,false,cmd_list_rooms}, -	{"list_members",1,false,cmd_list_members}, -	{"create_room",0,false,cmd_create_room}, -	{"invite",2,false,cmd_invite}, -	{"send",2,true,cmd_send}, -	{"history",2,false,cmd_history}, -	{"history_before",3,false,cmd_history_before}, -	{"ping",0,false,cmd_ping}, -	{"is_online",1,false,cmd_is_online}, -	{"firebase_token",1,false,cmd_firebase_token}, -	{"delete_firebase_token",1,false,cmd_delete_firebase_token}, -	{"user_active",1,false,cmd_user_active}, +// Use CommandHash.hs to re-generate this perfect hash function for a different +// list of commands. +#define COMMAND_HASH_MODULUS 31 +#define COMMAND_HASH(cmd0, len) ((cmd0 + 6 * len) % COMMAND_HASH_MODULUS) + +#define COMMAND_ENTRY(cmd0, cmd, nargs, longlast, handler) \ +	[COMMAND_HASH(cmd0, strlen(cmd))] = {cmd, nargs, longlast, handler} + +// First argument to COMMAND_ENTRY must be command[0]. This is because +// apparently, "abc"[0] is not a constant expression, while strlen("abc") is. +static const struct cmd_info commands[COMMAND_HASH_MODULUS] = { +	COMMAND_ENTRY('v', "version", 1, false, cmd_version), +	COMMAND_ENTRY('r', "register", 2, true, cmd_register), +	COMMAND_ENTRY('l', "login", 2, true, cmd_login), +	COMMAND_ENTRY('l', "logout", 0, false, cmd_logout), +	COMMAND_ENTRY('l', "list_rooms", 0, false, cmd_list_rooms), +	COMMAND_ENTRY('l', "list_members", 1, false, cmd_list_members), +	COMMAND_ENTRY('c', "create_room", 0, false, cmd_create_room), +	COMMAND_ENTRY('i', "invite", 2, false, cmd_invite), +	COMMAND_ENTRY('s', "send", 2, true, cmd_send), +	COMMAND_ENTRY('h', "history", 2, false, cmd_history), +	COMMAND_ENTRY('h', "history_before", 3, false, cmd_history_before), +	COMMAND_ENTRY('p', "ping", 0, false, cmd_ping), +	COMMAND_ENTRY('i', "is_online", 1, false, cmd_is_online), +	COMMAND_ENTRY('f', "firebase_token", 1, false, cmd_firebase_token), +	COMMAND_ENTRY('d', "delete_firebase_token", 1, false, cmd_delete_firebase_token), +	COMMAND_ENTRY('u', "user_active", 1, false, cmd_user_active),  }; -#define NCOMMANDS (sizeof(commands)/sizeof(commands[0]))  bool handle_input_line(struct conn_data *data,char *line,size_t linelen){ @@ -427,36 +436,31 @@ bool handle_input_line(struct conn_data *data,char *line,size_t linelen){  	sepp=memchr(line,' ',linelen);  	if(sepp==NULL)sepp=line+linelen;  	const size_t cmdlen=sepp-line; -	size_t cmdi; -	for(cmdi=0;cmdi<NCOMMANDS;cmdi++){ -		if(cmdlen==strlen(commands[cmdi].cmdname)&& -				memcmp(line,commands[cmdi].cmdname,cmdlen)==0){ -			break; -		} -	} - -	if(cmdi==NCOMMANDS){ +	const struct cmd_info *command=&commands[COMMAND_HASH(line[0],cmdlen)]; +	if(!command->cmdname +			||cmdlen!=strlen(command->cmdname) +			||memcmp(line,command->cmdname,cmdlen)!=0){  		debug("Unknown command %s on connection %d",line,data->fd);  		return true;  	}  	// Ensure first command is 'version' -	if(data->protversion==-1&&strcmp(commands[cmdi].cmdname,"version")!=0){ +	if(data->protversion==-1&&command->handler!=cmd_version){  		debug("Command %s before version negotiation on connection %d", -				commands[cmdi].cmdname,data->fd); +				command->cmdname,data->fd);  		return true;  	} -	int nargs=commands[cmdi].nargs; -	char *args[nargs]; +	const int nargs=command->nargs; +	const char *args[nargs];  	size_t cursor=cmdlen+1;  	for(int i=0;i<nargs;i++){  		if(cursor>linelen){ -			debug("Connection %d sent too few parameters to command %s",data->fd,commands[cmdi].cmdname); +			debug("Connection %d sent too few parameters to command %s",data->fd,command->cmdname);  			return true;  		} -		if(i==nargs-1&&commands[cmdi].longlast){ +		if(i==nargs-1&&command->longlast){  			sepp=line+linelen;  		} else {  			sepp=memchr(line+cursor,' ',linelen-cursor); @@ -467,11 +471,11 @@ bool handle_input_line(struct conn_data *data,char *line,size_t linelen){  		cursor=sepp-line+1;  	}  	if(sepp-line<(i64)linelen){ -		debug("Connection %d sent too many parameters to command %s",data->fd,commands[cmdi].cmdname); +		debug("Connection %d sent too many parameters to command %s",data->fd,command->cmdname);  		return true;  	} -	struct cmd_retval retval=commands[cmdi].handler(data,tag,(const char**)args); +	struct cmd_retval retval=command->handler(data,tag,(const char**)args);  	if(retval.memzero)sodium_memzero(line,linelen);  	return retval.socket_close;  }  | 
