From daca7b9549b8426af1678d039af2f854588a5edd Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Mon, 29 Jun 2020 20:32:42 +0200 Subject: server: Perfect hashing of commands --- .gitignore | 2 ++ CommandHash.hs | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ TODO.txt | 1 - command.c | 74 +++++++++++++++++++++++++----------------------- 4 files changed, 130 insertions(+), 36 deletions(-) create mode 100644 CommandHash.hs diff --git a/.gitignore b/.gitignore index db08d13..8cd07d4 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ db.db *.so firebaseServiceAccountKey.json compile_commands.json +CommandHash +CommandHash.hi diff --git a/CommandHash.hs b/CommandHash.hs new file mode 100644 index 0000000..42f0a99 --- /dev/null +++ b/CommandHash.hs @@ -0,0 +1,89 @@ +{-| Perfect hash generator for the commands in command.c. + +To re-generate the hash after a new command is added, write the list of all +commands (one per line) to the stdin of this program. If a perfect hash can be +found with the current maximum values, the hash expression will be printed, +along with the hash values of all commands to show that they are really +distinct. + +The general hash form considered here is an integer linear combination of the +length of the command and first N characters of the command (where N <= the +minimum command length), modulo some small integer. + +The possible parameters are generated in the function paramSpace. If no +possible hash is found, perhaps the limits in paramSpace need to be raised. + +Compile this program using: + $ ghc -O3 CommandHash.hs +-} +module Main where + +import Data.Char +import Data.Ord +import Data.List +import System.Exit + + +data Param = Param { pCharCoefs :: [Int] + , pLenCoef :: Int + , pModulus :: Int } + deriving (Show) + +showParam :: Param -> String +showParam par = + "(" ++ intercalate " + " [show c ++ "*s[" ++ show i ++ "]" + | (i, c) <- zip [0..] (pCharCoefs par)] + ++ " + " ++ show (pLenCoef par) ++ "*strlen(s)) % " + ++ show (pModulus par) + +hash :: Param -> String -> Int +hash par str = + let chars = zipWith (*) (map ord str) (pCharCoefs par) + in (sum chars + pLenCoef par * length str) `mod` pModulus par + +cartesianProduct :: [[a]] -> [[a]] +cartesianProduct [] = [[]] +cartesianProduct (l:ls) = [x : rest | x <- l, rest <- cartesianProduct ls] + +allDistinct :: Ord a => [a] -> Bool +allDistinct l = let s = sort l in all (uncurry (/=)) (zip s (tail s)) + +paramSpace :: [String] -> [Param] +paramSpace commands = + let -- These are limits on the maximum coefficient and the maximum modulus, + -- respectively. If no perfect hashes are found at all, these may need + -- to be raised. + maxCoef = 10 + maxMod = 64 + + minCmdLen = minimum (map length commands) + numCommands = length commands + in [Param coefs lenCoef modulus + -- Earlier variables are chosen first: an earlier variable is only + -- increased if no perfect hash can be found for any of the possible + -- values for the later variables. + -- Currently, the number of coefficients (the command prefix inspected) + -- is considered most important, then the modulus, then the coefficients + -- themselves. + | numCoefs <- [1..minCmdLen] + , modulus <- [numCommands .. maxMod] + , coefs <- cartesianProduct (replicate numCoefs [0..maxCoef]) + , lenCoef <- [0..maxCoef] + ] + +main :: IO () +main = do + -- Read in the commands + commands <- lines <$> getContents + + -- Compute (lazily) all parameters that produce perfect hashes for this set + -- of commands. + let answers = filter (\par -> allDistinct (map (hash par) commands)) + (paramSpace commands) + + case answers of + [] -> die "No perfect hash found, perhaps raise limits" + param : _ -> do + putStrLn (showParam param) + mapM_ (\(cmd, h) -> putStrLn (show cmd ++ " -> " ++ show h)) + (sortBy (comparing snd) [(cmd, hash param cmd) | cmd <- commands]) diff --git a/TODO.txt b/TODO.txt index 83b866c..cd20675 100644 --- a/TODO.txt +++ b/TODO.txt @@ -6,4 +6,3 @@ - Use poll(2), not select(2) - Fix OOM dos vector - Sub-linear hash table of conn_data's in main.c -- Constant-time lookup of commands in command.c diff --git a/command.c b/command.c index 6e73ce3..e5b8ae3 100644 --- a/command.c +++ b/command.c @@ -390,25 +390,34 @@ struct cmd_info{ struct cmd_retval (*handler)(struct conn_data *data,const char *tag,const char **args); }; -static const struct cmd_info commands[]={ - {"version",1,false,cmd_version}, - {"register",2,true,cmd_register}, - {"login",2,true,cmd_login}, - {"logout",0,false,cmd_logout}, - {"list_rooms",0,false,cmd_list_rooms}, - {"list_members",1,false,cmd_list_members}, - {"create_room",0,false,cmd_create_room}, - {"invite",2,false,cmd_invite}, - {"send",2,true,cmd_send}, - {"history",2,false,cmd_history}, - {"history_before",3,false,cmd_history_before}, - {"ping",0,false,cmd_ping}, - {"is_online",1,false,cmd_is_online}, - {"firebase_token",1,false,cmd_firebase_token}, - {"delete_firebase_token",1,false,cmd_delete_firebase_token}, - {"user_active",1,false,cmd_user_active}, +// Use CommandHash.hs to re-generate this perfect hash function for a different +// list of commands. +#define COMMAND_HASH_MODULUS 31 +#define COMMAND_HASH(cmd0, len) ((cmd0 + 6 * len) % COMMAND_HASH_MODULUS) + +#define COMMAND_ENTRY(cmd0, cmd, nargs, longlast, handler) \ + [COMMAND_HASH(cmd0, strlen(cmd))] = {cmd, nargs, longlast, handler} + +// First argument to COMMAND_ENTRY must be command[0]. This is because +// apparently, "abc"[0] is not a constant expression, while strlen("abc") is. +static const struct cmd_info commands[COMMAND_HASH_MODULUS] = { + COMMAND_ENTRY('v', "version", 1, false, cmd_version), + COMMAND_ENTRY('r', "register", 2, true, cmd_register), + COMMAND_ENTRY('l', "login", 2, true, cmd_login), + COMMAND_ENTRY('l', "logout", 0, false, cmd_logout), + COMMAND_ENTRY('l', "list_rooms", 0, false, cmd_list_rooms), + COMMAND_ENTRY('l', "list_members", 1, false, cmd_list_members), + COMMAND_ENTRY('c', "create_room", 0, false, cmd_create_room), + COMMAND_ENTRY('i', "invite", 2, false, cmd_invite), + COMMAND_ENTRY('s', "send", 2, true, cmd_send), + COMMAND_ENTRY('h', "history", 2, false, cmd_history), + COMMAND_ENTRY('h', "history_before", 3, false, cmd_history_before), + COMMAND_ENTRY('p', "ping", 0, false, cmd_ping), + COMMAND_ENTRY('i', "is_online", 1, false, cmd_is_online), + COMMAND_ENTRY('f', "firebase_token", 1, false, cmd_firebase_token), + COMMAND_ENTRY('d', "delete_firebase_token", 1, false, cmd_delete_firebase_token), + COMMAND_ENTRY('u', "user_active", 1, false, cmd_user_active), }; -#define NCOMMANDS (sizeof(commands)/sizeof(commands[0])) bool handle_input_line(struct conn_data *data,char *line,size_t linelen){ @@ -427,36 +436,31 @@ bool handle_input_line(struct conn_data *data,char *line,size_t linelen){ sepp=memchr(line,' ',linelen); if(sepp==NULL)sepp=line+linelen; const size_t cmdlen=sepp-line; - size_t cmdi; - for(cmdi=0;cmdicmdname + ||cmdlen!=strlen(command->cmdname) + ||memcmp(line,command->cmdname,cmdlen)!=0){ debug("Unknown command %s on connection %d",line,data->fd); return true; } // Ensure first command is 'version' - if(data->protversion==-1&&strcmp(commands[cmdi].cmdname,"version")!=0){ + if(data->protversion==-1&&command->handler!=cmd_version){ debug("Command %s before version negotiation on connection %d", - commands[cmdi].cmdname,data->fd); + command->cmdname,data->fd); return true; } - int nargs=commands[cmdi].nargs; - char *args[nargs]; + const int nargs=command->nargs; + const char *args[nargs]; size_t cursor=cmdlen+1; for(int i=0;ilinelen){ - debug("Connection %d sent too few parameters to command %s",data->fd,commands[cmdi].cmdname); + debug("Connection %d sent too few parameters to command %s",data->fd,command->cmdname); return true; } - if(i==nargs-1&&commands[cmdi].longlast){ + if(i==nargs-1&&command->longlast){ sepp=line+linelen; } else { sepp=memchr(line+cursor,' ',linelen-cursor); @@ -467,11 +471,11 @@ bool handle_input_line(struct conn_data *data,char *line,size_t linelen){ cursor=sepp-line+1; } if(sepp-line<(i64)linelen){ - debug("Connection %d sent too many parameters to command %s",data->fd,commands[cmdi].cmdname); + debug("Connection %d sent too many parameters to command %s",data->fd,command->cmdname); return true; } - struct cmd_retval retval=commands[cmdi].handler(data,tag,(const char**)args); + struct cmd_retval retval=command->handler(data,tag,(const char**)args); if(retval.memzero)sodium_memzero(line,linelen); return retval.socket_close; } -- cgit v1.2.3-54-g00ecf