aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Smeding <tom.smeding@gmail.com>2020-06-29 20:32:42 +0200
committerTom Smeding <tom.smeding@gmail.com>2020-06-29 20:32:42 +0200
commitdaca7b9549b8426af1678d039af2f854588a5edd (patch)
treeb58f7553fc34c7ea2e8ef515c0302cb1de81e515
parent27b2baf8423dda719ddac4926c43bdddb427ee1f (diff)
server: Perfect hashing of commands
-rw-r--r--.gitignore2
-rw-r--r--CommandHash.hs89
-rw-r--r--TODO.txt1
-rw-r--r--command.c74
4 files changed, 130 insertions, 36 deletions
diff --git a/.gitignore b/.gitignore
index db08d13..8cd07d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,5 @@ db.db
*.so
firebaseServiceAccountKey.json
compile_commands.json
+CommandHash
+CommandHash.hi
diff --git a/CommandHash.hs b/CommandHash.hs
new file mode 100644
index 0000000..42f0a99
--- /dev/null
+++ b/CommandHash.hs
@@ -0,0 +1,89 @@
+{-| Perfect hash generator for the commands in command.c.
+
+To re-generate the hash after a new command is added, write the list of all
+commands (one per line) to the stdin of this program. If a perfect hash can be
+found with the current maximum values, the hash expression will be printed,
+along with the hash values of all commands to show that they are really
+distinct.
+
+The general hash form considered here is an integer linear combination of the
+length of the command and first N characters of the command (where N <= the
+minimum command length), modulo some small integer.
+
+The possible parameters are generated in the function paramSpace. If no
+possible hash is found, perhaps the limits in paramSpace need to be raised.
+
+Compile this program using:
+ $ ghc -O3 CommandHash.hs
+-}
+module Main where
+
+import Data.Char
+import Data.Ord
+import Data.List
+import System.Exit
+
+
+data Param = Param { pCharCoefs :: [Int]
+ , pLenCoef :: Int
+ , pModulus :: Int }
+ deriving (Show)
+
+showParam :: Param -> String
+showParam par =
+ "(" ++ intercalate " + " [show c ++ "*s[" ++ show i ++ "]"
+ | (i, c) <- zip [0..] (pCharCoefs par)]
+ ++ " + " ++ show (pLenCoef par) ++ "*strlen(s)) % "
+ ++ show (pModulus par)
+
+hash :: Param -> String -> Int
+hash par str =
+ let chars = zipWith (*) (map ord str) (pCharCoefs par)
+ in (sum chars + pLenCoef par * length str) `mod` pModulus par
+
+cartesianProduct :: [[a]] -> [[a]]
+cartesianProduct [] = [[]]
+cartesianProduct (l:ls) = [x : rest | x <- l, rest <- cartesianProduct ls]
+
+allDistinct :: Ord a => [a] -> Bool
+allDistinct l = let s = sort l in all (uncurry (/=)) (zip s (tail s))
+
+paramSpace :: [String] -> [Param]
+paramSpace commands =
+ let -- These are limits on the maximum coefficient and the maximum modulus,
+ -- respectively. If no perfect hashes are found at all, these may need
+ -- to be raised.
+ maxCoef = 10
+ maxMod = 64
+
+ minCmdLen = minimum (map length commands)
+ numCommands = length commands
+ in [Param coefs lenCoef modulus
+ -- Earlier variables are chosen first: an earlier variable is only
+ -- increased if no perfect hash can be found for any of the possible
+ -- values for the later variables.
+ -- Currently, the number of coefficients (the command prefix inspected)
+ -- is considered most important, then the modulus, then the coefficients
+ -- themselves.
+ | numCoefs <- [1..minCmdLen]
+ , modulus <- [numCommands .. maxMod]
+ , coefs <- cartesianProduct (replicate numCoefs [0..maxCoef])
+ , lenCoef <- [0..maxCoef]
+ ]
+
+main :: IO ()
+main = do
+ -- Read in the commands
+ commands <- lines <$> getContents
+
+ -- Compute (lazily) all parameters that produce perfect hashes for this set
+ -- of commands.
+ let answers = filter (\par -> allDistinct (map (hash par) commands))
+ (paramSpace commands)
+
+ case answers of
+ [] -> die "No perfect hash found, perhaps raise limits"
+ param : _ -> do
+ putStrLn (showParam param)
+ mapM_ (\(cmd, h) -> putStrLn (show cmd ++ " -> " ++ show h))
+ (sortBy (comparing snd) [(cmd, hash param cmd) | cmd <- commands])
diff --git a/TODO.txt b/TODO.txt
index 83b866c..cd20675 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -6,4 +6,3 @@
- Use poll(2), not select(2)
- Fix OOM dos vector
- Sub-linear hash table of conn_data's in main.c
-- Constant-time lookup of commands in command.c
diff --git a/command.c b/command.c
index 6e73ce3..e5b8ae3 100644
--- a/command.c
+++ b/command.c
@@ -390,25 +390,34 @@ struct cmd_info{
struct cmd_retval (*handler)(struct conn_data *data,const char *tag,const char **args);
};
-static const struct cmd_info commands[]={
- {"version",1,false,cmd_version},
- {"register",2,true,cmd_register},
- {"login",2,true,cmd_login},
- {"logout",0,false,cmd_logout},
- {"list_rooms",0,false,cmd_list_rooms},
- {"list_members",1,false,cmd_list_members},
- {"create_room",0,false,cmd_create_room},
- {"invite",2,false,cmd_invite},
- {"send",2,true,cmd_send},
- {"history",2,false,cmd_history},
- {"history_before",3,false,cmd_history_before},
- {"ping",0,false,cmd_ping},
- {"is_online",1,false,cmd_is_online},
- {"firebase_token",1,false,cmd_firebase_token},
- {"delete_firebase_token",1,false,cmd_delete_firebase_token},
- {"user_active",1,false,cmd_user_active},
+// Use CommandHash.hs to re-generate this perfect hash function for a different
+// list of commands.
+#define COMMAND_HASH_MODULUS 31
+#define COMMAND_HASH(cmd0, len) ((cmd0 + 6 * len) % COMMAND_HASH_MODULUS)
+
+#define COMMAND_ENTRY(cmd0, cmd, nargs, longlast, handler) \
+ [COMMAND_HASH(cmd0, strlen(cmd))] = {cmd, nargs, longlast, handler}
+
+// First argument to COMMAND_ENTRY must be command[0]. This is because
+// apparently, "abc"[0] is not a constant expression, while strlen("abc") is.
+static const struct cmd_info commands[COMMAND_HASH_MODULUS] = {
+ COMMAND_ENTRY('v', "version", 1, false, cmd_version),
+ COMMAND_ENTRY('r', "register", 2, true, cmd_register),
+ COMMAND_ENTRY('l', "login", 2, true, cmd_login),
+ COMMAND_ENTRY('l', "logout", 0, false, cmd_logout),
+ COMMAND_ENTRY('l', "list_rooms", 0, false, cmd_list_rooms),
+ COMMAND_ENTRY('l', "list_members", 1, false, cmd_list_members),
+ COMMAND_ENTRY('c', "create_room", 0, false, cmd_create_room),
+ COMMAND_ENTRY('i', "invite", 2, false, cmd_invite),
+ COMMAND_ENTRY('s', "send", 2, true, cmd_send),
+ COMMAND_ENTRY('h', "history", 2, false, cmd_history),
+ COMMAND_ENTRY('h', "history_before", 3, false, cmd_history_before),
+ COMMAND_ENTRY('p', "ping", 0, false, cmd_ping),
+ COMMAND_ENTRY('i', "is_online", 1, false, cmd_is_online),
+ COMMAND_ENTRY('f', "firebase_token", 1, false, cmd_firebase_token),
+ COMMAND_ENTRY('d', "delete_firebase_token", 1, false, cmd_delete_firebase_token),
+ COMMAND_ENTRY('u', "user_active", 1, false, cmd_user_active),
};
-#define NCOMMANDS (sizeof(commands)/sizeof(commands[0]))
bool handle_input_line(struct conn_data *data,char *line,size_t linelen){
@@ -427,36 +436,31 @@ bool handle_input_line(struct conn_data *data,char *line,size_t linelen){
sepp=memchr(line,' ',linelen);
if(sepp==NULL)sepp=line+linelen;
const size_t cmdlen=sepp-line;
- size_t cmdi;
- for(cmdi=0;cmdi<NCOMMANDS;cmdi++){
- if(cmdlen==strlen(commands[cmdi].cmdname)&&
- memcmp(line,commands[cmdi].cmdname,cmdlen)==0){
- break;
- }
- }
-
- if(cmdi==NCOMMANDS){
+ const struct cmd_info *command=&commands[COMMAND_HASH(line[0],cmdlen)];
+ if(!command->cmdname
+ ||cmdlen!=strlen(command->cmdname)
+ ||memcmp(line,command->cmdname,cmdlen)!=0){
debug("Unknown command %s on connection %d",line,data->fd);
return true;
}
// Ensure first command is 'version'
- if(data->protversion==-1&&strcmp(commands[cmdi].cmdname,"version")!=0){
+ if(data->protversion==-1&&command->handler!=cmd_version){
debug("Command %s before version negotiation on connection %d",
- commands[cmdi].cmdname,data->fd);
+ command->cmdname,data->fd);
return true;
}
- int nargs=commands[cmdi].nargs;
- char *args[nargs];
+ const int nargs=command->nargs;
+ const char *args[nargs];
size_t cursor=cmdlen+1;
for(int i=0;i<nargs;i++){
if(cursor>linelen){
- debug("Connection %d sent too few parameters to command %s",data->fd,commands[cmdi].cmdname);
+ debug("Connection %d sent too few parameters to command %s",data->fd,command->cmdname);
return true;
}
- if(i==nargs-1&&commands[cmdi].longlast){
+ if(i==nargs-1&&command->longlast){
sepp=line+linelen;
} else {
sepp=memchr(line+cursor,' ',linelen-cursor);
@@ -467,11 +471,11 @@ bool handle_input_line(struct conn_data *data,char *line,size_t linelen){
cursor=sepp-line+1;
}
if(sepp-line<(i64)linelen){
- debug("Connection %d sent too many parameters to command %s",data->fd,commands[cmdi].cmdname);
+ debug("Connection %d sent too many parameters to command %s",data->fd,command->cmdname);
return true;
}
- struct cmd_retval retval=commands[cmdi].handler(data,tag,(const char**)args);
+ struct cmd_retval retval=command->handler(data,tag,(const char**)args);
if(retval.memzero)sodium_memzero(line,linelen);
return retval.socket_close;
}