From f695f9046b5793796ebd703ad816f19637403473 Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Wed, 1 Jul 2020 23:11:05 +0200 Subject: Growing hash table of conn_data's in main --- TODO.txt | 1 - hashtable.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hashtable.h | 15 ++++++ main.c | 53 +++++++++++---------- 4 files changed, 190 insertions(+), 28 deletions(-) create mode 100644 hashtable.c create mode 100644 hashtable.h diff --git a/TODO.txt b/TODO.txt index cd20675..1f09bca 100644 --- a/TODO.txt +++ b/TODO.txt @@ -5,4 +5,3 @@ - A session is marked inactive 2 minutes after the latest activity on that session. Make the number "2" configurable and not a hard-coded constant. - Use poll(2), not select(2) - Fix OOM dos vector -- Sub-linear hash table of conn_data's in main.c diff --git a/hashtable.c b/hashtable.c new file mode 100644 index 0000000..ea0f0fc --- /dev/null +++ b/hashtable.c @@ -0,0 +1,149 @@ +#include +#include "hashtable.h" +#include "global.h" + + +// Generated by starting with 5, then repeatedly taking the smallest prime at +// least double the previous number. +// This array must end well before UINT64_MAX, so that the occupancy percentage +// computation does not overflow. +static const size_t doubling_primes[] = { + 5, 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437, + 102877, 205759, 411527, 823117, 1646237, 3292489, 6584983, 13169977, + 26339969, 52679969, 105359939, 210719881, 421439783, 842879579 +}; +#define NUM_DOUBLING_PRIMES ((size_t)(sizeof(doubling_primes) / sizeof(doubling_primes[0]))) + + +struct keyvalue { + unsigned int key; + void *value; +}; + +struct bucket { + int cap, len; + struct keyvalue *pairs; +}; + +struct hashtable { + // modulus == doubling_primes[modulus_index] + size_t modulus_index, modulus; + size_t num_values; + struct bucket *table; +}; + +static void ht_nullify(struct hashtable *ht) { + for (size_t i = 0; i < ht->modulus; i++) { + free(ht->table[i].pairs); // noop if values == NULL + } + free(ht->table); +} + +static void ht_grow(struct hashtable *ht) { + static bool nested_grow = false; + + // See the ht_insert() call below + if (nested_grow) { + die("Unexpected nested ht_grow() in ht_insert()"); + } + + if (ht->modulus_index == NUM_DOUBLING_PRIMES - 1) { + // TODO: fail more gracefully + die("Hashtable full, cannot insert"); + } + + struct hashtable newht; + newht.modulus_index = ht->modulus_index + 1; + newht.modulus = doubling_primes[newht.modulus_index]; + newht.num_values = 0; + newht.table = calloc(newht.modulus, struct bucket); + + debug("ht: growing from %zu to %zu", ht->modulus, newht.modulus); + + for (size_t i = 0; i < ht->modulus; i++) { + const struct bucket *bucket = &ht->table[i]; + for (int j = 0; j < bucket->len; j++) { + const struct keyvalue *pair = &bucket->pairs[j]; + // A priori, this could recursively call ht_grow() on the new + // table. However, this is in practice impossible, because growing + // occurs at 60% occupancy, and we only just surpassed that with + // the old size -- and we just doubled the size. Therefore, + // recursively arriving here in ht_grow() should be impossible. + // This is dynamically checked using the nested_grow static + // boolean. + ht_insert(&newht, pair->key, pair->value); + } + } + + ht_nullify(ht); + *ht = newht; +} + +struct hashtable* ht_alloc() { + struct hashtable *ht = malloc(1, struct hashtable); + ht->modulus_index = 0; + ht->modulus = doubling_primes[ht->modulus_index]; + ht->num_values = 0; + ht->table = calloc(ht->modulus, struct bucket); + return ht; +} + +void ht_free(struct hashtable *ht) { + ht_nullify(ht); + free(ht); +} + +void ht_insert(struct hashtable *ht, unsigned int key, void *value) { + struct bucket *bucket = &ht->table[key % ht->modulus]; + + debug("ht: insert num_values=%zu key=%u bucket: cap=%d len=%d", ht->num_values, key, bucket->cap, bucket->len); + + if (bucket->cap == 0) { + bucket->cap = 1; + bucket->pairs = malloc(bucket->cap, struct keyvalue); + } else if (bucket->len == bucket->cap) { + if (bucket->cap > INT_MAX / 2) { + die("Hashtable has degraded so far it's not funny anymore"); + } + bucket->cap *= 2; + bucket->pairs = realloc(bucket->pairs, bucket->cap, struct keyvalue); + } + + bucket->pairs[bucket->len].key = key; + bucket->pairs[bucket->len].value = value; + bucket->len++; + ht->num_values++; + + // Reallocate at >60% occupation (counting colliding keys double). + // Note that this does not overflow since the last entry in doubling_primes + // is only on the order of 10^9. + if (5 * ht->num_values > 3 * ht->modulus) { + ht_grow(ht); + } +} + +void* ht_find(const struct hashtable *ht, unsigned int key) { + struct bucket *bucket = &ht->table[key % ht->modulus]; + for (int i = 0; i < bucket->len; i++) { + if (bucket->pairs[i].key == key) { + return bucket->pairs[i].value; + } + } + return NULL; +} + +void ht_delete(struct hashtable *ht, unsigned int key) { + struct bucket *bucket = &ht->table[key % ht->modulus]; + for (int i = 0; i < bucket->len; i++) { + if (bucket->pairs[i].key == key) { + debug("ht: delete key=%u bucket: cap=%d len=%d i=%d", key, bucket->cap, bucket->len, i); + if (i < bucket->len - 1) { + bucket->pairs[i] = bucket->pairs[bucket->len - 1]; + } + bucket->len--; + ht->num_values--; + return; + } + } + debug("ht: delete key=%u not found", key); +} diff --git a/hashtable.h b/hashtable.h new file mode 100644 index 0000000..b85c2e0 --- /dev/null +++ b/hashtable.h @@ -0,0 +1,15 @@ +#pragma once + + +struct hashtable; + +struct hashtable* ht_alloc(); +void ht_free(struct hashtable *ht); + +void ht_insert(struct hashtable *ht, unsigned int key, void *value); + +// Returns NULL if key is not present in the table. +void* ht_find(const struct hashtable *ht, unsigned int key); + +// Does nothing if the key is not present in the table. +void ht_delete(struct hashtable *ht, unsigned int key); diff --git a/main.c b/main.c index 03bf7b0..b925cd1 100644 --- a/main.c +++ b/main.c @@ -19,6 +19,7 @@ #include "plugin.h" #include "runloop.h" #include "user_data.h" +#include "hashtable.h" #define PORT (29536) // python: int("msg",36) @@ -39,39 +40,35 @@ static int create_server_socket(void){ return sock; } -struct hash_item{ - struct conn_data cd; - struct hash_item *next; -}; +static struct hashtable *conn_hash = NULL; -#define CONN_HASH_SIZE (16) -static struct hash_item *conn_hash[CONN_HASH_SIZE]; +static void conn_hash_init(void) { + conn_hash = ht_alloc(); +} + +static void add_conn_data(int fd, struct conn_data *item) { + ht_insert(conn_hash, fd, item); +} static struct conn_data* find_conn_data(int fd){ - struct hash_item *item=conn_hash[fd%CONN_HASH_SIZE]; - while(item&&item->cd.fd!=fd)item=item->next; + struct conn_data *item = (struct conn_data*)ht_find(conn_hash, fd); assert(item); - return &item->cd; + return item; } static void delete_conn_data(int fd){ debug("Deleting conn_data for fd=%d",fd); - struct hash_item *item=conn_hash[fd%CONN_HASH_SIZE]; - assert(item); - struct hash_item *parent=NULL; - while(item&&item->cd.fd!=fd){ - parent=item; - item=item->next; - } - assert(item); - if(item->cd.userid!=-1){ - userdata_unregister(item->cd.userid,fd); - broadcast_online_change(item->cd.userid); + + struct conn_data *item = find_conn_data(fd); + + if (item->userid != -1) { + userdata_unregister(item->userid, fd); + broadcast_online_change(item->userid); } - conn_data_nullify(&item->cd); - if(parent)parent->next=item->next; - else conn_hash[fd%CONN_HASH_SIZE]=item->next; + conn_data_nullify(item); free(item); + + ht_delete(conn_hash, fd); } static bool client_socket_callback(int fd){ @@ -116,10 +113,10 @@ static bool server_socket_callback(int fd){ if(sock<0)die_perror("accept"); runloop_add_fd(sock,client_socket_callback,true); - struct hash_item *item=malloc(1,struct hash_item); - conn_data_init(&item->cd,sock); - item->next=conn_hash[sock%CONN_HASH_SIZE]; - conn_hash[sock%CONN_HASH_SIZE]=item; + struct conn_data *item = malloc(1, struct conn_data); + conn_data_init(item, sock); + add_conn_data(sock, item); + debug("Added conn_data for fd=%d",sock); return false; } @@ -167,6 +164,8 @@ int main(int argc,char **argv){ signal(SIGPIPE,signal_handler); + conn_hash_init(); + plugin_init(); for(int i=1;i