#include #include #include "hashtable.h" // Generated by starting with 5, then repeatedly taking the smallest prime at // least double the previous number. // This array must end well before UINT64_MAX, so that the occupancy percentage // computation does not overflow. static const size_t doubling_primes[] = { 5, 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437, 102877, 205759, 411527, 823117, 1646237, 3292489, 6584983, 13169977, 26339969, 52679969, 105359939, 210719881, 421439783, 842879579 }; #define NUM_DOUBLING_PRIMES ((size_t)(sizeof(doubling_primes) / sizeof(doubling_primes[0]))) struct keyvalue { u64 key; void *value; }; struct bucket { int cap, len; struct keyvalue *pairs; }; struct hashtable { // modulus == doubling_primes[modulus_index] size_t modulus_index, modulus; size_t num_values; struct bucket *table; }; static void ht_grow(struct hashtable *ht) { static bool nested_grow = false; // See the ht_insert() call below if (nested_grow) { die("Unexpected nested ht_grow() in ht_insert()"); } if (ht->modulus_index == NUM_DOUBLING_PRIMES - 1) { // TODO: fail more gracefully die("Hashtable full, cannot insert"); } struct hashtable newht; newht.modulus_index = ht->modulus_index + 1; newht.modulus = doubling_primes[newht.modulus_index]; newht.num_values = 0; newht.table = calloc(newht.modulus, struct bucket); for (size_t i = 0; i < ht->modulus; i++) { const struct bucket *bucket = &ht->table[i]; for (int j = 0; j < bucket->len; j++) { const struct keyvalue *pair = &bucket->pairs[j]; // A priori, this could recursively call ht_grow() on the new // table. However, this is in practice impossible, because growing // occurs at 60% occupancy, and we only just surpassed that with // the old size -- and we just doubled the size. Therefore, // recursively arriving here in ht_grow() should be impossible. // This is dynamically checked using the nested_grow static // boolean. ht_insert(&newht, pair->key, pair->value); } free(bucket->pairs); // noop if pairs == NULL } free(ht->table); *ht = newht; } struct hashtable* ht_alloc(void) { struct hashtable *ht = malloc(1, struct hashtable); ht->modulus_index = 0; ht->modulus = doubling_primes[ht->modulus_index]; ht->num_values = 0; ht->table = calloc(ht->modulus, struct bucket); return ht; } void ht_free(struct hashtable *ht) { for (size_t i = 0; i < ht->modulus; i++) { free(ht->table[i].pairs); // noop if pairs == NULL } free(ht->table); free(ht); } void ht_insert(struct hashtable *ht, u64 key, void *value) { assert(value != NULL); struct bucket *bucket = &ht->table[key % ht->modulus]; // If key already exists, overwrite that for (int i = 0; i < bucket->len; i++) { if (bucket->pairs[i].key == key) { bucket->pairs[i].value = value; return; } } if (bucket->cap == 0) { bucket->cap = 1; bucket->pairs = malloc(bucket->cap, struct keyvalue); } else if (bucket->len == bucket->cap) { if (bucket->cap > INT_MAX / 2) { die("Hashtable has degraded so far it's not funny anymore"); } bucket->cap *= 2; bucket->pairs = realloc(bucket->pairs, bucket->cap, struct keyvalue); } bucket->pairs[bucket->len].key = key; bucket->pairs[bucket->len].value = value; bucket->len++; ht->num_values++; // Reallocate at >60% occupation (counting colliding keys double). // Note that this does not overflow since the last entry in doubling_primes // is only on the order of 10^9. if (5 * ht->num_values > 3 * ht->modulus) { ht_grow(ht); } } void* ht_find(const struct hashtable *ht, u64 key) { struct bucket *bucket = &ht->table[key % ht->modulus]; for (int i = 0; i < bucket->len; i++) { if (bucket->pairs[i].key == key) { return bucket->pairs[i].value; } } return NULL; } void ht_delete(struct hashtable *ht, u64 key) { struct bucket *bucket = &ht->table[key % ht->modulus]; for (int i = 0; i < bucket->len; i++) { if (bucket->pairs[i].key == key) { if (i < bucket->len - 1) { bucket->pairs[i] = bucket->pairs[bucket->len - 1]; } bucket->len--; ht->num_values--; return; } } }