aboutsummaryrefslogtreecommitdiff
path: root/hashtable.c
diff options
context:
space:
mode:
Diffstat (limited to 'hashtable.c')
-rw-r--r--hashtable.c149
1 files changed, 149 insertions, 0 deletions
diff --git a/hashtable.c b/hashtable.c
new file mode 100644
index 0000000..ea0f0fc
--- /dev/null
+++ b/hashtable.c
@@ -0,0 +1,149 @@
+#include <limits.h>
+#include "hashtable.h"
+#include "global.h"
+
+
+// Generated by starting with 5, then repeatedly taking the smallest prime at
+// least double the previous number.
+// This array must end well before UINT64_MAX, so that the occupancy percentage
+// computation does not overflow.
+static const size_t doubling_primes[] = {
+ 5, 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437,
+ 102877, 205759, 411527, 823117, 1646237, 3292489, 6584983, 13169977,
+ 26339969, 52679969, 105359939, 210719881, 421439783, 842879579
+};
+#define NUM_DOUBLING_PRIMES ((size_t)(sizeof(doubling_primes) / sizeof(doubling_primes[0])))
+
+
+struct keyvalue {
+ unsigned int key;
+ void *value;
+};
+
+struct bucket {
+ int cap, len;
+ struct keyvalue *pairs;
+};
+
+struct hashtable {
+ // modulus == doubling_primes[modulus_index]
+ size_t modulus_index, modulus;
+ size_t num_values;
+ struct bucket *table;
+};
+
+static void ht_nullify(struct hashtable *ht) {
+ for (size_t i = 0; i < ht->modulus; i++) {
+ free(ht->table[i].pairs); // noop if values == NULL
+ }
+ free(ht->table);
+}
+
+static void ht_grow(struct hashtable *ht) {
+ static bool nested_grow = false;
+
+ // See the ht_insert() call below
+ if (nested_grow) {
+ die("Unexpected nested ht_grow() in ht_insert()");
+ }
+
+ if (ht->modulus_index == NUM_DOUBLING_PRIMES - 1) {
+ // TODO: fail more gracefully
+ die("Hashtable full, cannot insert");
+ }
+
+ struct hashtable newht;
+ newht.modulus_index = ht->modulus_index + 1;
+ newht.modulus = doubling_primes[newht.modulus_index];
+ newht.num_values = 0;
+ newht.table = calloc(newht.modulus, struct bucket);
+
+ debug("ht: growing from %zu to %zu", ht->modulus, newht.modulus);
+
+ for (size_t i = 0; i < ht->modulus; i++) {
+ const struct bucket *bucket = &ht->table[i];
+ for (int j = 0; j < bucket->len; j++) {
+ const struct keyvalue *pair = &bucket->pairs[j];
+ // A priori, this could recursively call ht_grow() on the new
+ // table. However, this is in practice impossible, because growing
+ // occurs at 60% occupancy, and we only just surpassed that with
+ // the old size -- and we just doubled the size. Therefore,
+ // recursively arriving here in ht_grow() should be impossible.
+ // This is dynamically checked using the nested_grow static
+ // boolean.
+ ht_insert(&newht, pair->key, pair->value);
+ }
+ }
+
+ ht_nullify(ht);
+ *ht = newht;
+}
+
+struct hashtable* ht_alloc() {
+ struct hashtable *ht = malloc(1, struct hashtable);
+ ht->modulus_index = 0;
+ ht->modulus = doubling_primes[ht->modulus_index];
+ ht->num_values = 0;
+ ht->table = calloc(ht->modulus, struct bucket);
+ return ht;
+}
+
+void ht_free(struct hashtable *ht) {
+ ht_nullify(ht);
+ free(ht);
+}
+
+void ht_insert(struct hashtable *ht, unsigned int key, void *value) {
+ struct bucket *bucket = &ht->table[key % ht->modulus];
+
+ debug("ht: insert num_values=%zu key=%u bucket: cap=%d len=%d", ht->num_values, key, bucket->cap, bucket->len);
+
+ if (bucket->cap == 0) {
+ bucket->cap = 1;
+ bucket->pairs = malloc(bucket->cap, struct keyvalue);
+ } else if (bucket->len == bucket->cap) {
+ if (bucket->cap > INT_MAX / 2) {
+ die("Hashtable has degraded so far it's not funny anymore");
+ }
+ bucket->cap *= 2;
+ bucket->pairs = realloc(bucket->pairs, bucket->cap, struct keyvalue);
+ }
+
+ bucket->pairs[bucket->len].key = key;
+ bucket->pairs[bucket->len].value = value;
+ bucket->len++;
+ ht->num_values++;
+
+ // Reallocate at >60% occupation (counting colliding keys double).
+ // Note that this does not overflow since the last entry in doubling_primes
+ // is only on the order of 10^9.
+ if (5 * ht->num_values > 3 * ht->modulus) {
+ ht_grow(ht);
+ }
+}
+
+void* ht_find(const struct hashtable *ht, unsigned int key) {
+ struct bucket *bucket = &ht->table[key % ht->modulus];
+ for (int i = 0; i < bucket->len; i++) {
+ if (bucket->pairs[i].key == key) {
+ return bucket->pairs[i].value;
+ }
+ }
+ return NULL;
+}
+
+void ht_delete(struct hashtable *ht, unsigned int key) {
+ struct bucket *bucket = &ht->table[key % ht->modulus];
+ for (int i = 0; i < bucket->len; i++) {
+ if (bucket->pairs[i].key == key) {
+ debug("ht: delete key=%u bucket: cap=%d len=%d i=%d", key, bucket->cap, bucket->len, i);
+ if (i < bucket->len - 1) {
+ bucket->pairs[i] = bucket->pairs[bucket->len - 1];
+ }
+ bucket->len--;
+ ht->num_values--;
+ return;
+ }
+ }
+ debug("ht: delete key=%u not found", key);
+}