aboutsummaryrefslogtreecommitdiff
path: root/hashtable.c
blob: bfda316df33838ab9b955e5ad587078307151859 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#include <limits.h>
#include "hashtable.h"
#include "global.h"


// Generated by starting with 5, then repeatedly taking the smallest prime at
// least double the previous number.
// This array must end well before UINT64_MAX, so that the occupancy percentage
// computation does not overflow.
static const size_t doubling_primes[] = {
	5, 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437,
	102877, 205759, 411527, 823117, 1646237, 3292489, 6584983, 13169977,
	26339969, 52679969, 105359939, 210719881, 421439783, 842879579
};
#define NUM_DOUBLING_PRIMES ((size_t)(sizeof(doubling_primes) / sizeof(doubling_primes[0])))


struct keyvalue {
	unsigned int key;
	void *value;
};

struct bucket {
	int cap, len;
	struct keyvalue *pairs;
};

struct hashtable {
	// modulus == doubling_primes[modulus_index]
	size_t modulus_index, modulus;
	size_t num_values;
	struct bucket *table;
};

static void ht_grow(struct hashtable *ht) {
	static bool nested_grow = false;

	// See the ht_insert() call below
	if (nested_grow) {
		die("Unexpected nested ht_grow() in ht_insert()");
	}

	if (ht->modulus_index == NUM_DOUBLING_PRIMES - 1) {
		// TODO: fail more gracefully
		die("Hashtable full, cannot insert");
	}

	struct hashtable newht;
	newht.modulus_index = ht->modulus_index + 1;
	newht.modulus = doubling_primes[newht.modulus_index];
	newht.num_values = 0;
	newht.table = calloc(newht.modulus, struct bucket);

	for (size_t i = 0; i < ht->modulus; i++) {
		const struct bucket *bucket = &ht->table[i];
		for (int j = 0; j < bucket->len; j++) {
			const struct keyvalue *pair = &bucket->pairs[j];
			// A priori, this could recursively call ht_grow() on the new
			// table. However, this is in practice impossible, because growing
			// occurs at 60% occupancy, and we only just surpassed that with
			// the old size -- and we just doubled the size. Therefore,
			// recursively arriving here in ht_grow() should be impossible.
			// This is dynamically checked using the nested_grow static
			// boolean.
			ht_insert(&newht, pair->key, pair->value);
		}

		free(bucket->pairs);  // noop if pairs == NULL
	}

	free(ht->table);

	*ht = newht;
}

struct hashtable* ht_alloc() {
	struct hashtable *ht = malloc(1, struct hashtable);
	ht->modulus_index = 0;
	ht->modulus = doubling_primes[ht->modulus_index];
	ht->num_values = 0;
	ht->table = calloc(ht->modulus, struct bucket);
	return ht;
}

void ht_free(struct hashtable *ht) {
	for (size_t i = 0; i < ht->modulus; i++) {
		free(ht->table[i].pairs); // noop if pairs == NULL
	}
	free(ht->table);
	free(ht);
}

void ht_insert(struct hashtable *ht, unsigned int key, void *value) {
	struct bucket *bucket = &ht->table[key % ht->modulus];

	if (bucket->cap == 0) {
		bucket->cap = 1;
		bucket->pairs = malloc(bucket->cap, struct keyvalue);
	} else if (bucket->len == bucket->cap) {
		if (bucket->cap > INT_MAX / 2) {
			die("Hashtable has degraded so far it's not funny anymore");
		}
		bucket->cap *= 2;
		bucket->pairs = realloc(bucket->pairs, bucket->cap, struct keyvalue);
	}

	bucket->pairs[bucket->len].key = key;
	bucket->pairs[bucket->len].value = value;
	bucket->len++;
	ht->num_values++;

	// Reallocate at >60% occupation (counting colliding keys double).
	// Note that this does not overflow since the last entry in doubling_primes
	// is only on the order of 10^9.
	if (5 * ht->num_values > 3 * ht->modulus) {
		ht_grow(ht);
	}
}

void* ht_find(const struct hashtable *ht, unsigned int key) {
	struct bucket *bucket = &ht->table[key % ht->modulus];
	for (int i = 0; i < bucket->len; i++) {
		if (bucket->pairs[i].key == key) {
			return bucket->pairs[i].value;
		}
	}
	return NULL;
}

void ht_delete(struct hashtable *ht, unsigned int key) {
	struct bucket *bucket = &ht->table[key % ht->modulus];
	for (int i = 0; i < bucket->len; i++) {
		if (bucket->pairs[i].key == key) {
			if (i < bucket->len - 1) {
				bucket->pairs[i] = bucket->pairs[bucket->len - 1];
			}
			bucket->len--;
			ht->num_values--;
			return;
		}
	}
}