blob: 9a4c0b5527265bb5455ac15abc08d2d9a7d6ccea [file] [log] [blame]
/* ----------------------------------------------------------------------- *
 *
 *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following
 *   conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ----------------------------------------------------------------------- */
33
/*
 * hashtbl.c
 *
 * Efficient dictionary hash table class.
 */
39
H. Peter Anvinfe501952007-10-02 21:53:51 -070040#include "compiler.h"
41
H. Peter Anvincde08292007-09-13 23:34:21 -070042#include "nasm.h"
43#include "hashtbl.h"
44
/*
 * Tuning constants.
 *
 * The table is expanded as soon as the number of stored entries exceeds
 * hash_max_load(size); with HASH_MAX_LOAD == 2 that is half the slots.
 */
#define HASH_MAX_LOAD   2  /* Higher = more memory-efficient, slower */
#define HASH_INIT_SIZE  16 /* Initial size (power of 2, min 4) */

/* Hash of a binary key: case-sensitive and case-insensitive variants */
#define hash_calc(key,keylen)   crc64b(CRC64_INIT, (key), (keylen))
#define hash_calci(key,keylen)  crc64ib(CRC64_INIT, (key), (keylen))

/* Largest number of entries a table of "size" slots holds before growing */
#define hash_max_load(size)     ((size) * (HASH_MAX_LOAD - 1) / HASH_MAX_LOAD)
/* Size of the next larger table (doubling keeps the size a power of 2) */
#define hash_expand(size)       ((size) << 1)
/* Index mask for a power-of-2 table size */
#define hash_mask(size)         ((size) - 1)

/*
 * Open-addressing probe sequence.  The starting slot comes from the low
 * bits of the hash and the step from bits 32 and up, so "hash" must be
 * a 64-bit value.  The step is forced odd so that, with a power-of-2
 * table size, repeated stepping reaches every slot.
 */
#define hash_pos(hash, mask)    ((hash) & (mask))
#define hash_inc(hash, mask)    ((((hash) >> 32) & (mask)) | 1) /* always odd */
#define hash_pos_next(pos, inc, mask) (((pos) + (inc)) & (mask))
56
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -080057static void hash_init(struct hash_table *head)
H. Peter Anvincde08292007-09-13 23:34:21 -070058{
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -080059 head->size = HASH_INIT_SIZE;
H. Peter Anvincde08292007-09-13 23:34:21 -070060 head->load = 0;
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -080061 head->max_load = hash_max_load(head->size);
62 nasm_newn(head->table, head->size);
H. Peter Anvincde08292007-09-13 23:34:21 -070063}
64
65/*
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -080066 * Find an entry in a hash table. The key can be any binary object.
H. Peter Anvincde08292007-09-13 23:34:21 -070067 *
68 * On failure, if "insert" is non-NULL, store data in that structure
69 * which can be used to insert that node using hash_add().
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -080070 * See hash_add() for constraints on the uses of the insert object.
H. Peter Anvin97a23472007-09-16 17:57:25 -070071 *
72 * On success, return a pointer to the "data" element of the hash
73 * structure.
H. Peter Anvincde08292007-09-13 23:34:21 -070074 */
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -080075void **hash_findb(struct hash_table *head, const void *key,
76 size_t keylen, struct hash_insert *insert)
H. Peter Anvincde08292007-09-13 23:34:21 -070077{
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -080078 struct hash_node *np = NULL;
79 struct hash_node *tbl = head->table;
80 uint64_t hash = hash_calc(key, keylen);
Cyrill Gorcunov2967fee2009-11-06 21:58:48 +030081 size_t mask = hash_mask(head->size);
82 size_t pos = hash_pos(hash, mask);
83 size_t inc = hash_inc(hash, mask);
H. Peter Anvincde08292007-09-13 23:34:21 -070084
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -080085 if (likely(tbl)) {
86 while ((np = &tbl[pos])->key) {
87 if (hash == np->hash &&
88 keylen == np->keylen &&
89 !memcmp(key, np->key, keylen))
90 return &np->data;
91 pos = hash_pos_next(pos, inc, mask);
92 }
H. Peter Anvincde08292007-09-13 23:34:21 -070093 }
94
95 /* Not found. Store info for insert if requested. */
96 if (insert) {
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -080097 insert->node.hash = hash;
98 insert->node.key = key;
99 insert->node.keylen = keylen;
100 insert->node.data = NULL;
Cyrill Gorcunov1598a232009-10-11 13:17:52 +0400101 insert->head = head;
Cyrill Gorcunov1598a232009-10-11 13:17:52 +0400102 insert->where = np;
H. Peter Anvincde08292007-09-13 23:34:21 -0700103 }
104 return NULL;
105}
106
/*
 * Same as hash_findb(), but for a C string.  The key length passed on
 * is strlen(key)+1, i.e. the terminating NUL is part of the key.
 */
void **hash_find(struct hash_table *head, const char *key,
                 struct hash_insert *insert)
{
    return hash_findb(head, key, strlen(key)+1, insert);
}
115
116/*
117 * Same as hash_findb(), but for case-insensitive hashing.
118 */
119void **hash_findib(struct hash_table *head, const void *key, size_t keylen,
120 struct hash_insert *insert)
121{
122 struct hash_node *np = NULL;
123 struct hash_node *tbl = head->table;
124 uint64_t hash = hash_calci(key, keylen);
Cyrill Gorcunov2967fee2009-11-06 21:58:48 +0300125 size_t mask = hash_mask(head->size);
126 size_t pos = hash_pos(hash, mask);
127 size_t inc = hash_inc(hash, mask);
H. Peter Anvin97a23472007-09-16 17:57:25 -0700128
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800129 if (likely(tbl)) {
130 while ((np = &tbl[pos])->key) {
131 if (hash == np->hash &&
132 keylen == np->keylen &&
133 !nasm_memicmp(key, np->key, keylen))
134 return &np->data;
135 pos = hash_pos_next(pos, inc, mask);
136 }
H. Peter Anvin97a23472007-09-16 17:57:25 -0700137 }
138
139 /* Not found. Store info for insert if requested. */
140 if (insert) {
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800141 insert->node.hash = hash;
142 insert->node.key = key;
143 insert->node.keylen = keylen;
144 insert->node.data = NULL;
Cyrill Gorcunov1598a232009-10-11 13:17:52 +0400145 insert->head = head;
Cyrill Gorcunov1598a232009-10-11 13:17:52 +0400146 insert->where = np;
H. Peter Anvin97a23472007-09-16 17:57:25 -0700147 }
148 return NULL;
149}
150
151/*
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800152 * Same as hash_find(), but for case-insensitive hashing.
153 */
154void **hash_findi(struct hash_table *head, const char *key,
155 struct hash_insert *insert)
156{
157 return hash_findib(head, key, strlen(key)+1, insert);
158}
159
160/*
H. Peter Anvin97a23472007-09-16 17:57:25 -0700161 * Insert node. Return a pointer to the "data" element of the newly
162 * created hash node.
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800163 *
164 * The following constraints apply:
165 * 1. A call to hash_add() invalidates all other outstanding hash_insert
166 * objects; attempting to use them causes a wild pointer reference.
167 * 2. The key provided must exactly match the key passed to hash_find*(),
168 * but it does not have to point to the same storage address. The key
169 * buffer provided to this function must not be freed for the lifespan
170 * of the hash. NULL will use the same pointer that was passed to
171 * hash_find*().
H. Peter Anvin97a23472007-09-16 17:57:25 -0700172 */
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800173void **hash_add(struct hash_insert *insert, const void *key, void *data)
H. Peter Anvincde08292007-09-13 23:34:21 -0700174{
175 struct hash_table *head = insert->head;
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800176 struct hash_node *np = insert->where;
177
178 if (unlikely(!np)) {
179 hash_init(head);
180 /* The hash table is empty, so we don't need to iterate here */
181 np = &head->table[hash_pos(insert->node.hash, hash_mask(head->size))];
182 }
H. Peter Anvincde08292007-09-13 23:34:21 -0700183
Cyrill Gorcunov2967fee2009-11-06 21:58:48 +0300184 /*
185 * Insert node. We can always do this, even if we need to
186 * rebalance immediately after.
187 */
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800188 *np = insert->node;
H. Peter Anvincde08292007-09-13 23:34:21 -0700189 np->data = data;
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800190 if (key)
191 np->key = key;
H. Peter Anvincde08292007-09-13 23:34:21 -0700192
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800193 if (unlikely(++head->load > head->max_load)) {
Cyrill Gorcunov1598a232009-10-11 13:17:52 +0400194 /* Need to expand the table */
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800195 size_t newsize = hash_expand(head->size);
196 struct hash_node *newtbl;
197 size_t mask = hash_mask(newsize);
198 struct hash_node *op, *xp;
199 size_t i;
H. Peter Anvincde08292007-09-13 23:34:21 -0700200
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800201 nasm_newn(newtbl, newsize);
H. Peter Anvincde08292007-09-13 23:34:21 -0700202
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800203 /* Rebalance all the entries */
204 for (i = 0, op = head->table; i < head->size; i++, op++) {
205 if (op->key) {
206 size_t pos = hash_pos(op->hash, mask);
207 size_t inc = hash_inc(op->hash, mask);
H. Peter Anvin70653092007-10-19 14:42:29 -0700208
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800209 while ((xp = &newtbl[pos])->key)
210 pos = hash_pos_next(pos, inc, mask);
H. Peter Anvincde08292007-09-13 23:34:21 -0700211
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800212 *xp = *op;
213 if (op == np)
214 np = xp;
Cyrill Gorcunov1598a232009-10-11 13:17:52 +0400215 }
Cyrill Gorcunov1598a232009-10-11 13:17:52 +0400216 }
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800217 nasm_free(head->table);
H. Peter Anvincde08292007-09-13 23:34:21 -0700218
Cyrill Gorcunov1598a232009-10-11 13:17:52 +0400219 head->table = newtbl;
220 head->size = newsize;
Cyrill Gorcunov2967fee2009-11-06 21:58:48 +0300221 head->max_load = hash_max_load(newsize);
H. Peter Anvincde08292007-09-13 23:34:21 -0700222 }
H. Peter Anvin97a23472007-09-16 17:57:25 -0700223
224 return &np->data;
H. Peter Anvincde08292007-09-13 23:34:21 -0700225}
226
H. Peter Anvin97a23472007-09-16 17:57:25 -0700227/*
H. Peter Anvin (Intel)c5593142018-12-14 00:10:15 -0800228 * Iterate over all members of a hash set. For the first call, iter
229 * should be as initialized by hash_iterator_init(). Returns a struct
230 * hash_node representing the current object, or NULL if we have
231 * reached the end of the hash table.
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800232 *
233 * Calling hash_add() will invalidate the iterator.
H. Peter Anvin97a23472007-09-16 17:57:25 -0700234 */
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800235const struct hash_node *hash_iterate(struct hash_iterator *iter)
H. Peter Anvincde08292007-09-13 23:34:21 -0700236{
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800237 const struct hash_table *head = iter->head;
238 const struct hash_node *cp = iter->next;
239 const struct hash_node *ep = head->table + head->size;
H. Peter Anvincde08292007-09-13 23:34:21 -0700240
H. Peter Anvin (Intel)c5593142018-12-14 00:10:15 -0800241 /* For an empty table, cp == ep == NULL */
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800242 while (cp < ep) {
H. Peter Anvin (Intel)c5593142018-12-14 00:10:15 -0800243 if (cp->key) {
244 iter->next = cp+1;
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800245 return cp;
Cyrill Gorcunov1598a232009-10-11 13:17:52 +0400246 }
H. Peter Anvin (Intel)c5593142018-12-14 00:10:15 -0800247 cp++;
H. Peter Anvincde08292007-09-13 23:34:21 -0700248 }
249
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800250 iter->next = head->table;
H. Peter Anvin97a23472007-09-16 17:57:25 -0700251 return NULL;
252}
253
254/*
255 * Free the hash itself. Doesn't free the data elements; use
H. Peter Anvinbbb39572017-04-24 00:41:21 -0700256 * hash_iterate() to do that first, if needed. This function is normally
257 * used when the hash data entries are either freed separately, or
258 * compound objects which can't be freed in a single operation.
H. Peter Anvin97a23472007-09-16 17:57:25 -0700259 */
260void hash_free(struct hash_table *head)
261{
H. Peter Anvin166c2472008-05-28 12:28:58 -0700262 void *p = head->table;
H. Peter Anvin (Intel)c5593142018-12-14 00:10:15 -0800263 memset(head, 0, sizeof *head);
264 nasm_free(p);
H. Peter Anvincde08292007-09-13 23:34:21 -0700265}
H. Peter Anvinbbb39572017-04-24 00:41:21 -0700266
267/*
268 * Frees the hash *and* all data elements. This is applicable only in
269 * the case where the data element is a single allocation. If the
270 * second argument is false, the key string is part of the data
271 * allocation or belongs to an allocation which will be freed
272 * separately, if it is true the keys are also freed.
273 */
274void hash_free_all(struct hash_table *head, bool free_keys)
275{
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800276 struct hash_iterator it;
277 const struct hash_node *np;
H. Peter Anvinbbb39572017-04-24 00:41:21 -0700278
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800279 hash_for_each(head, it, np) {
H. Peter Anvin (Intel)f9f37dd2020-06-08 19:01:05 -0700280 if (np->data)
281 nasm_free(np->data);
282 if (free_keys && np->key)
H. Peter Anvin (Intel)ebb05a02018-12-11 12:30:25 -0800283 nasm_free((void *)np->key);
H. Peter Anvinbbb39572017-04-24 00:41:21 -0700284 }
285
286 hash_free(head);
287}