H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 1 | /* |
| 2 | * hashtbl.c |
| 3 | * |
| 4 | * Efficient dictionary hash table class. |
| 5 | */ |
| 6 | |
| 7 | #include <inttypes.h> |
| 8 | #include <string.h> |
| 9 | #include "nasm.h" |
| 10 | #include "hashtbl.h" |
| 11 | |
| 12 | #define HASH_INITIAL_SIZE 64 |
| 13 | #define HASH_MAX_LOAD 2 /* Higher = more memory-efficient, slower */ |
| 14 | |
| 15 | static struct hash_tbl_node *alloc_table(size_t newsize) |
| 16 | { |
| 17 | size_t bytes = newsize*sizeof(struct hash_tbl_node); |
H. Peter Anvin | cfdf646 | 2007-09-25 14:27:34 -0700 | [diff] [blame^] | 18 | struct hash_tbl_node *newtbl = nasm_zalloc(bytes); |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 19 | |
| 20 | return newtbl; |
| 21 | } |
| 22 | |
| 23 | struct hash_table *hash_init(void) |
| 24 | { |
| 25 | struct hash_table *head = nasm_malloc(sizeof(struct hash_table)); |
| 26 | |
| 27 | head->table = alloc_table(HASH_INITIAL_SIZE); |
| 28 | head->load = 0; |
| 29 | head->size = HASH_INITIAL_SIZE; |
| 30 | head->max_load = HASH_INITIAL_SIZE*(HASH_MAX_LOAD-1)/HASH_MAX_LOAD; |
| 31 | |
| 32 | return head; |
| 33 | } |
| 34 | |
| 35 | /* |
| 36 | * Find an entry in a hash table. |
| 37 | * |
| 38 | * On failure, if "insert" is non-NULL, store data in that structure |
| 39 | * which can be used to insert that node using hash_add(). |
| 40 | * |
| 41 | * WARNING: this data is only valid until the very next call of |
| 42 | * hash_add(); it cannot be "saved" to a later date. |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 43 | * |
| 44 | * On success, return a pointer to the "data" element of the hash |
| 45 | * structure. |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 46 | */ |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 47 | void **hash_find(struct hash_table *head, const char *key, |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 48 | struct hash_insert *insert) |
| 49 | { |
| 50 | struct hash_tbl_node *np; |
| 51 | uint64_t hash = crc64(key); |
| 52 | struct hash_tbl_node *tbl = head->table; |
| 53 | size_t mask = head->size-1; |
| 54 | size_t pos = hash & mask; |
| 55 | size_t inc = ((hash >> 32) & mask) | 1; /* Always odd */ |
| 56 | |
| 57 | while ((np = &tbl[pos])->key) { |
| 58 | if (hash == np->hash && !strcmp(key, np->key)) |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 59 | return &np->data; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 60 | pos = (pos+inc) & mask; |
| 61 | } |
| 62 | |
| 63 | /* Not found. Store info for insert if requested. */ |
| 64 | if (insert) { |
| 65 | insert->head = head; |
| 66 | insert->hash = hash; |
| 67 | insert->where = np; |
| 68 | } |
| 69 | return NULL; |
| 70 | } |
| 71 | |
| 72 | /* |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 73 | * Same as hash_find, but for case-insensitive hashing. |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 74 | */ |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 75 | void **hash_findi(struct hash_table *head, const char *key, |
| 76 | struct hash_insert *insert) |
| 77 | { |
| 78 | struct hash_tbl_node *np; |
| 79 | uint64_t hash = crc64i(key); |
| 80 | struct hash_tbl_node *tbl = head->table; |
| 81 | size_t mask = head->size-1; |
| 82 | size_t pos = hash & mask; |
| 83 | size_t inc = ((hash >> 32) & mask) | 1; /* Always odd */ |
| 84 | |
| 85 | while ((np = &tbl[pos])->key) { |
| 86 | if (hash == np->hash && !nasm_stricmp(key, np->key)) |
| 87 | return &np->data; |
| 88 | pos = (pos+inc) & mask; |
| 89 | } |
| 90 | |
| 91 | /* Not found. Store info for insert if requested. */ |
| 92 | if (insert) { |
| 93 | insert->head = head; |
| 94 | insert->hash = hash; |
| 95 | insert->where = np; |
| 96 | } |
| 97 | return NULL; |
| 98 | } |
| 99 | |
| 100 | /* |
| 101 | * Insert node. Return a pointer to the "data" element of the newly |
| 102 | * created hash node. |
| 103 | */ |
| 104 | void **hash_add(struct hash_insert *insert, const char *key, void *data) |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 105 | { |
| 106 | struct hash_table *head = insert->head; |
| 107 | struct hash_tbl_node *np = insert->where; |
| 108 | |
| 109 | /* Insert node. We can always do this, even if we need to |
| 110 | rebalance immediately after. */ |
| 111 | np->hash = insert->hash; |
| 112 | np->key = key; |
| 113 | np->data = data; |
| 114 | |
| 115 | if (++head->load > head->max_load) { |
| 116 | /* Need to expand the table */ |
| 117 | size_t newsize = head->size << 1; |
| 118 | struct hash_tbl_node *newtbl = alloc_table(newsize); |
| 119 | size_t mask = newsize-1; |
| 120 | |
| 121 | if (head->table) { |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 122 | struct hash_tbl_node *op, *xp; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 123 | size_t i; |
| 124 | |
| 125 | /* Rebalance all the entries */ |
| 126 | for (i = 0, op = head->table; i < head->size; i++, op++) { |
| 127 | if (op->key) { |
| 128 | size_t pos = op->hash & mask; |
| 129 | size_t inc = ((op->hash >> 32) & mask) | 1; |
| 130 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 131 | while ((xp = &newtbl[pos])->key) |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 132 | pos = (pos+inc) & mask; |
| 133 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 134 | *xp = *op; |
| 135 | if (op == np) |
| 136 | np = xp; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 137 | } |
| 138 | } |
| 139 | nasm_free(head->table); |
| 140 | } |
| 141 | |
| 142 | head->table = newtbl; |
| 143 | head->size = newsize; |
| 144 | head->max_load = newsize*(HASH_MAX_LOAD-1)/HASH_MAX_LOAD; |
| 145 | } |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 146 | |
| 147 | return &np->data; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 148 | } |
| 149 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 150 | /* |
| 151 | * Iterate over all members of a hash set. For the first call, |
| 152 | * iterator should be initialized to NULL. Returns the data pointer, |
| 153 | * or NULL on failure. |
| 154 | */ |
| 155 | void *hash_iterate(const struct hash_table *head, |
| 156 | struct hash_tbl_node **iterator, |
| 157 | const char **key) |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 158 | { |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 159 | struct hash_tbl_node *np = *iterator; |
| 160 | struct hash_tbl_node *ep = head->table + head->size; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 161 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 162 | if (!np) |
| 163 | np = head->table; |
| 164 | |
| 165 | while (np < ep) { |
| 166 | if (np->key) { |
| 167 | *iterator = np+1; |
| 168 | if (key) |
| 169 | *key = np->key; |
| 170 | return np->data; |
| 171 | } |
| 172 | np++; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 173 | } |
| 174 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 175 | *iterator = NULL; |
| 176 | if (key) |
| 177 | *key = NULL; |
| 178 | return NULL; |
| 179 | } |
| 180 | |
| 181 | /* |
| 182 | * Free the hash itself. Doesn't free the data elements; use |
| 183 | * hash_iterate() to do that first, if needed. |
| 184 | */ |
| 185 | void hash_free(struct hash_table *head) |
| 186 | { |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 187 | nasm_free(head->table); |
| 188 | nasm_free(head); |
| 189 | } |