H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 1 | /* |
| 2 | * hashtbl.c |
| 3 | * |
| 4 | * Efficient dictionary hash table class. |
| 5 | */ |
| 6 | |
H. Peter Anvin | fe50195 | 2007-10-02 21:53:51 -0700 | [diff] [blame^] | 7 | #include "compiler.h" |
| 8 | |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 9 | #include <inttypes.h> |
| 10 | #include <string.h> |
| 11 | #include "nasm.h" |
| 12 | #include "hashtbl.h" |
| 13 | |
| 14 | #define HASH_INITIAL_SIZE 64 |
| 15 | #define HASH_MAX_LOAD 2 /* Higher = more memory-efficient, slower */ |
| 16 | |
| 17 | static struct hash_tbl_node *alloc_table(size_t newsize) |
| 18 | { |
| 19 | size_t bytes = newsize*sizeof(struct hash_tbl_node); |
H. Peter Anvin | cfdf646 | 2007-09-25 14:27:34 -0700 | [diff] [blame] | 20 | struct hash_tbl_node *newtbl = nasm_zalloc(bytes); |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 21 | |
| 22 | return newtbl; |
| 23 | } |
| 24 | |
| 25 | struct hash_table *hash_init(void) |
| 26 | { |
| 27 | struct hash_table *head = nasm_malloc(sizeof(struct hash_table)); |
| 28 | |
| 29 | head->table = alloc_table(HASH_INITIAL_SIZE); |
| 30 | head->load = 0; |
| 31 | head->size = HASH_INITIAL_SIZE; |
| 32 | head->max_load = HASH_INITIAL_SIZE*(HASH_MAX_LOAD-1)/HASH_MAX_LOAD; |
| 33 | |
| 34 | return head; |
| 35 | } |
| 36 | |
| 37 | /* |
| 38 | * Find an entry in a hash table. |
| 39 | * |
| 40 | * On failure, if "insert" is non-NULL, store data in that structure |
| 41 | * which can be used to insert that node using hash_add(). |
| 42 | * |
| 43 | * WARNING: this data is only valid until the very next call of |
| 44 | * hash_add(); it cannot be "saved" to a later date. |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 45 | * |
| 46 | * On success, return a pointer to the "data" element of the hash |
| 47 | * structure. |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 48 | */ |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 49 | void **hash_find(struct hash_table *head, const char *key, |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 50 | struct hash_insert *insert) |
| 51 | { |
| 52 | struct hash_tbl_node *np; |
H. Peter Anvin | a59795c | 2007-10-02 17:40:00 -0700 | [diff] [blame] | 53 | uint64_t hash = crc64(CRC64_INIT, key); |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 54 | struct hash_tbl_node *tbl = head->table; |
| 55 | size_t mask = head->size-1; |
| 56 | size_t pos = hash & mask; |
| 57 | size_t inc = ((hash >> 32) & mask) | 1; /* Always odd */ |
| 58 | |
| 59 | while ((np = &tbl[pos])->key) { |
| 60 | if (hash == np->hash && !strcmp(key, np->key)) |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 61 | return &np->data; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 62 | pos = (pos+inc) & mask; |
| 63 | } |
| 64 | |
| 65 | /* Not found. Store info for insert if requested. */ |
| 66 | if (insert) { |
| 67 | insert->head = head; |
| 68 | insert->hash = hash; |
| 69 | insert->where = np; |
| 70 | } |
| 71 | return NULL; |
| 72 | } |
| 73 | |
| 74 | /* |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 75 | * Same as hash_find, but for case-insensitive hashing. |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 76 | */ |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 77 | void **hash_findi(struct hash_table *head, const char *key, |
| 78 | struct hash_insert *insert) |
| 79 | { |
| 80 | struct hash_tbl_node *np; |
H. Peter Anvin | a59795c | 2007-10-02 17:40:00 -0700 | [diff] [blame] | 81 | uint64_t hash = crc64i(CRC64_INIT, key); |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 82 | struct hash_tbl_node *tbl = head->table; |
| 83 | size_t mask = head->size-1; |
| 84 | size_t pos = hash & mask; |
| 85 | size_t inc = ((hash >> 32) & mask) | 1; /* Always odd */ |
| 86 | |
| 87 | while ((np = &tbl[pos])->key) { |
| 88 | if (hash == np->hash && !nasm_stricmp(key, np->key)) |
| 89 | return &np->data; |
| 90 | pos = (pos+inc) & mask; |
| 91 | } |
| 92 | |
| 93 | /* Not found. Store info for insert if requested. */ |
| 94 | if (insert) { |
| 95 | insert->head = head; |
| 96 | insert->hash = hash; |
| 97 | insert->where = np; |
| 98 | } |
| 99 | return NULL; |
| 100 | } |
| 101 | |
| 102 | /* |
| 103 | * Insert node. Return a pointer to the "data" element of the newly |
| 104 | * created hash node. |
| 105 | */ |
| 106 | void **hash_add(struct hash_insert *insert, const char *key, void *data) |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 107 | { |
| 108 | struct hash_table *head = insert->head; |
| 109 | struct hash_tbl_node *np = insert->where; |
| 110 | |
| 111 | /* Insert node. We can always do this, even if we need to |
| 112 | rebalance immediately after. */ |
| 113 | np->hash = insert->hash; |
| 114 | np->key = key; |
| 115 | np->data = data; |
| 116 | |
| 117 | if (++head->load > head->max_load) { |
| 118 | /* Need to expand the table */ |
| 119 | size_t newsize = head->size << 1; |
| 120 | struct hash_tbl_node *newtbl = alloc_table(newsize); |
| 121 | size_t mask = newsize-1; |
| 122 | |
| 123 | if (head->table) { |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 124 | struct hash_tbl_node *op, *xp; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 125 | size_t i; |
| 126 | |
| 127 | /* Rebalance all the entries */ |
| 128 | for (i = 0, op = head->table; i < head->size; i++, op++) { |
| 129 | if (op->key) { |
| 130 | size_t pos = op->hash & mask; |
| 131 | size_t inc = ((op->hash >> 32) & mask) | 1; |
| 132 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 133 | while ((xp = &newtbl[pos])->key) |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 134 | pos = (pos+inc) & mask; |
| 135 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 136 | *xp = *op; |
| 137 | if (op == np) |
| 138 | np = xp; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 139 | } |
| 140 | } |
| 141 | nasm_free(head->table); |
| 142 | } |
| 143 | |
| 144 | head->table = newtbl; |
| 145 | head->size = newsize; |
| 146 | head->max_load = newsize*(HASH_MAX_LOAD-1)/HASH_MAX_LOAD; |
| 147 | } |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 148 | |
| 149 | return &np->data; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 150 | } |
| 151 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 152 | /* |
| 153 | * Iterate over all members of a hash set. For the first call, |
| 154 | * iterator should be initialized to NULL. Returns the data pointer, |
| 155 | * or NULL on failure. |
| 156 | */ |
| 157 | void *hash_iterate(const struct hash_table *head, |
| 158 | struct hash_tbl_node **iterator, |
| 159 | const char **key) |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 160 | { |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 161 | struct hash_tbl_node *np = *iterator; |
| 162 | struct hash_tbl_node *ep = head->table + head->size; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 163 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 164 | if (!np) |
| 165 | np = head->table; |
| 166 | |
| 167 | while (np < ep) { |
| 168 | if (np->key) { |
| 169 | *iterator = np+1; |
| 170 | if (key) |
| 171 | *key = np->key; |
| 172 | return np->data; |
| 173 | } |
| 174 | np++; |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 175 | } |
| 176 | |
H. Peter Anvin | 97a2347 | 2007-09-16 17:57:25 -0700 | [diff] [blame] | 177 | *iterator = NULL; |
| 178 | if (key) |
| 179 | *key = NULL; |
| 180 | return NULL; |
| 181 | } |
| 182 | |
| 183 | /* |
| 184 | * Free the hash itself. Doesn't free the data elements; use |
| 185 | * hash_iterate() to do that first, if needed. |
| 186 | */ |
| 187 | void hash_free(struct hash_table *head) |
| 188 | { |
H. Peter Anvin | cde0829 | 2007-09-13 23:34:21 -0700 | [diff] [blame] | 189 | nasm_free(head->table); |
| 190 | nasm_free(head); |
| 191 | } |