#include "MurmurHash2.h"

//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby

// Note - This code makes a few assumptions about how your machine behaves -

// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4

// And it has a few limitations -

// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines.
uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
{
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.

  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  // Initialize the hash to a 'random' value

  uint32_t h = seed ^ len;

  // Mix 4 bytes at a time into the hash

  const unsigned char * data = (const unsigned char *)key;

  while(len >= 4)
  {
    uint32_t k = *(uint32_t*)data;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  // Handle the last few bytes of the input array

  switch(len)
  {
  case 3: h ^= data[2] << 16;
  case 2: h ^= data[1] << 8;
  case 1: h ^= data[0];
          h *= m;
  };

  // Do a few final mixes of the hash to ensure the last few
  // bytes are well-incorporated.

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
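
//-----------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original file): a one-shot hash of
// a C string. The seed value and the strlen()-based length are arbitrary choices
// made for this example; strlen() would come from <string.h>, which this file
// does not include itself.

// #include <string.h>
//
// const char * str = "hello world";
// uint32_t hash = MurmurHash2 ( str, (int)strlen(str), 0x9747b28c );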

//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby

// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.

// 64-bit hash for 64-bit platforms

uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
{
  const uint64_t m = 0xc6a4a7935bd1e995ULL;
  const int r = 47;

  uint64_t h = seed ^ (len * m);

  const uint64_t * data = (const uint64_t *)key;
  const uint64_t * end = data + (len/8);

  while(data != end)
  {
    uint64_t k = *data++;

    k *= m;
    k ^= k >> r;
    k *= m;

    h ^= k;
    h *= m;
  }

  const unsigned char * data2 = (const unsigned char*)data;

  switch(len & 7)
  {
  case 7: h ^= uint64_t(data2[6]) << 48;
  case 6: h ^= uint64_t(data2[5]) << 40;
  case 5: h ^= uint64_t(data2[4]) << 32;
  case 4: h ^= uint64_t(data2[3]) << 24;
  case 3: h ^= uint64_t(data2[2]) << 16;
  case 2: h ^= uint64_t(data2[1]) << 8;
  case 1: h ^= uint64_t(data2[0]);
          h *= m;
  };

  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
}


// 64-bit hash for 32-bit platforms

uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  uint32_t h1 = uint32_t(seed) ^ len;
  uint32_t h2 = uint32_t(seed >> 32);

  const uint32_t * data = (const uint32_t *)key;

  while(len >= 8)
  {
    uint32_t k1 = *data++;
    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;
    len -= 4;

    uint32_t k2 = *data++;
    k2 *= m; k2 ^= k2 >> r; k2 *= m;
    h2 *= m; h2 ^= k2;
    len -= 4;
  }

  if(len >= 4)
  {
    uint32_t k1 = *data++;
    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;
    len -= 4;
  }

  switch(len)
  {
  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
  case 1: h2 ^= ((unsigned char*)data)[0];
          h2 *= m;
  };

  h1 ^= h2 >> 18; h1 *= m;
  h2 ^= h1 >> 22; h2 *= m;
  h1 ^= h2 >> 17; h1 *= m;
  h2 ^= h1 >> 19; h2 *= m;

  uint64_t h = h1;

  h = (h << 32) | h2;

  return h;
}
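
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): picking between the two
// 64-bit variants at compile time. Note that MurmurHash64A and MurmurHash64B do
// NOT produce the same values for the same input, so whichever variant is chosen
// must be used consistently. The pointer-size test below is one possible way to
// detect the platform; it is an assumption of this example, not of the original
// code.

// uint64_t Hash64_Example ( const void * key, int len, uint64_t seed )
// {
//   if(sizeof(void*) == 8)
//     return MurmurHash64A(key,len,seed); // 64-bit platforms
//   else
//     return MurmurHash64B(key,len,seed); // 32-bit platforms
// }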

//-----------------------------------------------------------------------------
// MurmurHash2A, by Austin Appleby

// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.

// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.

#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;
  uint32_t l = len;

  const unsigned char * data = (const unsigned char *)key;

  uint32_t h = seed;

  while(len >= 4)
  {
    uint32_t k = *(uint32_t*)data;

    mmix(h,k);

    data += 4;
    len -= 4;
  }

  uint32_t t = 0;

  switch(len)
  {
  case 3: t ^= data[2] << 16;
  case 2: t ^= data[1] << 8;
  case 1: t ^= data[0];
  };

  mmix(h,t);
  mmix(h,l);

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
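
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): MurmurHash2A uses a
// different construction than MurmurHash2, so the two functions generally return
// different values for the same key and seed - they are not interchangeable once
// hashes have been stored. The buffer and seed below are arbitrary example
// values.

// const unsigned char buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
//
// uint32_t h_plain   = MurmurHash2  ( buf, sizeof(buf), 0 );
// uint32_t h_variant = MurmurHash2A ( buf, sizeof(buf), 0 );
//
// // h_plain and h_variant will, in general, differ.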

//-----------------------------------------------------------------------------
// CMurmurHash2A, by Austin Appleby

// This is a sample implementation of MurmurHash2A designed to work
// incrementally.

// Usage -

// CMurmurHash2A hasher;
// hasher.Begin(seed);
// hasher.Add(data1,size1);
// hasher.Add(data2,size2);
// ...
// hasher.Add(dataN,sizeN);
// uint32_t hash = hasher.End();

class CMurmurHash2A
{
public:

  void Begin ( uint32_t seed = 0 )
  {
    m_hash  = seed;
    m_tail  = 0;
    m_count = 0;
    m_size  = 0;
  }

  void Add ( const unsigned char * data, int len )
  {
    m_size += len;

    MixTail(data,len);

    while(len >= 4)
    {
      uint32_t k = *(uint32_t*)data;

      mmix(m_hash,k);

      data += 4;
      len -= 4;
    }

    MixTail(data,len);
  }

  uint32_t End ( void )
  {
    mmix(m_hash,m_tail);
    mmix(m_hash,m_size);

    m_hash ^= m_hash >> 13;
    m_hash *= m;
    m_hash ^= m_hash >> 15;

    return m_hash;
  }

private:

  static const uint32_t m = 0x5bd1e995;
  static const int r = 24;

  void MixTail ( const unsigned char * & data, int & len )
  {
    while( len && ((len<4) || m_count) )
    {
      m_tail |= (*data++) << (m_count * 8);

      m_count++;
      len--;

      if(m_count == 4)
      {
        mmix(m_hash,m_tail);
        m_tail = 0;
        m_count = 0;
      }
    }
  }

  uint32_t m_hash;
  uint32_t m_tail;
  uint32_t m_count;
  uint32_t m_size;
};
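
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): because CMurmurHash2A is
// an incremental implementation of MurmurHash2A, feeding a buffer in pieces
// should give the same result as hashing it in one call (assuming a
// little-endian machine, consistent with the endianness caveats at the top of
// this file). The split point, buffer, and seed below are arbitrary example
// values.

// const unsigned char buf[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
//
// CMurmurHash2A hasher;
// hasher.Begin(42);
// hasher.Add(buf, 3);                   // first chunk
// hasher.Add(buf + 3, sizeof(buf) - 3); // remainder
// uint32_t incremental = hasher.End();
//
// uint32_t oneshot = MurmurHash2A(buf, sizeof(buf), 42);
//
// // incremental and oneshot should be equal.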

//-----------------------------------------------------------------------------
// MurmurHashNeutral2, by Austin Appleby

// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.

uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  uint32_t h = seed ^ len;

  const unsigned char * data = (const unsigned char *)key;

  while(len >= 4)
  {
    uint32_t k;

    k  = data[0];
    k |= data[1] << 8;
    k |= data[2] << 16;
    k |= data[3] << 24;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  switch(len)
  {
  case 3: h ^= data[2] << 16;
  case 2: h ^= data[1] << 8;
  case 1: h ^= data[0];
          h *= m;
  };

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
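
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): MurmurHashNeutral2 is the
// variant to reach for when hash values are persisted or exchanged across
// machines, since it assembles each word byte-by-byte instead of doing a raw
// 4-byte load. The buffer and seed below are arbitrary example values.

// const unsigned char record[6] = { 'a', 'b', 'c', 'd', 'e', 'f' };
//
// uint32_t portable = MurmurHashNeutral2 ( record, sizeof(record), 0 );
// // 'portable' can be stored on disk or sent over the network, because every
// // platform computes it from the bytes in the same order.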

//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby

// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.

// Performance will be lower than MurmurHash2

#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  const unsigned char * data = (const unsigned char *)key;

  uint32_t h = seed ^ len;

  int align = (int)((uintptr_t)data & 3); // cast via uintptr_t so the pointer is not truncated on 64-bit platforms

  if(align && (len >= 4))
  {
    // Pre-load the temp registers

    uint32_t t = 0, d = 0;

    switch(align)
    {
    case 1: t |= data[2] << 16;
    case 2: t |= data[1] << 8;
    case 3: t |= data[0];
    }

    t <<= (8 * align);

    data += 4-align;
    len -= 4-align;

    int sl = 8 * (4-align);
    int sr = 8 * align;

    // Mix

    while(len >= 4)
    {
      d = *(uint32_t *)data;
      t = (t >> sr) | (d << sl);

      uint32_t k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    // Handle leftover data in temp registers

    d = 0;

    if(len >= align)
    {
      switch(align)
      {
      case 3: d |= data[2] << 16;
      case 2: d |= data[1] << 8;
      case 1: d |= data[0];
      }

      uint32_t k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      //----------
      // Handle tail bytes

      switch(len)
      {
      case 3: h ^= data[2] << 16;
      case 2: h ^= data[1] << 8;
      case 1: h ^= data[0];
              h *= m;
      };
    }
    else
    {
      switch(len)
      {
      case 3: d |= data[2] << 16;
      case 2: d |= data[1] << 8;
      case 1: d |= data[0];
      case 0: h ^= (t >> sr) | (d << sl);
              h *= m;
      }
    }

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
  else
  {
    while(len >= 4)
    {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    //----------
    // Handle tail bytes

    switch(len)
    {
    case 3: h ^= data[2] << 16;
    case 2: h ^= data[1] << 8;
    case 1: h ^= data[0];
            h *= m;
    };

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
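
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): MurmurHashAligned2 exists
// for platforms where unaligned 4-byte loads fault, so it is the safe choice
// when the key may start at an odd address. The buffer, offset, and seed below
// are arbitrary example values.

// unsigned char packet[16] = { 0 }; // example data; contents are arbitrary
//
// // Start hashing at byte 1 - a deliberately misaligned address that a plain
// // 4-byte load could fault on with some platforms.
// uint32_t h = MurmurHashAligned2 ( packet + 1, (int)sizeof(packet) - 1, 0 );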

//-----------------------------------------------------------------------------