#include "MurmurHash3.h"
#include <stdlib.h> // for _rotl
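
//-----------------------------------------------------------------------------
// _rotl, _rotl64 and __forceinline are MSVC-isms. A minimal fallback for
// other compilers might look like the sketch below - an assumption about the
// build environment (a GCC/Clang-style toolchain whose <stdlib.h> does not
// already provide these), not part of the original source.

#if !defined(_MSC_VER)

#define __forceinline inline // GCC/Clang: __attribute__((always_inline)) also works

inline uint32_t _rotl ( uint32_t x, int r ) // rotate left, 0 < r < 32
{
  return (x << r) | (x >> (32 - r));
}

inline uint64_t _rotl64 ( uint64_t x, int r ) // rotate left, 0 < r < 64
{
  return (x << r) | (x >> (64 - r));
}

#endif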

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.


//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

inline uint32_t getblock ( const uint32_t * p, int i )
{
  return p[i];
}
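
//----------
// For example, a big-endian target could byte-swap each block here instead
// (a sketch only - __builtin_bswap32 is a GCC/Clang builtin; MSVC's
// equivalent is _byteswap_ulong):
//
//   inline uint32_t getblock ( const uint32_t * p, int i )
//   {
//     return __builtin_bswap32(p[i]);
//   }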

//----------

inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
{
  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;

  k1 *= c1;
  k1 = _rotl(k1,11);
  k1 *= c2;

  h1 = _rotl(h1,13);
  h1 = h1*5+0x52dce729;
  h1 ^= k1;
}

//----------

void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 4;

  uint32_t h1 = 0x971e137b ^ seed;

  uint32_t c1 = 0x95543787;
  uint32_t c2 = 0x2ad7eb25;

  //----------
  // body

  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);

  for(int i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock(blocks,i);

    bmix32(h1,k1,c1,c2);
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);

  uint32_t k1 = 0;

  switch(len & 3)
  {
  case 3: k1 ^= tail[2] << 16;
  case 2: k1 ^= tail[1] << 8;
  case 1: k1 ^= tail[0];
          bmix32(h1,k1,c1,c2);
  };

  //----------
  // finalization

  h1 ^= len;

  h1 *= 0x85ebca6b;
  h1 ^= h1 >> 13;
  h1 *= 0xc2b2ae35;
  h1 ^= h1 >> 16;

  h1 ^= seed;

  *(uint32_t*)out = h1;
}

//-----------------------------------------------------------------------------
// This mix is large enough that VC++ refuses to inline it unless we use
// __forceinline. It's also not all that fast due to register spillage.

__forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_t & h4,
                            uint32_t & k1, uint32_t & k2, uint32_t & k3, uint32_t & k4,
                            uint32_t & c1, uint32_t & c2 )
{
  k1 *= c1;
  k1 = _rotl(k1,11);
  k1 *= c2;
  h1 ^= k1;
  h1 += h2;
  h1 += h3;
  h1 += h4;

  h1 = _rotl(h1,17);

  k2 *= c2;
  k2 = _rotl(k2,11);
  k2 *= c1;
  h2 ^= k2;
  h2 += h1;

  h1 = h1*3+0x52dce729;
  h2 = h2*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;

  k3 *= c1;
  k3 = _rotl(k3,11);
  k3 *= c2;
  h3 ^= k3;
  h3 += h1;

  k4 *= c2;
  k4 = _rotl(k4,11);
  k4 *= c1;
  h4 ^= k4;
  h4 += h1;

  h3 = h3*3+0x52dce729;
  h4 = h4*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}

//----------
// Finalization mix - force all bits of a hash block to avalanche

// avalanches all bits to within 0.25% bias

inline uint32_t fmix32 ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

void MurmurHash3_x86_128 ( const void * key, const int len, uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint32_t h1 = 0x8de1c3ac ^ seed;
  uint32_t h2 = 0xbab98226 ^ seed;
  uint32_t h3 = 0xfcba5b2d ^ seed;
  uint32_t h4 = 0x32452e3e ^ seed;

  uint32_t c1 = 0x95543787;
  uint32_t c2 = 0x2ad7eb25;

  //----------
  // body

  const uint32_t * blocks = (const uint32_t *)(data);

  for(int i = 0; i < nblocks; i++)
  {
    uint32_t k1 = getblock(blocks,i*4+0);
    uint32_t k2 = getblock(blocks,i*4+1);
    uint32_t k3 = getblock(blocks,i*4+2);
    uint32_t k4 = getblock(blocks,i*4+3);

    bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;

  switch(len & 15)
  {
  case 15: k4 ^= tail[14] << 16;
  case 14: k4 ^= tail[13] << 8;
  case 13: k4 ^= tail[12] << 0;

  case 12: k3 ^= tail[11] << 24;
  case 11: k3 ^= tail[10] << 16;
  case 10: k3 ^= tail[ 9] << 8;
  case  9: k3 ^= tail[ 8] << 0;

  case  8: k2 ^= tail[ 7] << 24;
  case  7: k2 ^= tail[ 6] << 16;
  case  6: k2 ^= tail[ 5] << 8;
  case  5: k2 ^= tail[ 4] << 0;

  case  4: k1 ^= tail[ 3] << 24;
  case  3: k1 ^= tail[ 2] << 16;
  case  2: k1 ^= tail[ 1] << 8;
  case  1: k1 ^= tail[ 0] << 0;
           bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
  };

  //----------
  // finalization

  h4 ^= len;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  ((uint32_t*)out)[0] = h1;
  ((uint32_t*)out)[1] = h2;
  ((uint32_t*)out)[2] = h3;
  ((uint32_t*)out)[3] = h4;
}

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

inline uint64_t getblock ( const uint64_t * p, int i )
{
  return p[i];
}
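
//----------
// As with the 32-bit reader, an endian-swapping variant is a one-line change
// (a sketch only - __builtin_bswap64 is a GCC/Clang builtin; MSVC's
// equivalent is _byteswap_uint64):
//
//   inline uint64_t getblock ( const uint64_t * p, int i )
//   {
//     return __builtin_bswap64(p[i]);
//   }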

//----------
// Block mix - combine the key bits with the hash bits and scramble everything

inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2, uint64_t & c1, uint64_t & c2 )
{
  k1 *= c1;
  k1 = _rotl64(k1,23);
  k1 *= c2;

  k2 *= c2;
  k2 = _rotl64(k2,23);
  k2 *= c1;

  h1 = _rotl64(h1,17);
  h1 += h2;
  h1 ^= k1;

  h2 = _rotl64(h2,41);
  h2 += h1;
  h2 ^= k2;

  h1 = h1*3+0x52dce729;
  h2 = h2*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}

//----------
// Finalization mix - avalanches all bits to within 0.05% bias

inline uint64_t fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= 0xff51afd7ed558ccd;
  k ^= k >> 33;
  k *= 0xc4ceb9fe1a85ec53;
  k ^= k >> 33;

  return k;
}

//----------

void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint64_t h1 = 0x9368e53c2f6af274 ^ seed;
  uint64_t h2 = 0x586dcd208f7cd3fd ^ seed;

  uint64_t c1 = 0x87c37b91114253d5;
  uint64_t c2 = 0x4cf5ad432745937f;

  //----------
  // body

  const uint64_t * blocks = (const uint64_t *)(data);

  for(int i = 0; i < nblocks; i++)
  {
    uint64_t k1 = getblock(blocks,i*2+0);
    uint64_t k2 = getblock(blocks,i*2+1);

    bmix64(h1,h2,k1,k2,c1,c2);
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  switch(len & 15)
  {
  case 15: k2 ^= uint64_t(tail[14]) << 48;
  case 14: k2 ^= uint64_t(tail[13]) << 40;
  case 13: k2 ^= uint64_t(tail[12]) << 32;
  case 12: k2 ^= uint64_t(tail[11]) << 24;
  case 11: k2 ^= uint64_t(tail[10]) << 16;
  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
  case  9: k2 ^= uint64_t(tail[ 8]) << 0;

  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
           bmix64(h1,h2,k1,k2,c1,c2);
  };

  //----------
  // finalization

  h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t*)out)[0] = h1;
  ((uint64_t*)out)[1] = h2;
}

//-----------------------------------------------------------------------------
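
// Example usage (a minimal sketch, not part of the original source - the
// seed value 42 is arbitrary):
//
//   uint32_t hash;
//   MurmurHash3_x86_32("hello", 5, 42, &hash);      // 4-byte result
//
//   uint64_t hash128[2];
//   MurmurHash3_x64_128("hello", 5, 42, hash128);   // 16-byte result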