//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#include "MurmurHash3.h"

//-----------------------------------------------------------------------------
// Platform-specific functions and macros
//
//   FORCE_INLINE  - strongest "always inline this" request the compiler offers
//   ROTL32/ROTL64 - rotate a 32-bit / 64-bit value left by a bit count
//   BIG_CONSTANT  - wraps a 64-bit integer literal in the form the compiler
//                   accepts

// Microsoft Visual Studio

#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline

#include <stdlib.h>

#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else // defined(_MSC_VER)

// always_inline is paired with 'inline': the attribute on its own makes GCC
// warn that the function "might not be inlinable".
#define FORCE_INLINE inline __attribute__((always_inline))

// Rotates x left by r bits.
// Both shift counts are reduced modulo 32 so that r == 0 (or any multiple of
// 32) never shifts by the full word width, which would be undefined.
static inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned s = (unsigned)r & 31u;

  return (x << s) | (x >> ((32u - s) & 31u));
}

// Rotates x left by r bits; shift counts are reduced modulo 64 for the same
// reason as in rotl32.
static inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned s = (unsigned)r & 63u;

  return (x << s) | (x >> ((64u - s) & 63u));
}

#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)

51//-----------------------------------------------------------------------------
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000052// Block read - if your platform needs to do endian-swapping or can only
53// handle aligned reads, do the conversion here
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000054
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000055FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000056{
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000057 return p[i];
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000058}
59
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000060FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000061{
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000062 return p[i];
63}
tanjent@gmail.combabb5532011-02-28 06:03:12 +000064
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000065//-----------------------------------------------------------------------------
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000066// Finalization mix - force all bits of a hash block to avalanche
67
68FORCE_INLINE uint32_t fmix ( uint32_t h )
69{
70 h ^= h >> 16;
71 h *= 0x85ebca6b;
72 h ^= h >> 13;
73 h *= 0xc2b2ae35;
74 h ^= h >> 16;
75
76 return h;
77}
78
79//----------
80
81FORCE_INLINE uint64_t fmix ( uint64_t k )
82{
83 k ^= k >> 33;
84 k *= BIG_CONSTANT(0xff51afd7ed558ccd);
85 k ^= k >> 33;
86 k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
87 k ^= k >> 33;
88
89 return k;
90}
91
92//-----------------------------------------------------------------------------
93
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000094void MurmurHash3_x86_32 ( const void * key, int len,
95 uint32_t seed, void * out )
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000096{
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000097 const uint8_t * data = (const uint8_t*)key;
98 const int nblocks = len / 4;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000099
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000100 uint32_t h1 = seed;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000101
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000102 uint32_t c1 = 0xcc9e2d51;
103 uint32_t c2 = 0x1b873593;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000104
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000105 //----------
106 // body
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000107
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000108 const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000109
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000110 for(int i = -nblocks; i; i++)
111 {
112 uint32_t k1 = getblock(blocks,i);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000113
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000114 k1 *= c1;
115 k1 = ROTL32(k1,15);
116 k1 *= c2;
117
118 h1 ^= k1;
119 h1 = ROTL32(h1,13);
120 h1 = h1*5+0xe6546b64;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000121 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000122
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000123 //----------
124 // tail
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000125
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000126 const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000127
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000128 uint32_t k1 = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000129
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000130 switch(len & 3)
131 {
132 case 3: k1 ^= tail[2] << 16;
133 case 2: k1 ^= tail[1] << 8;
134 case 1: k1 ^= tail[0];
tanjent@gmail.comb35e5622011-05-20 23:00:53 +0000135 k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000136 };
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000137
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000138 //----------
139 // finalization
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000140
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000141 h1 ^= len;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000142
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000143 h1 = fmix(h1);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000144
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000145 *(uint32_t*)out = h1;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000146}
147
148//-----------------------------------------------------------------------------
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000149
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000150void MurmurHash3_x86_128 ( const void * key, const int len,
151 uint32_t seed, void * out )
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000152{
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000153 const uint8_t * data = (const uint8_t*)key;
154 const int nblocks = len / 16;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000155
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000156 uint32_t h1 = seed;
157 uint32_t h2 = seed;
158 uint32_t h3 = seed;
159 uint32_t h4 = seed;
160
161 uint32_t c1 = 0x239b961b;
162 uint32_t c2 = 0xab0e9789;
163 uint32_t c3 = 0x38b34ae5;
164 uint32_t c4 = 0xa1e38b93;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000165
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000166 //----------
167 // body
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000168
tanjent@gmail.comc365c962011-04-01 21:34:37 +0000169 const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000170
tanjent@gmail.comc365c962011-04-01 21:34:37 +0000171 for(int i = -nblocks; i; i++)
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000172 {
173 uint32_t k1 = getblock(blocks,i*4+0);
174 uint32_t k2 = getblock(blocks,i*4+1);
175 uint32_t k3 = getblock(blocks,i*4+2);
176 uint32_t k4 = getblock(blocks,i*4+3);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000177
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000178 k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
179
180 h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
181
182 k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
183
184 h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
185
186 k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
187
188 h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
189
190 k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
191
192 h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000193 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000194
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000195 //----------
196 // tail
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000197
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000198 const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000199
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000200 uint32_t k1 = 0;
201 uint32_t k2 = 0;
202 uint32_t k3 = 0;
203 uint32_t k4 = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000204
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000205 switch(len & 15)
206 {
207 case 15: k4 ^= tail[14] << 16;
208 case 14: k4 ^= tail[13] << 8;
209 case 13: k4 ^= tail[12] << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000210 k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000211
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000212 case 12: k3 ^= tail[11] << 24;
213 case 11: k3 ^= tail[10] << 16;
214 case 10: k3 ^= tail[ 9] << 8;
215 case 9: k3 ^= tail[ 8] << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000216 k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000217
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000218 case 8: k2 ^= tail[ 7] << 24;
219 case 7: k2 ^= tail[ 6] << 16;
220 case 6: k2 ^= tail[ 5] << 8;
221 case 5: k2 ^= tail[ 4] << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000222 k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000223
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000224 case 4: k1 ^= tail[ 3] << 24;
225 case 3: k1 ^= tail[ 2] << 16;
226 case 2: k1 ^= tail[ 1] << 8;
227 case 1: k1 ^= tail[ 0] << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000228 k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000229 };
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000230
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000231 //----------
232 // finalization
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000233
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000234 h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000235
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000236 h1 += h2; h1 += h3; h1 += h4;
237 h2 += h1; h3 += h1; h4 += h1;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000238
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000239 h1 = fmix(h1);
240 h2 = fmix(h2);
241 h3 = fmix(h3);
242 h4 = fmix(h4);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000243
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000244 h1 += h2; h1 += h3; h1 += h4;
245 h2 += h1; h3 += h1; h4 += h1;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000246
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000247 ((uint32_t*)out)[0] = h1;
248 ((uint32_t*)out)[1] = h2;
249 ((uint32_t*)out)[2] = h3;
250 ((uint32_t*)out)[3] = h4;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000251}
252
253//-----------------------------------------------------------------------------
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000254
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000255void MurmurHash3_x64_128 ( const void * key, const int len,
256 const uint32_t seed, void * out )
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000257{
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000258 const uint8_t * data = (const uint8_t*)key;
259 const int nblocks = len / 16;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000260
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000261 uint64_t h1 = seed;
262 uint64_t h2 = seed;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000263
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000264 uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
265 uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000266
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000267 //----------
268 // body
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000269
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000270 const uint64_t * blocks = (const uint64_t *)(data);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000271
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000272 for(int i = 0; i < nblocks; i++)
273 {
274 uint64_t k1 = getblock(blocks,i*2+0);
275 uint64_t k2 = getblock(blocks,i*2+1);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000276
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000277 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
278
279 h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
280
281 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
282
283 h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000284 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000285
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000286 //----------
287 // tail
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000288
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000289 const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000290
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000291 uint64_t k1 = 0;
292 uint64_t k2 = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000293
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000294 switch(len & 15)
295 {
296 case 15: k2 ^= uint64_t(tail[14]) << 48;
297 case 14: k2 ^= uint64_t(tail[13]) << 40;
298 case 13: k2 ^= uint64_t(tail[12]) << 32;
299 case 12: k2 ^= uint64_t(tail[11]) << 24;
300 case 11: k2 ^= uint64_t(tail[10]) << 16;
301 case 10: k2 ^= uint64_t(tail[ 9]) << 8;
302 case 9: k2 ^= uint64_t(tail[ 8]) << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000303 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000304
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000305 case 8: k1 ^= uint64_t(tail[ 7]) << 56;
306 case 7: k1 ^= uint64_t(tail[ 6]) << 48;
307 case 6: k1 ^= uint64_t(tail[ 5]) << 40;
308 case 5: k1 ^= uint64_t(tail[ 4]) << 32;
309 case 4: k1 ^= uint64_t(tail[ 3]) << 24;
310 case 3: k1 ^= uint64_t(tail[ 2]) << 16;
311 case 2: k1 ^= uint64_t(tail[ 1]) << 8;
312 case 1: k1 ^= uint64_t(tail[ 0]) << 0;
tanjent@gmail.com833fd8d2011-04-11 20:45:44 +0000313 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000314 };
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000315
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000316 //----------
317 // finalization
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000318
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000319 h1 ^= len; h2 ^= len;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000320
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000321 h1 += h2;
322 h2 += h1;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000323
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000324 h1 = fmix(h1);
325 h2 = fmix(h2);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000326
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000327 h1 += h2;
328 h2 += h1;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000329
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000330 ((uint64_t*)out)[0] = h1;
331 ((uint64_t*)out)[1] = h2;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000332}
333
334//-----------------------------------------------------------------------------
aappleby@google.comf068a582011-04-05 00:15:28 +0000335