blob: 95d2e264e7290bbd666ded68b015c70513892c9d [file] [log] [blame]
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +00001//-----------------------------------------------------------------------------
2// MurmurHash3 was written by Austin Appleby, and is placed in the public
3// domain. The author hereby disclaims copyright to this source code.
4
tanjent@gmail.combabb5532011-02-28 06:03:12 +00005// Note - The x86 and x64 versions do _not_ produce the same results, as the
6// algorithms are optimized for their respective platforms. You can still
7// compile and run any of them on any platform, but your performance with the
8// non-native version will be less than optimal.
9
tanjent@gmail.com58dd8862011-04-08 19:39:16 +000010#include "MurmurHash3.h"
tanjent@gmail.combabb5532011-02-28 06:03:12 +000011
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000012//-----------------------------------------------------------------------------
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000013// Block read - if your platform needs to do endian-swapping or can only
14// handle aligned reads, do the conversion here
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000015
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000016FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000017{
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000018 return p[i];
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000019}
20
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000021FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000022{
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000023 return p[i];
24}
tanjent@gmail.combabb5532011-02-28 06:03:12 +000025
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000026//-----------------------------------------------------------------------------
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000027// Finalization mix - force all bits of a hash block to avalanche
28
29FORCE_INLINE uint32_t fmix ( uint32_t h )
30{
31 h ^= h >> 16;
32 h *= 0x85ebca6b;
33 h ^= h >> 13;
34 h *= 0xc2b2ae35;
35 h ^= h >> 16;
36
37 return h;
38}
39
40//----------
41
42FORCE_INLINE uint64_t fmix ( uint64_t k )
43{
44 k ^= k >> 33;
45 k *= BIG_CONSTANT(0xff51afd7ed558ccd);
46 k ^= k >> 33;
47 k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
48 k ^= k >> 33;
49
50 return k;
51}
52
53//-----------------------------------------------------------------------------
54
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000055void MurmurHash3_x86_32 ( const void * key, int len,
56 uint32_t seed, void * out )
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000057{
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000058 const uint8_t * data = (const uint8_t*)key;
59 const int nblocks = len / 4;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000060
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000061 uint32_t h1 = seed;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000062
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000063 uint32_t c1 = 0xcc9e2d51;
64 uint32_t c2 = 0x1b873593;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000065
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000066 //----------
67 // body
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000068
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000069 const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000070
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000071 for(int i = -nblocks; i; i++)
72 {
73 uint32_t k1 = getblock(blocks,i);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000074
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +000075 k1 *= c1;
76 k1 = ROTL32(k1,15);
77 k1 *= c2;
78
79 h1 ^= k1;
80 h1 = ROTL32(h1,13);
81 h1 = h1*5+0xe6546b64;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000082 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000083
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000084 //----------
85 // tail
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000086
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000087 const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000088
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000089 uint32_t k1 = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000090
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000091 switch(len & 3)
92 {
93 case 3: k1 ^= tail[2] << 16;
94 case 2: k1 ^= tail[1] << 8;
95 case 1: k1 ^= tail[0];
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +000096 k1 *= c1; k1 = ROTL32(k1,16); k1 *= c2; h1 ^= k1;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000097 };
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000098
tanjent@gmail.comf67ce942011-03-14 09:11:18 +000099 //----------
100 // finalization
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000101
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000102 h1 ^= len;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000103
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000104 h1 = fmix(h1);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000105
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000106 *(uint32_t*)out = h1;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000107}
108
109//-----------------------------------------------------------------------------
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000110
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000111void MurmurHash3_x86_128 ( const void * key, const int len,
112 uint32_t seed, void * out )
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000113{
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000114 const uint8_t * data = (const uint8_t*)key;
115 const int nblocks = len / 16;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000116
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000117 uint32_t h1 = seed;
118 uint32_t h2 = seed;
119 uint32_t h3 = seed;
120 uint32_t h4 = seed;
121
122 uint32_t c1 = 0x239b961b;
123 uint32_t c2 = 0xab0e9789;
124 uint32_t c3 = 0x38b34ae5;
125 uint32_t c4 = 0xa1e38b93;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000126
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000127 //----------
128 // body
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000129
tanjent@gmail.comc365c962011-04-01 21:34:37 +0000130 const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000131
tanjent@gmail.comc365c962011-04-01 21:34:37 +0000132 for(int i = -nblocks; i; i++)
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000133 {
134 uint32_t k1 = getblock(blocks,i*4+0);
135 uint32_t k2 = getblock(blocks,i*4+1);
136 uint32_t k3 = getblock(blocks,i*4+2);
137 uint32_t k4 = getblock(blocks,i*4+3);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000138
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000139 k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
140
141 h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
142
143 k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
144
145 h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
146
147 k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
148
149 h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
150
151 k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
152
153 h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000154 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000155
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000156 //----------
157 // tail
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000158
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000159 const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000160
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000161 uint32_t k1 = 0;
162 uint32_t k2 = 0;
163 uint32_t k3 = 0;
164 uint32_t k4 = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000165
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000166 switch(len & 15)
167 {
168 case 15: k4 ^= tail[14] << 16;
169 case 14: k4 ^= tail[13] << 8;
170 case 13: k4 ^= tail[12] << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000171 k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000172
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000173 case 12: k3 ^= tail[11] << 24;
174 case 11: k3 ^= tail[10] << 16;
175 case 10: k3 ^= tail[ 9] << 8;
176 case 9: k3 ^= tail[ 8] << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000177 k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000178
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000179 case 8: k2 ^= tail[ 7] << 24;
180 case 7: k2 ^= tail[ 6] << 16;
181 case 6: k2 ^= tail[ 5] << 8;
182 case 5: k2 ^= tail[ 4] << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000183 k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000184
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000185 case 4: k1 ^= tail[ 3] << 24;
186 case 3: k1 ^= tail[ 2] << 16;
187 case 2: k1 ^= tail[ 1] << 8;
188 case 1: k1 ^= tail[ 0] << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000189 k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000190 };
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000191
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000192 //----------
193 // finalization
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000194
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000195 h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000196
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000197 h1 += h2; h1 += h3; h1 += h4;
198 h2 += h1; h3 += h1; h4 += h1;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000199
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000200 h1 = fmix(h1);
201 h2 = fmix(h2);
202 h3 = fmix(h3);
203 h4 = fmix(h4);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000204
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000205 h1 += h2; h1 += h3; h1 += h4;
206 h2 += h1; h3 += h1; h4 += h1;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000207
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000208 ((uint32_t*)out)[0] = h1;
209 ((uint32_t*)out)[1] = h2;
210 ((uint32_t*)out)[2] = h3;
211 ((uint32_t*)out)[3] = h4;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000212}
213
214//-----------------------------------------------------------------------------
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000215
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000216void MurmurHash3_x64_128 ( const void * key, const int len,
217 const uint32_t seed, void * out )
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000218{
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000219 const uint8_t * data = (const uint8_t*)key;
220 const int nblocks = len / 16;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000221
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000222 uint64_t h1 = seed;
223 uint64_t h2 = seed;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000224
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000225 uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
226 uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000227
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000228 //----------
229 // body
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000230
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000231 const uint64_t * blocks = (const uint64_t *)(data);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000232
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000233 for(int i = 0; i < nblocks; i++)
234 {
235 uint64_t k1 = getblock(blocks,i*2+0);
236 uint64_t k2 = getblock(blocks,i*2+1);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000237
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000238 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
239
240 h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
241
242 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
243
244 h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000245 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000246
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000247 //----------
248 // tail
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000249
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000250 const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000251
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000252 uint64_t k1 = 0;
253 uint64_t k2 = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000254
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000255 switch(len & 15)
256 {
257 case 15: k2 ^= uint64_t(tail[14]) << 48;
258 case 14: k2 ^= uint64_t(tail[13]) << 40;
259 case 13: k2 ^= uint64_t(tail[12]) << 32;
260 case 12: k2 ^= uint64_t(tail[11]) << 24;
261 case 11: k2 ^= uint64_t(tail[10]) << 16;
262 case 10: k2 ^= uint64_t(tail[ 9]) << 8;
263 case 9: k2 ^= uint64_t(tail[ 8]) << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000264 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000265
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000266 case 8: k1 ^= uint64_t(tail[ 7]) << 56;
267 case 7: k1 ^= uint64_t(tail[ 6]) << 48;
268 case 6: k1 ^= uint64_t(tail[ 5]) << 40;
269 case 5: k1 ^= uint64_t(tail[ 4]) << 32;
270 case 4: k1 ^= uint64_t(tail[ 3]) << 24;
271 case 3: k1 ^= uint64_t(tail[ 2]) << 16;
272 case 2: k1 ^= uint64_t(tail[ 1]) << 8;
273 case 1: k1 ^= uint64_t(tail[ 0]) << 0;
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000274 k1 *= c1; k1 = ROTL64(k1,29); k1 *= c2; h1 ^= k1;
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000275 };
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000276
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000277 //----------
278 // finalization
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000279
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000280 h1 ^= len; h2 ^= len;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000281
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000282 h1 += h2;
283 h2 += h1;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000284
tanjent@gmail.com2ff5e9b2011-04-03 06:30:51 +0000285 h1 = fmix(h1);
286 h2 = fmix(h2);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000287
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000288 h1 += h2;
289 h2 += h1;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000290
tanjent@gmail.comf67ce942011-03-14 09:11:18 +0000291 ((uint64_t*)out)[0] = h1;
292 ((uint64_t*)out)[1] = h2;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000293}
294
295//-----------------------------------------------------------------------------
aappleby@google.comf068a582011-04-05 00:15:28 +0000296