tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 1 | #include "MurmurHash3.h"
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 2 | #include <stdlib.h> // for _rotl
|
| 3 |
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 4 | // Note - The x86 and x64 versions do _not_ produce the same results, as the
|
| 5 | // algorithms are optimized for their respective platforms. You can still
|
| 6 | // compile and run any of them on any platform, but your performance with the
|
| 7 | // non-native version will be less than optimal.
|
| 8 |
|
| 9 |
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 10 | //-----------------------------------------------------------------------------
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 11 | // Block read - if your platform needs to do endian-swapping or can only
|
| 12 | // handle aligned reads, do the conversion here
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 13 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 14 | inline uint32_t getblock ( const uint32_t * p, int i )
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 15 | {
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 16 | return p[i];
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 17 | }
|
| 18 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 19 | //----------
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 20 |
|
| 21 | inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
|
| 22 | {
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 23 | c1 = c1*5+0x7b7d159c;
|
| 24 | c2 = c2*5+0x6bce6396;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 25 |
|
| 26 | k1 *= c1;
|
| 27 | k1 = _rotl(k1,11);
|
| 28 | k1 *= c2;
|
| 29 |
|
| 30 | h1 = _rotl(h1,13);
|
| 31 | h1 = h1*5+0x52dce729;
|
| 32 | h1 ^= k1;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 33 | }
|
| 34 |
|
| 35 | //----------
|
| 36 |
|
| 37 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
|
| 38 | {
|
| 39 | const uint8_t * data = (const uint8_t*)key;
|
| 40 | const int nblocks = len / 4;
|
| 41 |
|
| 42 | uint32_t h1 = 0x971e137b ^ seed;
|
| 43 |
|
| 44 | uint32_t c1 = 0x95543787;
|
| 45 | uint32_t c2 = 0x2ad7eb25;
|
| 46 |
|
| 47 | //----------
|
| 48 | // body
|
| 49 |
|
| 50 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
|
| 51 |
|
| 52 | for(int i = -nblocks; i; i++)
|
| 53 | {
|
| 54 | uint32_t k1 = getblock(blocks,i);
|
| 55 |
|
| 56 | bmix32(h1,k1,c1,c2);
|
| 57 | }
|
| 58 |
|
| 59 | //----------
|
| 60 | // tail
|
| 61 |
|
| 62 | const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
|
| 63 |
|
| 64 | uint32_t k1 = 0;
|
| 65 |
|
| 66 | switch(len & 3)
|
| 67 | {
|
| 68 | case 3: k1 ^= tail[2] << 16;
|
| 69 | case 2: k1 ^= tail[1] << 8;
|
| 70 | case 1: k1 ^= tail[0];
|
| 71 | bmix32(h1,k1,c1,c2);
|
| 72 | };
|
| 73 |
|
| 74 | //----------
|
| 75 | // finalization
|
| 76 |
|
| 77 | h1 ^= len;
|
| 78 |
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 79 | h1 *= 0x85ebca6b;
|
| 80 | h1 ^= h1 >> 13;
|
| 81 | h1 *= 0xc2b2ae35;
|
| 82 | h1 ^= h1 >> 16;
|
| 83 |
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 84 | h1 ^= seed;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 85 |
|
| 86 | *(uint32_t*)out = h1;
|
| 87 | }
|
| 88 |
|
| 89 | //-----------------------------------------------------------------------------
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 90 | // This mix is large enough that VC++ refuses to inline it unless we use
|
| 91 | // __forceinline. It's also not all that fast due to register spillage.
|
| 92 |
|
| 93 | __forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_t & h4,
|
| 94 | uint32_t & k1, uint32_t & k2, uint32_t & k3, uint32_t & k4,
|
| 95 | uint32_t & c1, uint32_t & c2 )
|
| 96 | {
|
| 97 | k1 *= c1;
|
| 98 | k1 = _rotl(k1,11);
|
| 99 | k1 *= c2;
|
| 100 | h1 ^= k1;
|
| 101 | h1 += h2;
|
| 102 | h1 += h3;
|
| 103 | h1 += h4;
|
| 104 |
|
tanjent@gmail.com | 31a9e8e | 2010-11-09 20:29:19 +0000 | [diff] [blame] | 105 | h1 = _rotl(h1,17);
|
| 106 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 107 | k2 *= c2;
|
| 108 | k2 = _rotl(k2,11);
|
| 109 | k2 *= c1;
|
| 110 | h2 ^= k2;
|
| 111 | h2 += h1;
|
| 112 |
|
| 113 | h1 = h1*3+0x52dce729;
|
| 114 | h2 = h2*3+0x38495ab5;
|
| 115 |
|
| 116 | c1 = c1*5+0x7b7d159c;
|
| 117 | c2 = c2*5+0x6bce6396;
|
| 118 |
|
| 119 | k3 *= c1;
|
| 120 | k3 = _rotl(k3,11);
|
| 121 | k3 *= c2;
|
| 122 | h3 ^= k3;
|
| 123 | h3 += h1;
|
| 124 |
|
| 125 | k4 *= c2;
|
| 126 | k4 = _rotl(k4,11);
|
| 127 | k4 *= c1;
|
| 128 | h4 ^= k4;
|
| 129 | h4 += h1;
|
| 130 |
|
| 131 | h3 = h3*3+0x52dce729;
|
| 132 | h4 = h4*3+0x38495ab5;
|
| 133 |
|
| 134 | c1 = c1*5+0x7b7d159c;
|
| 135 | c2 = c2*5+0x6bce6396;
|
| 136 | }
|
| 137 |
|
| 138 | //----------
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 139 | // Finalization mix - force all bits of a hash block to avalanche
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 140 |
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 141 | // avalanches all bits to within 0.25% bias
|
| 142 |
|
| 143 | inline uint32_t fmix32 ( uint32_t h )
|
| 144 | {
|
| 145 | h ^= h >> 16;
|
| 146 | h *= 0x85ebca6b;
|
| 147 | h ^= h >> 13;
|
| 148 | h *= 0xc2b2ae35;
|
| 149 | h ^= h >> 16;
|
| 150 |
|
| 151 | return h;
|
| 152 | }
|
| 153 |
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 154 | void MurmurHash3_x86_128 ( const void * key, const int len, uint32_t seed, void * out )
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 155 | {
|
| 156 | const uint8_t * data = (const uint8_t*)key;
|
| 157 | const int nblocks = len / 16;
|
| 158 |
|
| 159 | uint32_t h1 = 0x8de1c3ac ^ seed;
|
| 160 | uint32_t h2 = 0xbab98226 ^ seed;
|
| 161 | uint32_t h3 = 0xfcba5b2d ^ seed;
|
| 162 | uint32_t h4 = 0x32452e3e ^ seed;
|
| 163 |
|
| 164 | uint32_t c1 = 0x95543787;
|
| 165 | uint32_t c2 = 0x2ad7eb25;
|
| 166 |
|
| 167 | //----------
|
| 168 | // body
|
| 169 |
|
| 170 | const uint32_t * blocks = (const uint32_t *)(data);
|
| 171 |
|
| 172 | for(int i = 0; i < nblocks; i++)
|
| 173 | {
|
| 174 | uint32_t k1 = getblock(blocks,i*4+0);
|
| 175 | uint32_t k2 = getblock(blocks,i*4+1);
|
| 176 | uint32_t k3 = getblock(blocks,i*4+2);
|
| 177 | uint32_t k4 = getblock(blocks,i*4+3);
|
| 178 |
|
| 179 | bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
|
| 180 | }
|
| 181 |
|
| 182 | //----------
|
| 183 | // tail
|
| 184 |
|
| 185 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
|
| 186 |
|
| 187 | uint32_t k1 = 0;
|
| 188 | uint32_t k2 = 0;
|
| 189 | uint32_t k3 = 0;
|
| 190 | uint32_t k4 = 0;
|
| 191 |
|
| 192 | switch(len & 15)
|
| 193 | {
|
| 194 | case 15: k4 ^= tail[14] << 16;
|
| 195 | case 14: k4 ^= tail[13] << 8;
|
| 196 | case 13: k4 ^= tail[12] << 0;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 197 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 198 | case 12: k3 ^= tail[11] << 24;
|
| 199 | case 11: k3 ^= tail[10] << 16;
|
| 200 | case 10: k3 ^= tail[ 9] << 8;
|
| 201 | case 9: k3 ^= tail[ 8] << 0;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 202 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 203 | case 8: k2 ^= tail[ 7] << 24;
|
| 204 | case 7: k2 ^= tail[ 6] << 16;
|
| 205 | case 6: k2 ^= tail[ 5] << 8;
|
| 206 | case 5: k2 ^= tail[ 4] << 0;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 207 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 208 | case 4: k1 ^= tail[ 3] << 24;
|
| 209 | case 3: k1 ^= tail[ 2] << 16;
|
| 210 | case 2: k1 ^= tail[ 1] << 8;
|
| 211 | case 1: k1 ^= tail[ 0] << 0;
|
| 212 | bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
|
| 213 | };
|
| 214 |
|
| 215 | //----------
|
| 216 | // finalization
|
| 217 |
|
| 218 | h4 ^= len;
|
| 219 |
|
| 220 | h1 += h2; h1 += h3; h1 += h4;
|
| 221 | h2 += h1; h3 += h1; h4 += h1;
|
| 222 |
|
| 223 | h1 = fmix32(h1);
|
| 224 | h2 = fmix32(h2);
|
| 225 | h3 = fmix32(h3);
|
| 226 | h4 = fmix32(h4);
|
| 227 |
|
| 228 | h1 += h2; h1 += h3; h1 += h4;
|
| 229 | h2 += h1; h3 += h1; h4 += h1;
|
| 230 |
|
| 231 | ((uint32_t*)out)[0] = h1;
|
| 232 | ((uint32_t*)out)[1] = h2;
|
| 233 | ((uint32_t*)out)[2] = h3;
|
| 234 | ((uint32_t*)out)[3] = h4;
|
| 235 | }
|
| 236 |
|
| 237 | //-----------------------------------------------------------------------------
|
| 238 | // Block read - if your platform needs to do endian-swapping or can only
|
| 239 | // handle aligned reads, do the conversion here
|
| 240 |
|
| 241 | inline uint64_t getblock ( const uint64_t * p, int i )
|
| 242 | {
|
| 243 | return p[i];
|
| 244 | }
|
| 245 |
|
| 246 | //----------
|
| 247 | // Block mix - combine the key bits with the hash bits and scramble everything
|
| 248 |
|
| 249 | inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2, uint64_t & c1, uint64_t & c2 )
|
| 250 | {
|
| 251 | k1 *= c1;
|
| 252 | k1 = _rotl64(k1,23);
|
| 253 | k1 *= c2;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 254 |
|
| 255 | k2 *= c2;
|
| 256 | k2 = _rotl64(k2,23);
|
| 257 | k2 *= c1;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 258 |
|
| 259 | h1 = _rotl64(h1,17);
|
| 260 | h1 += h2;
|
| 261 | h1 ^= k1;
|
| 262 |
|
| 263 | h2 = _rotl64(h2,41);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 264 | h2 += h1;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 265 | h2 ^= k2;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 266 |
|
| 267 | h1 = h1*3+0x52dce729;
|
| 268 | h2 = h2*3+0x38495ab5;
|
| 269 |
|
| 270 | c1 = c1*5+0x7b7d159c;
|
| 271 | c2 = c2*5+0x6bce6396;
|
| 272 | }
|
| 273 |
|
| 274 | //----------
|
| 275 | // Finalization mix - avalanches all bits to within 0.05% bias
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 276 |
|
| 277 | inline uint64_t fmix64 ( uint64_t k )
|
| 278 | {
|
| 279 | k ^= k >> 33;
|
| 280 | k *= 0xff51afd7ed558ccd;
|
| 281 | k ^= k >> 33;
|
| 282 | k *= 0xc4ceb9fe1a85ec53;
|
| 283 | k ^= k >> 33;
|
| 284 |
|
| 285 | return k;
|
| 286 | }
|
| 287 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 288 | //----------
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 289 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 290 | void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, void * out )
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 291 | {
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 292 | const uint8_t * data = (const uint8_t*)key;
|
| 293 | const int nblocks = len / 16;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 294 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 295 | uint64_t h1 = 0x9368e53c2f6af274 ^ seed;
|
| 296 | uint64_t h2 = 0x586dcd208f7cd3fd ^ seed;
|
| 297 |
|
| 298 | uint64_t c1 = 0x87c37b91114253d5;
|
| 299 | uint64_t c2 = 0x4cf5ad432745937f;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 300 |
|
| 301 | //----------
|
| 302 | // body
|
| 303 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 304 | const uint64_t * blocks = (const uint64_t *)(data);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 305 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 306 | for(int i = 0; i < nblocks; i++)
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 307 | {
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 308 | uint64_t k1 = getblock(blocks,i*2+0);
|
| 309 | uint64_t k2 = getblock(blocks,i*2+1);
|
| 310 |
|
| 311 | bmix64(h1,h2,k1,k2,c1,c2);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 312 | }
|
| 313 |
|
| 314 | //----------
|
| 315 | // tail
|
| 316 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 317 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 318 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 319 | uint64_t k1 = 0;
|
| 320 | uint64_t k2 = 0;
|
| 321 |
|
| 322 | switch(len & 15)
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 323 | {
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 324 | case 15: k2 ^= uint64_t(tail[14]) << 48;
|
| 325 | case 14: k2 ^= uint64_t(tail[13]) << 40;
|
| 326 | case 13: k2 ^= uint64_t(tail[12]) << 32;
|
| 327 | case 12: k2 ^= uint64_t(tail[11]) << 24;
|
| 328 | case 11: k2 ^= uint64_t(tail[10]) << 16;
|
| 329 | case 10: k2 ^= uint64_t(tail[ 9]) << 8;
|
| 330 | case 9: k2 ^= uint64_t(tail[ 8]) << 0;
|
| 331 |
|
| 332 | case 8: k1 ^= uint64_t(tail[ 7]) << 56;
|
| 333 | case 7: k1 ^= uint64_t(tail[ 6]) << 48;
|
| 334 | case 6: k1 ^= uint64_t(tail[ 5]) << 40;
|
| 335 | case 5: k1 ^= uint64_t(tail[ 4]) << 32;
|
| 336 | case 4: k1 ^= uint64_t(tail[ 3]) << 24;
|
| 337 | case 3: k1 ^= uint64_t(tail[ 2]) << 16;
|
| 338 | case 2: k1 ^= uint64_t(tail[ 1]) << 8;
|
| 339 | case 1: k1 ^= uint64_t(tail[ 0]) << 0;
|
| 340 | bmix64(h1,h2,k1,k2,c1,c2);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 341 | };
|
| 342 |
|
| 343 | //----------
|
| 344 | // finalization
|
| 345 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 346 | h2 ^= len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 347 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 348 | h1 += h2;
|
| 349 | h2 += h1;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 350 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 351 | h1 = fmix64(h1);
|
| 352 | h2 = fmix64(h2);
|
| 353 |
|
| 354 | h1 += h2;
|
| 355 | h2 += h1;
|
| 356 |
|
| 357 | ((uint64_t*)out)[0] = h1;
|
| 358 | ((uint64_t*)out)[1] = h2;
|
| 359 | }
|
| 360 |
|
| 361 | //-----------------------------------------------------------------------------
|