tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 1 | #include "MurmurHash3.h"
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 2 |
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 3 | // Note - The x86 and x64 versions do _not_ produce the same results, as the
|
| 4 | // algorithms are optimized for their respective platforms. You can still
|
| 5 | // compile and run any of them on any platform, but your performance with the
|
| 6 | // non-native version will be less than optimal.
|
| 7 |
|
| 8 |
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 9 | //-----------------------------------------------------------------------------
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 10 | // Block read - if your platform needs to do endian-swapping or can only
|
| 11 | // handle aligned reads, do the conversion here
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 12 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 13 | FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 14 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 15 | return p[i];
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 16 | }
|
| 17 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 18 | //----------
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 19 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 20 | FORCE_INLINE void bmix32 ( uint32_t & h1, uint32_t & k1,
|
| 21 | uint32_t & c1, uint32_t & c2 )
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 22 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 23 | c1 = c1*5+0x7b7d159c;
|
| 24 | c2 = c2*5+0x6bce6396;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 25 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 26 | k1 *= c1;
|
| 27 | k1 = ROTL32(k1,11);
|
| 28 | k1 *= c2;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 29 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 30 | h1 = ROTL32(h1,13);
|
| 31 | h1 = h1*5+0x52dce729;
|
| 32 | h1 ^= k1;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 33 | }
|
| 34 |
|
| 35 | //----------
|
| 36 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 37 | void MurmurHash3_x86_32 ( const void * key, int len,
|
| 38 | uint32_t seed, void * out )
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 39 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 40 | const uint8_t * data = (const uint8_t*)key;
|
| 41 | const int nblocks = len / 4;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 42 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 43 | uint32_t h1 = 0x971e137b ^ seed;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 44 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 45 | uint32_t c1 = 0x95543787;
|
| 46 | uint32_t c2 = 0x2ad7eb25;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 47 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 48 | //----------
|
| 49 | // body
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 50 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 51 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 52 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 53 | for(int i = -nblocks; i; i++)
|
| 54 | {
|
| 55 | uint32_t k1 = getblock(blocks,i);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 56 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 57 | bmix32(h1,k1,c1,c2);
|
| 58 | }
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 59 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 60 | //----------
|
| 61 | // tail
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 62 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 63 | const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 64 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 65 | uint32_t k1 = 0;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 66 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 67 | switch(len & 3)
|
| 68 | {
|
| 69 | case 3: k1 ^= tail[2] << 16;
|
| 70 | case 2: k1 ^= tail[1] << 8;
|
| 71 | case 1: k1 ^= tail[0];
|
| 72 | bmix32(h1,k1,c1,c2);
|
| 73 | };
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 74 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 75 | //----------
|
| 76 | // finalization
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 77 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 78 | h1 ^= len;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 79 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 80 | h1 *= 0x85ebca6b;
|
| 81 | h1 ^= h1 >> 13;
|
| 82 | h1 *= 0xc2b2ae35;
|
| 83 | h1 ^= h1 >> 16;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 84 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 85 | h1 ^= seed;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 86 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 87 | *(uint32_t*)out = h1;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 88 | }
|
| 89 |
|
| 90 | //-----------------------------------------------------------------------------
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 91 | // This mix is large enough that VC++ refuses to inline it unless we use
|
| 92 | // __forceinline. It's also not all that fast due to register spillage.
|
| 93 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 94 | FORCE_INLINE void bmix32 ( uint32_t & h1, uint32_t & h2,
|
| 95 | uint32_t & h3, uint32_t & h4,
|
| 96 | uint32_t & k1, uint32_t & k2,
|
| 97 | uint32_t & k3, uint32_t & k4,
|
| 98 | uint32_t & c1, uint32_t & c2 )
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 99 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 100 | k1 *= c1;
|
| 101 | k1 = ROTL32(k1,11);
|
| 102 | k1 *= c2;
|
| 103 | h1 ^= k1;
|
| 104 | h1 += h2;
|
| 105 | h1 += h3;
|
| 106 | h1 += h4;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 107 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 108 | h1 = ROTL32(h1,17);
|
tanjent@gmail.com | 31a9e8e | 2010-11-09 20:29:19 +0000 | [diff] [blame] | 109 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 110 | k2 *= c2;
|
| 111 | k2 = ROTL32(k2,11);
|
| 112 | k2 *= c1;
|
| 113 | h2 ^= k2;
|
| 114 | h2 += h1;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 115 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 116 | h1 = h1*3+0x52dce729;
|
| 117 | h2 = h2*3+0x38495ab5;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 118 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 119 | c1 = c1*5+0x7b7d159c;
|
| 120 | c2 = c2*5+0x6bce6396;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 121 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 122 | k3 *= c1;
|
| 123 | k3 = ROTL32(k3,11);
|
| 124 | k3 *= c2;
|
| 125 | h3 ^= k3;
|
| 126 | h3 += h1;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 127 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 128 | k4 *= c2;
|
| 129 | k4 = ROTL32(k4,11);
|
| 130 | k4 *= c1;
|
| 131 | h4 ^= k4;
|
| 132 | h4 += h1;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 133 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 134 | h3 = h3*3+0x52dce729;
|
| 135 | h4 = h4*3+0x38495ab5;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 136 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 137 | c1 = c1*5+0x7b7d159c;
|
| 138 | c2 = c2*5+0x6bce6396;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 139 | }
|
| 140 |
|
| 141 | //----------
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 142 | // Finalization mix - force all bits of a hash block to avalanche
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 143 |
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 144 | // avalanches all bits to within 0.25% bias
|
| 145 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 146 | FORCE_INLINE uint32_t fmix32 ( uint32_t h )
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 147 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 148 | h ^= h >> 16;
|
| 149 | h *= 0x85ebca6b;
|
| 150 | h ^= h >> 13;
|
| 151 | h *= 0xc2b2ae35;
|
| 152 | h ^= h >> 16;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 153 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 154 | return h;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 155 | }
|
| 156 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 157 | void MurmurHash3_x86_128 ( const void * key, const int len,
|
| 158 | uint32_t seed, void * out )
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 159 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 160 | const uint8_t * data = (const uint8_t*)key;
|
| 161 | const int nblocks = len / 16;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 162 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 163 | uint32_t h1 = 0x8de1c3ac ^ seed;
|
| 164 | uint32_t h2 = 0xbab98226 ^ seed;
|
| 165 | uint32_t h3 = 0xfcba5b2d ^ seed;
|
| 166 | uint32_t h4 = 0x32452e3e ^ seed;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 167 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 168 | uint32_t c1 = 0x95543787;
|
| 169 | uint32_t c2 = 0x2ad7eb25;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 170 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 171 | //----------
|
| 172 | // body
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 173 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 174 | const uint32_t * blocks = (const uint32_t *)(data);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 175 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 176 | for(int i = 0; i < nblocks; i++)
|
| 177 | {
|
| 178 | uint32_t k1 = getblock(blocks,i*4+0);
|
| 179 | uint32_t k2 = getblock(blocks,i*4+1);
|
| 180 | uint32_t k3 = getblock(blocks,i*4+2);
|
| 181 | uint32_t k4 = getblock(blocks,i*4+3);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 182 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 183 | bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
|
| 184 | }
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 185 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 186 | //----------
|
| 187 | // tail
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 188 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 189 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 190 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 191 | uint32_t k1 = 0;
|
| 192 | uint32_t k2 = 0;
|
| 193 | uint32_t k3 = 0;
|
| 194 | uint32_t k4 = 0;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 195 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 196 | switch(len & 15)
|
| 197 | {
|
| 198 | case 15: k4 ^= tail[14] << 16;
|
| 199 | case 14: k4 ^= tail[13] << 8;
|
| 200 | case 13: k4 ^= tail[12] << 0;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 201 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 202 | case 12: k3 ^= tail[11] << 24;
|
| 203 | case 11: k3 ^= tail[10] << 16;
|
| 204 | case 10: k3 ^= tail[ 9] << 8;
|
| 205 | case 9: k3 ^= tail[ 8] << 0;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 206 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 207 | case 8: k2 ^= tail[ 7] << 24;
|
| 208 | case 7: k2 ^= tail[ 6] << 16;
|
| 209 | case 6: k2 ^= tail[ 5] << 8;
|
| 210 | case 5: k2 ^= tail[ 4] << 0;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 211 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 212 | case 4: k1 ^= tail[ 3] << 24;
|
| 213 | case 3: k1 ^= tail[ 2] << 16;
|
| 214 | case 2: k1 ^= tail[ 1] << 8;
|
| 215 | case 1: k1 ^= tail[ 0] << 0;
|
| 216 | bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
|
| 217 | };
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 218 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 219 | //----------
|
| 220 | // finalization
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 221 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 222 | h4 ^= len;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 223 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 224 | h1 += h2; h1 += h3; h1 += h4;
|
| 225 | h2 += h1; h3 += h1; h4 += h1;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 226 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 227 | h1 = fmix32(h1);
|
| 228 | h2 = fmix32(h2);
|
| 229 | h3 = fmix32(h3);
|
| 230 | h4 = fmix32(h4);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 231 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 232 | h1 += h2; h1 += h3; h1 += h4;
|
| 233 | h2 += h1; h3 += h1; h4 += h1;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 234 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 235 | ((uint32_t*)out)[0] = h1;
|
| 236 | ((uint32_t*)out)[1] = h2;
|
| 237 | ((uint32_t*)out)[2] = h3;
|
| 238 | ((uint32_t*)out)[3] = h4;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 239 | }
|
| 240 |
|
| 241 | //-----------------------------------------------------------------------------
|
| 242 | // Block read - if your platform needs to do endian-swapping or can only
|
| 243 | // handle aligned reads, do the conversion here
|
| 244 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 245 | FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 246 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 247 | return p[i];
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 248 | }
|
| 249 |
|
| 250 | //----------
|
| 251 | // Block mix - combine the key bits with the hash bits and scramble everything
|
| 252 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 253 | FORCE_INLINE void bmix64 ( uint64_t & h1, uint64_t & h2,
|
| 254 | uint64_t & k1, uint64_t & k2,
|
| 255 | uint64_t & c1, uint64_t & c2 )
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 256 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 257 | k1 *= c1;
|
| 258 | k1 = ROTL64(k1,23);
|
| 259 | k1 *= c2;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 260 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 261 | k2 *= c2;
|
| 262 | k2 = ROTL64(k2,23);
|
| 263 | k2 *= c1;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 264 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 265 | h1 = ROTL64(h1,17);
|
| 266 | h1 += h2;
|
| 267 | h1 ^= k1;
|
tanjent@gmail.com | babb553 | 2011-02-28 06:03:12 +0000 | [diff] [blame] | 268 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 269 | h2 = ROTL64(h2,41);
|
| 270 | h2 += h1;
|
| 271 | h2 ^= k2;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 272 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 273 | h1 = h1*3+0x52dce729;
|
| 274 | h2 = h2*3+0x38495ab5;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 275 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 276 | c1 = c1*5+0x7b7d159c;
|
| 277 | c2 = c2*5+0x6bce6396;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 278 | }
|
| 279 |
|
| 280 | //----------
|
| 281 | // Finalization mix - avalanches all bits to within 0.05% bias
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 282 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 283 | FORCE_INLINE uint64_t fmix64 ( uint64_t k )
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 284 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 285 | k ^= k >> 33;
|
| 286 | k *= BIG_CONSTANT(0xff51afd7ed558ccd);
|
| 287 | k ^= k >> 33;
|
| 288 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
|
| 289 | k ^= k >> 33;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 290 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 291 | return k;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 292 | }
|
| 293 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 294 | //----------
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 295 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 296 | void MurmurHash3_x64_128 ( const void * key, const int len,
|
| 297 | const uint32_t seed, void * out )
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 298 | {
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 299 | const uint8_t * data = (const uint8_t*)key;
|
| 300 | const int nblocks = len / 16;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 301 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 302 | uint64_t h1 = BIG_CONSTANT(0x9368e53c2f6af274) ^ seed;
|
| 303 | uint64_t h2 = BIG_CONSTANT(0x586dcd208f7cd3fd) ^ seed;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 304 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 305 | uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
|
| 306 | uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 307 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 308 | //----------
|
| 309 | // body
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 310 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 311 | const uint64_t * blocks = (const uint64_t *)(data);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 312 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 313 | for(int i = 0; i < nblocks; i++)
|
| 314 | {
|
| 315 | uint64_t k1 = getblock(blocks,i*2+0);
|
| 316 | uint64_t k2 = getblock(blocks,i*2+1);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 317 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 318 | bmix64(h1,h2,k1,k2,c1,c2);
|
| 319 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 320 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 321 | //----------
|
| 322 | // tail
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 323 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 324 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 325 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 326 | uint64_t k1 = 0;
|
| 327 | uint64_t k2 = 0;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 328 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 329 | switch(len & 15)
|
| 330 | {
|
| 331 | case 15: k2 ^= uint64_t(tail[14]) << 48;
|
| 332 | case 14: k2 ^= uint64_t(tail[13]) << 40;
|
| 333 | case 13: k2 ^= uint64_t(tail[12]) << 32;
|
| 334 | case 12: k2 ^= uint64_t(tail[11]) << 24;
|
| 335 | case 11: k2 ^= uint64_t(tail[10]) << 16;
|
| 336 | case 10: k2 ^= uint64_t(tail[ 9]) << 8;
|
| 337 | case 9: k2 ^= uint64_t(tail[ 8]) << 0;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 338 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 339 | case 8: k1 ^= uint64_t(tail[ 7]) << 56;
|
| 340 | case 7: k1 ^= uint64_t(tail[ 6]) << 48;
|
| 341 | case 6: k1 ^= uint64_t(tail[ 5]) << 40;
|
| 342 | case 5: k1 ^= uint64_t(tail[ 4]) << 32;
|
| 343 | case 4: k1 ^= uint64_t(tail[ 3]) << 24;
|
| 344 | case 3: k1 ^= uint64_t(tail[ 2]) << 16;
|
| 345 | case 2: k1 ^= uint64_t(tail[ 1]) << 8;
|
| 346 | case 1: k1 ^= uint64_t(tail[ 0]) << 0;
|
| 347 | bmix64(h1,h2,k1,k2,c1,c2);
|
| 348 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 349 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 350 | //----------
|
| 351 | // finalization
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 352 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 353 | h2 ^= len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 354 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 355 | h1 += h2;
|
| 356 | h2 += h1;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 357 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 358 | h1 = fmix64(h1);
|
| 359 | h2 = fmix64(h2);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 360 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 361 | h1 += h2;
|
| 362 | h2 += h1;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 363 |
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 364 | ((uint64_t*)out)[0] = h1;
|
| 365 | ((uint64_t*)out)[1] = h2;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 366 | }
|
| 367 |
|
| 368 | //-----------------------------------------------------------------------------
|
tanjent@gmail.com | f67ce94 | 2011-03-14 09:11:18 +0000 | [diff] [blame^] | 369 | // Quick copy-pasted test code for GCC build
|
| 370 |
|
| 371 | // This should print -
|
| 372 |
|
| 373 | // "The quick brown fox jumps over the lazy dog" => { 0x38585ecf, 0x5f6d752a, 0x0157c98a, 0x8c686b9b, }
|
| 374 | // "The quick brown fox jumps over the lazy cog" => { 0x6d3fd6f0, 0xc86a98a0, 0x4d6fac1c, 0x8f3e52b4, }
|
| 375 |
|
| 376 | #ifndef _MSC_VER
|
| 377 |
|
| 378 | /*
|
| 379 | #include <assert.h>
|
| 380 | #include <stdio.h>
|
| 381 | #include <string.h>
|
| 382 |
|
| 383 | typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
|
| 384 |
|
| 385 | void printhex32 ( void * blob, int len )
|
| 386 | {
|
| 387 | assert((len & 3) == 0);
|
| 388 |
|
| 389 | uint32_t * d = (uint32_t*)blob;
|
| 390 |
|
| 391 | printf("{ ");
|
| 392 |
|
| 393 | for(int i = 0; i < len/4; i++)
|
| 394 | {
|
| 395 | printf("0x%08x, ",d[i]);
|
| 396 | }
|
| 397 |
|
| 398 | printf("}");
|
| 399 | }
|
| 400 |
|
| 401 | void QuickBrownFox ( pfHash hash, const int hashbits )
|
| 402 | {
|
| 403 | const int hashbytes = hashbits / 8;
|
| 404 |
|
| 405 | const char * text1 = "The quick brown fox jumps over the lazy dog";
|
| 406 | const char * text2 = "The quick brown fox jumps over the lazy cog";
|
| 407 |
|
| 408 | uint8_t h1[128];
|
| 409 | uint8_t h2[128];
|
| 410 |
|
| 411 | hash(text1,(int)strlen(text1),0,h1);
|
| 412 | hash(text2,(int)strlen(text2),0,h2);
|
| 413 |
|
| 414 | printf("\"%s\" => ",text1);
|
| 415 | printhex32(h1,hashbytes);
|
| 416 | printf("\n");
|
| 417 |
|
| 418 | printf("\"%s\" => ",text2);
|
| 419 | printhex32(h2,hashbytes);
|
| 420 | printf("\n");
|
| 421 |
|
| 422 | printf("\n");
|
| 423 | }
|
| 424 |
|
| 425 | int main ( int argc, char** argv )
|
| 426 | {
|
| 427 | QuickBrownFox(&MurmurHash3_x64_128,128);
|
| 428 | }
|
| 429 | */
|
| 430 |
|
| 431 | #endif
|