tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 1 | //-----------------------------------------------------------------------------
|
aappleby@google.com | c2b49e0 | 2011-04-08 21:13:25 +0000 | [diff] [blame] | 2 | // MurmurHash2 was written by Austin Appleby, and is placed in the public
|
aappleby@google.com | c8e8bf8 | 2011-04-08 21:07:48 +0000 | [diff] [blame] | 3 | // domain. The author hereby disclaims copyright to this source code.
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 4 |
|
| 5 | // Note - This code makes a few assumptions about how your machine behaves -
|
| 6 |
|
| 7 | // 1. We can read a 4-byte value from any address without crashing
|
| 8 | // 2. sizeof(int) == 4
|
| 9 |
|
| 10 | // And it has a few limitations -
|
| 11 |
|
| 12 | // 1. It will not work incrementally.
|
| 13 | // 2. It will not produce the same results on little-endian and big-endian
|
| 14 | // machines.
|
| 15 |
|
aappleby@google.com | c8e8bf8 | 2011-04-08 21:07:48 +0000 | [diff] [blame] | 16 | #include "MurmurHash2.h"
|
| 17 |
|
| 18 | //-----------------------------------------------------------------------------
|
| 19 | // Platform-specific functions and macros
|
| 20 |
|
| 21 | // Microsoft Visual Studio
|
| 22 |
|
| 23 | #if defined(_MSC_VER)
|
| 24 |
|
| 25 | #define BIG_CONSTANT(x) (x)
|
| 26 |
|
| 27 | // Other compilers
|
| 28 |
|
| 29 | #else // defined(_MSC_VER)
|
| 30 |
|
| 31 | #define BIG_CONSTANT(x) (x##LLU)
|
| 32 |
|
| 33 | #endif // !defined(_MSC_VER)
|
| 34 |
|
| 35 | //-----------------------------------------------------------------------------
|
| 36 |
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 37 | uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
|
| 38 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 39 | // 'm' and 'r' are mixing constants generated offline.
|
| 40 | // They're not really 'magic', they just happen to work well.
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 41 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 42 | const uint32_t m = 0x5bd1e995;
|
| 43 | const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 44 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 45 | // Initialize the hash to a 'random' value
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 46 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 47 | uint32_t h = seed ^ len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 48 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 49 | // Mix 4 bytes at a time into the hash
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 50 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 51 | const unsigned char * data = (const unsigned char *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 52 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 53 | while(len >= 4)
|
| 54 | {
|
| 55 | uint32_t k = *(uint32_t*)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 56 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 57 | k *= m;
|
| 58 | k ^= k >> r;
|
| 59 | k *= m;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 60 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 61 | h *= m;
|
| 62 | h ^= k;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 63 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 64 | data += 4;
|
| 65 | len -= 4;
|
| 66 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 67 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 68 | // Handle the last few bytes of the input array
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 69 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 70 | switch(len)
|
| 71 | {
|
| 72 | case 3: h ^= data[2] << 16;
|
| 73 | case 2: h ^= data[1] << 8;
|
| 74 | case 1: h ^= data[0];
|
| 75 | h *= m;
|
| 76 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 77 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 78 | // Do a few final mixes of the hash to ensure the last few
|
| 79 | // bytes are well-incorporated.
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 80 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 81 | h ^= h >> 13;
|
| 82 | h *= m;
|
| 83 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 84 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 85 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 86 | }
|
| 87 |
|
| 88 | //-----------------------------------------------------------------------------
|
| 89 | // MurmurHash2, 64-bit versions, by Austin Appleby
|
| 90 |
|
| 91 | // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
|
| 92 | // and endian-ness issues if used across multiple platforms.
|
| 93 |
|
| 94 | // 64-bit hash for 64-bit platforms
|
| 95 |
|
| 96 | uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
|
| 97 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 98 | const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
|
| 99 | const int r = 47;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 100 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 101 | uint64_t h = seed ^ (len * m);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 102 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 103 | const uint64_t * data = (const uint64_t *)key;
|
| 104 | const uint64_t * end = data + (len/8);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 105 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 106 | while(data != end)
|
| 107 | {
|
| 108 | uint64_t k = *data++;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 109 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 110 | k *= m;
|
| 111 | k ^= k >> r;
|
| 112 | k *= m;
|
| 113 |
|
| 114 | h ^= k;
|
| 115 | h *= m;
|
| 116 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 117 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 118 | const unsigned char * data2 = (const unsigned char*)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 119 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 120 | switch(len & 7)
|
| 121 | {
|
| 122 | case 7: h ^= uint64_t(data2[6]) << 48;
|
| 123 | case 6: h ^= uint64_t(data2[5]) << 40;
|
| 124 | case 5: h ^= uint64_t(data2[4]) << 32;
|
| 125 | case 4: h ^= uint64_t(data2[3]) << 24;
|
| 126 | case 3: h ^= uint64_t(data2[2]) << 16;
|
| 127 | case 2: h ^= uint64_t(data2[1]) << 8;
|
| 128 | case 1: h ^= uint64_t(data2[0]);
|
| 129 | h *= m;
|
| 130 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 131 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 132 | h ^= h >> r;
|
| 133 | h *= m;
|
| 134 | h ^= h >> r;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 135 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 136 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 137 | }
|
| 138 |
|
| 139 |
|
| 140 | // 64-bit hash for 32-bit platforms
|
| 141 |
|
| 142 | uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
|
| 143 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 144 | const uint32_t m = 0x5bd1e995;
|
| 145 | const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 146 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 147 | uint32_t h1 = uint32_t(seed) ^ len;
|
| 148 | uint32_t h2 = uint32_t(seed >> 32);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 149 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 150 | const uint32_t * data = (const uint32_t *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 151 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 152 | while(len >= 8)
|
| 153 | {
|
| 154 | uint32_t k1 = *data++;
|
| 155 | k1 *= m; k1 ^= k1 >> r; k1 *= m;
|
| 156 | h1 *= m; h1 ^= k1;
|
| 157 | len -= 4;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 158 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 159 | uint32_t k2 = *data++;
|
| 160 | k2 *= m; k2 ^= k2 >> r; k2 *= m;
|
| 161 | h2 *= m; h2 ^= k2;
|
| 162 | len -= 4;
|
| 163 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 164 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 165 | if(len >= 4)
|
| 166 | {
|
| 167 | uint32_t k1 = *data++;
|
| 168 | k1 *= m; k1 ^= k1 >> r; k1 *= m;
|
| 169 | h1 *= m; h1 ^= k1;
|
| 170 | len -= 4;
|
| 171 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 172 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 173 | switch(len)
|
| 174 | {
|
| 175 | case 3: h2 ^= ((unsigned char*)data)[2] << 16;
|
| 176 | case 2: h2 ^= ((unsigned char*)data)[1] << 8;
|
| 177 | case 1: h2 ^= ((unsigned char*)data)[0];
|
| 178 | h2 *= m;
|
| 179 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 180 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 181 | h1 ^= h2 >> 18; h1 *= m;
|
| 182 | h2 ^= h1 >> 22; h2 *= m;
|
| 183 | h1 ^= h2 >> 17; h1 *= m;
|
| 184 | h2 ^= h1 >> 19; h2 *= m;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 185 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 186 | uint64_t h = h1;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 187 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 188 | h = (h << 32) | h2;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 189 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 190 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 191 | }
|
| 192 |
|
| 193 | //-----------------------------------------------------------------------------
|
| 194 | // MurmurHash2A, by Austin Appleby
|
| 195 |
|
| 196 | // This is a variant of MurmurHash2 modified to use the Merkle-Damgard
|
| 197 | // construction. Bulk speed should be identical to Murmur2, small-key speed
|
| 198 | // will be 10%-20% slower due to the added overhead at the end of the hash.
|
| 199 |
|
| 200 | // This variant fixes a minor issue where null keys were more likely to
|
| 201 | // collide with each other than expected, and also makes the function
|
| 202 | // more amenable to incremental implementations.
|
| 203 |
|
| 204 | #define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
|
| 205 |
|
| 206 | uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
|
| 207 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 208 | const uint32_t m = 0x5bd1e995;
|
| 209 | const int r = 24;
|
| 210 | uint32_t l = len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 211 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 212 | const unsigned char * data = (const unsigned char *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 213 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 214 | uint32_t h = seed;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 215 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 216 | while(len >= 4)
|
| 217 | {
|
| 218 | uint32_t k = *(uint32_t*)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 219 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 220 | mmix(h,k);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 221 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 222 | data += 4;
|
| 223 | len -= 4;
|
| 224 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 225 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 226 | uint32_t t = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 227 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 228 | switch(len)
|
| 229 | {
|
| 230 | case 3: t ^= data[2] << 16;
|
| 231 | case 2: t ^= data[1] << 8;
|
| 232 | case 1: t ^= data[0];
|
| 233 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 234 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 235 | mmix(h,t);
|
| 236 | mmix(h,l);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 237 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 238 | h ^= h >> 13;
|
| 239 | h *= m;
|
| 240 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 241 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 242 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 243 | }
|
| 244 |
|
| 245 | //-----------------------------------------------------------------------------
|
| 246 | // CMurmurHash2A, by Austin Appleby
|
| 247 |
|
| 248 | // This is a sample implementation of MurmurHash2A designed to work
|
| 249 | // incrementally.
|
| 250 |
|
| 251 | // Usage -
|
| 252 |
|
| 253 | // CMurmurHash2A hasher
|
| 254 | // hasher.Begin(seed);
|
| 255 | // hasher.Add(data1,size1);
|
| 256 | // hasher.Add(data2,size2);
|
| 257 | // ...
|
| 258 | // hasher.Add(dataN,sizeN);
|
| 259 | // uint32_t hash = hasher.End()
|
| 260 |
|
| 261 | class CMurmurHash2A
|
| 262 | {
|
| 263 | public:
|
| 264 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 265 | void Begin ( uint32_t seed = 0 )
|
| 266 | {
|
| 267 | m_hash = seed;
|
| 268 | m_tail = 0;
|
| 269 | m_count = 0;
|
| 270 | m_size = 0;
|
| 271 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 272 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 273 | void Add ( const unsigned char * data, int len )
|
| 274 | {
|
| 275 | m_size += len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 276 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 277 | MixTail(data,len);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 278 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 279 | while(len >= 4)
|
| 280 | {
|
| 281 | uint32_t k = *(uint32_t*)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 282 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 283 | mmix(m_hash,k);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 284 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 285 | data += 4;
|
| 286 | len -= 4;
|
| 287 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 288 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 289 | MixTail(data,len);
|
| 290 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 291 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 292 | uint32_t End ( void )
|
| 293 | {
|
| 294 | mmix(m_hash,m_tail);
|
| 295 | mmix(m_hash,m_size);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 296 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 297 | m_hash ^= m_hash >> 13;
|
| 298 | m_hash *= m;
|
| 299 | m_hash ^= m_hash >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 300 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 301 | return m_hash;
|
| 302 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 303 |
|
| 304 | private:
|
| 305 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 306 | static const uint32_t m = 0x5bd1e995;
|
| 307 | static const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 308 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 309 | void MixTail ( const unsigned char * & data, int & len )
|
| 310 | {
|
| 311 | while( len && ((len<4) || m_count) )
|
| 312 | {
|
| 313 | m_tail |= (*data++) << (m_count * 8);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 314 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 315 | m_count++;
|
| 316 | len--;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 317 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 318 | if(m_count == 4)
|
| 319 | {
|
| 320 | mmix(m_hash,m_tail);
|
| 321 | m_tail = 0;
|
| 322 | m_count = 0;
|
| 323 | }
|
| 324 | }
|
| 325 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 326 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 327 | uint32_t m_hash;
|
| 328 | uint32_t m_tail;
|
| 329 | uint32_t m_count;
|
| 330 | uint32_t m_size;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 331 | };
|
| 332 |
|
| 333 | //-----------------------------------------------------------------------------
|
| 334 | // MurmurHashNeutral2, by Austin Appleby
|
| 335 |
|
| 336 | // Same as MurmurHash2, but endian- and alignment-neutral.
|
| 337 | // Half the speed though, alas.
|
| 338 |
|
| 339 | uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
|
| 340 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 341 | const uint32_t m = 0x5bd1e995;
|
| 342 | const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 343 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 344 | uint32_t h = seed ^ len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 345 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 346 | const unsigned char * data = (const unsigned char *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 347 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 348 | while(len >= 4)
|
| 349 | {
|
| 350 | uint32_t k;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 351 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 352 | k = data[0];
|
| 353 | k |= data[1] << 8;
|
| 354 | k |= data[2] << 16;
|
| 355 | k |= data[3] << 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 356 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 357 | k *= m;
|
| 358 | k ^= k >> r;
|
| 359 | k *= m;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 360 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 361 | h *= m;
|
| 362 | h ^= k;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 363 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 364 | data += 4;
|
| 365 | len -= 4;
|
| 366 | }
|
| 367 |
|
| 368 | switch(len)
|
| 369 | {
|
| 370 | case 3: h ^= data[2] << 16;
|
| 371 | case 2: h ^= data[1] << 8;
|
| 372 | case 1: h ^= data[0];
|
| 373 | h *= m;
|
| 374 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 375 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 376 | h ^= h >> 13;
|
| 377 | h *= m;
|
| 378 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 379 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 380 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 381 | }
|
| 382 |
|
| 383 | //-----------------------------------------------------------------------------
|
| 384 | // MurmurHashAligned2, by Austin Appleby
|
| 385 |
|
| 386 | // Same algorithm as MurmurHash2, but only does aligned reads - should be safer
|
| 387 | // on certain platforms.
|
| 388 |
|
| 389 | // Performance will be lower than MurmurHash2
|
| 390 |
|
| 391 | #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
|
| 392 |
|
aappleby@google.com | c2b49e0 | 2011-04-08 21:13:25 +0000 | [diff] [blame] | 393 |
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 394 | uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
|
| 395 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 396 | const uint32_t m = 0x5bd1e995;
|
| 397 | const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 398 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 399 | const unsigned char * data = (const unsigned char *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 400 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 401 | uint32_t h = seed ^ len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 402 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 403 | int align = (uint64_t)data & 3;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 404 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 405 | if(align && (len >= 4))
|
| 406 | {
|
| 407 | // Pre-load the temp registers
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 408 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 409 | uint32_t t = 0, d = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 410 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 411 | switch(align)
|
| 412 | {
|
| 413 | case 1: t |= data[2] << 16;
|
| 414 | case 2: t |= data[1] << 8;
|
| 415 | case 3: t |= data[0];
|
| 416 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 417 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 418 | t <<= (8 * align);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 419 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 420 | data += 4-align;
|
| 421 | len -= 4-align;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 422 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 423 | int sl = 8 * (4-align);
|
| 424 | int sr = 8 * align;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 425 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 426 | // Mix
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 427 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 428 | while(len >= 4)
|
| 429 | {
|
| 430 | d = *(uint32_t *)data;
|
| 431 | t = (t >> sr) | (d << sl);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 432 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 433 | uint32_t k = t;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 434 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 435 | MIX(h,k,m);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 436 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 437 | t = d;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 438 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 439 | data += 4;
|
| 440 | len -= 4;
|
| 441 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 442 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 443 | // Handle leftover data in temp registers
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 444 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 445 | d = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 446 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 447 | if(len >= align)
|
| 448 | {
|
| 449 | switch(align)
|
| 450 | {
|
| 451 | case 3: d |= data[2] << 16;
|
| 452 | case 2: d |= data[1] << 8;
|
| 453 | case 1: d |= data[0];
|
| 454 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 455 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 456 | uint32_t k = (t >> sr) | (d << sl);
|
| 457 | MIX(h,k,m);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 458 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 459 | data += align;
|
| 460 | len -= align;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 461 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 462 | //----------
|
| 463 | // Handle tail bytes
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 464 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 465 | switch(len)
|
| 466 | {
|
| 467 | case 3: h ^= data[2] << 16;
|
| 468 | case 2: h ^= data[1] << 8;
|
| 469 | case 1: h ^= data[0];
|
| 470 | h *= m;
|
| 471 | };
|
| 472 | }
|
| 473 | else
|
| 474 | {
|
| 475 | switch(len)
|
| 476 | {
|
| 477 | case 3: d |= data[2] << 16;
|
| 478 | case 2: d |= data[1] << 8;
|
| 479 | case 1: d |= data[0];
|
| 480 | case 0: h ^= (t >> sr) | (d << sl);
|
| 481 | h *= m;
|
| 482 | }
|
| 483 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 484 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 485 | h ^= h >> 13;
|
| 486 | h *= m;
|
| 487 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 488 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 489 | return h;
|
| 490 | }
|
| 491 | else
|
| 492 | {
|
| 493 | while(len >= 4)
|
| 494 | {
|
| 495 | uint32_t k = *(uint32_t *)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 496 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 497 | MIX(h,k,m);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 498 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 499 | data += 4;
|
| 500 | len -= 4;
|
| 501 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 502 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 503 | //----------
|
| 504 | // Handle tail bytes
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 505 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 506 | switch(len)
|
| 507 | {
|
| 508 | case 3: h ^= data[2] << 16;
|
| 509 | case 2: h ^= data[1] << 8;
|
| 510 | case 1: h ^= data[0];
|
| 511 | h *= m;
|
| 512 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 513 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 514 | h ^= h >> 13;
|
| 515 | h *= m;
|
| 516 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 517 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 518 | return h;
|
| 519 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 520 | }
|
| 521 |
|
| 522 | //-----------------------------------------------------------------------------
|
| 523 |
|