tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 1 | #include "MurmurHash2.h"
|
| 2 |
|
| 3 | //-----------------------------------------------------------------------------
|
| 4 | // MurmurHash2, by Austin Appleby
|
| 5 |
|
| 6 | // Note - This code makes a few assumptions about how your machine behaves -
|
| 7 |
|
| 8 | // 1. We can read a 4-byte value from any address without crashing
|
| 9 | // 2. sizeof(int) == 4
|
| 10 |
|
| 11 | // And it has a few limitations -
|
| 12 |
|
| 13 | // 1. It will not work incrementally.
|
| 14 | // 2. It will not produce the same results on little-endian and big-endian
|
| 15 | // machines.
|
| 16 |
|
| 17 | uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
|
| 18 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 19 | // 'm' and 'r' are mixing constants generated offline.
|
| 20 | // They're not really 'magic', they just happen to work well.
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 21 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 22 | const uint32_t m = 0x5bd1e995;
|
| 23 | const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 24 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 25 | // Initialize the hash to a 'random' value
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 26 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 27 | uint32_t h = seed ^ len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 28 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 29 | // Mix 4 bytes at a time into the hash
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 30 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 31 | const unsigned char * data = (const unsigned char *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 32 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 33 | while(len >= 4)
|
| 34 | {
|
| 35 | uint32_t k = *(uint32_t*)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 36 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 37 | k *= m;
|
| 38 | k ^= k >> r;
|
| 39 | k *= m;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 40 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 41 | h *= m;
|
| 42 | h ^= k;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 43 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 44 | data += 4;
|
| 45 | len -= 4;
|
| 46 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 47 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 48 | // Handle the last few bytes of the input array
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 49 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 50 | switch(len)
|
| 51 | {
|
| 52 | case 3: h ^= data[2] << 16;
|
| 53 | case 2: h ^= data[1] << 8;
|
| 54 | case 1: h ^= data[0];
|
| 55 | h *= m;
|
| 56 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 57 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 58 | // Do a few final mixes of the hash to ensure the last few
|
| 59 | // bytes are well-incorporated.
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 60 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 61 | h ^= h >> 13;
|
| 62 | h *= m;
|
| 63 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 64 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 65 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 66 | }
|
| 67 |
|
| 68 | //-----------------------------------------------------------------------------
|
| 69 | // MurmurHash2, 64-bit versions, by Austin Appleby
|
| 70 |
|
| 71 | // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
|
| 72 | // and endian-ness issues if used across multiple platforms.
|
| 73 |
|
| 74 | // 64-bit hash for 64-bit platforms
|
| 75 |
|
| 76 | uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
|
| 77 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 78 | const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
|
| 79 | const int r = 47;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 80 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 81 | uint64_t h = seed ^ (len * m);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 82 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 83 | const uint64_t * data = (const uint64_t *)key;
|
| 84 | const uint64_t * end = data + (len/8);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 85 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 86 | while(data != end)
|
| 87 | {
|
| 88 | uint64_t k = *data++;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 89 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 90 | k *= m;
|
| 91 | k ^= k >> r;
|
| 92 | k *= m;
|
| 93 |
|
| 94 | h ^= k;
|
| 95 | h *= m;
|
| 96 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 97 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 98 | const unsigned char * data2 = (const unsigned char*)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 99 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 100 | switch(len & 7)
|
| 101 | {
|
| 102 | case 7: h ^= uint64_t(data2[6]) << 48;
|
| 103 | case 6: h ^= uint64_t(data2[5]) << 40;
|
| 104 | case 5: h ^= uint64_t(data2[4]) << 32;
|
| 105 | case 4: h ^= uint64_t(data2[3]) << 24;
|
| 106 | case 3: h ^= uint64_t(data2[2]) << 16;
|
| 107 | case 2: h ^= uint64_t(data2[1]) << 8;
|
| 108 | case 1: h ^= uint64_t(data2[0]);
|
| 109 | h *= m;
|
| 110 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 111 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 112 | h ^= h >> r;
|
| 113 | h *= m;
|
| 114 | h ^= h >> r;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 115 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 116 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 117 | }
|
| 118 |
|
| 119 |
|
| 120 | // 64-bit hash for 32-bit platforms
|
| 121 |
|
| 122 | uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
|
| 123 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 124 | const uint32_t m = 0x5bd1e995;
|
| 125 | const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 126 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 127 | uint32_t h1 = uint32_t(seed) ^ len;
|
| 128 | uint32_t h2 = uint32_t(seed >> 32);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 129 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 130 | const uint32_t * data = (const uint32_t *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 131 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 132 | while(len >= 8)
|
| 133 | {
|
| 134 | uint32_t k1 = *data++;
|
| 135 | k1 *= m; k1 ^= k1 >> r; k1 *= m;
|
| 136 | h1 *= m; h1 ^= k1;
|
| 137 | len -= 4;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 138 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 139 | uint32_t k2 = *data++;
|
| 140 | k2 *= m; k2 ^= k2 >> r; k2 *= m;
|
| 141 | h2 *= m; h2 ^= k2;
|
| 142 | len -= 4;
|
| 143 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 144 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 145 | if(len >= 4)
|
| 146 | {
|
| 147 | uint32_t k1 = *data++;
|
| 148 | k1 *= m; k1 ^= k1 >> r; k1 *= m;
|
| 149 | h1 *= m; h1 ^= k1;
|
| 150 | len -= 4;
|
| 151 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 152 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 153 | switch(len)
|
| 154 | {
|
| 155 | case 3: h2 ^= ((unsigned char*)data)[2] << 16;
|
| 156 | case 2: h2 ^= ((unsigned char*)data)[1] << 8;
|
| 157 | case 1: h2 ^= ((unsigned char*)data)[0];
|
| 158 | h2 *= m;
|
| 159 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 160 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 161 | h1 ^= h2 >> 18; h1 *= m;
|
| 162 | h2 ^= h1 >> 22; h2 *= m;
|
| 163 | h1 ^= h2 >> 17; h1 *= m;
|
| 164 | h2 ^= h1 >> 19; h2 *= m;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 165 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 166 | uint64_t h = h1;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 167 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 168 | h = (h << 32) | h2;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 169 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 170 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 171 | }
|
| 172 |
|
| 173 | //-----------------------------------------------------------------------------
|
| 174 | // MurmurHash2A, by Austin Appleby
|
| 175 |
|
| 176 | // This is a variant of MurmurHash2 modified to use the Merkle-Damgard
|
| 177 | // construction. Bulk speed should be identical to Murmur2, small-key speed
|
| 178 | // will be 10%-20% slower due to the added overhead at the end of the hash.
|
| 179 |
|
| 180 | // This variant fixes a minor issue where null keys were more likely to
|
| 181 | // collide with each other than expected, and also makes the function
|
| 182 | // more amenable to incremental implementations.
|
| 183 |
|
| 184 | #define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
|
| 185 |
|
| 186 | uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
|
| 187 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 188 | const uint32_t m = 0x5bd1e995;
|
| 189 | const int r = 24;
|
| 190 | uint32_t l = len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 191 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 192 | const unsigned char * data = (const unsigned char *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 193 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 194 | uint32_t h = seed;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 195 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 196 | while(len >= 4)
|
| 197 | {
|
| 198 | uint32_t k = *(uint32_t*)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 199 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 200 | mmix(h,k);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 201 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 202 | data += 4;
|
| 203 | len -= 4;
|
| 204 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 205 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 206 | uint32_t t = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 207 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 208 | switch(len)
|
| 209 | {
|
| 210 | case 3: t ^= data[2] << 16;
|
| 211 | case 2: t ^= data[1] << 8;
|
| 212 | case 1: t ^= data[0];
|
| 213 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 214 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 215 | mmix(h,t);
|
| 216 | mmix(h,l);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 217 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 218 | h ^= h >> 13;
|
| 219 | h *= m;
|
| 220 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 221 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 222 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 223 | }
|
| 224 |
|
| 225 | //-----------------------------------------------------------------------------
|
| 226 | // CMurmurHash2A, by Austin Appleby
|
| 227 |
|
| 228 | // This is a sample implementation of MurmurHash2A designed to work
|
| 229 | // incrementally.
|
| 230 |
|
| 231 | // Usage -
|
| 232 |
|
| 233 | // CMurmurHash2A hasher
|
| 234 | // hasher.Begin(seed);
|
| 235 | // hasher.Add(data1,size1);
|
| 236 | // hasher.Add(data2,size2);
|
| 237 | // ...
|
| 238 | // hasher.Add(dataN,sizeN);
|
| 239 | // uint32_t hash = hasher.End()
|
| 240 |
|
| 241 | class CMurmurHash2A
|
| 242 | {
|
| 243 | public:
|
| 244 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 245 | void Begin ( uint32_t seed = 0 )
|
| 246 | {
|
| 247 | m_hash = seed;
|
| 248 | m_tail = 0;
|
| 249 | m_count = 0;
|
| 250 | m_size = 0;
|
| 251 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 252 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 253 | void Add ( const unsigned char * data, int len )
|
| 254 | {
|
| 255 | m_size += len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 256 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 257 | MixTail(data,len);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 258 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 259 | while(len >= 4)
|
| 260 | {
|
| 261 | uint32_t k = *(uint32_t*)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 262 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 263 | mmix(m_hash,k);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 264 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 265 | data += 4;
|
| 266 | len -= 4;
|
| 267 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 268 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 269 | MixTail(data,len);
|
| 270 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 271 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 272 | uint32_t End ( void )
|
| 273 | {
|
| 274 | mmix(m_hash,m_tail);
|
| 275 | mmix(m_hash,m_size);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 276 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 277 | m_hash ^= m_hash >> 13;
|
| 278 | m_hash *= m;
|
| 279 | m_hash ^= m_hash >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 280 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 281 | return m_hash;
|
| 282 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 283 |
|
| 284 | private:
|
| 285 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 286 | static const uint32_t m = 0x5bd1e995;
|
| 287 | static const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 288 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 289 | void MixTail ( const unsigned char * & data, int & len )
|
| 290 | {
|
| 291 | while( len && ((len<4) || m_count) )
|
| 292 | {
|
| 293 | m_tail |= (*data++) << (m_count * 8);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 294 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 295 | m_count++;
|
| 296 | len--;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 297 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 298 | if(m_count == 4)
|
| 299 | {
|
| 300 | mmix(m_hash,m_tail);
|
| 301 | m_tail = 0;
|
| 302 | m_count = 0;
|
| 303 | }
|
| 304 | }
|
| 305 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 306 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 307 | uint32_t m_hash;
|
| 308 | uint32_t m_tail;
|
| 309 | uint32_t m_count;
|
| 310 | uint32_t m_size;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 311 | };
|
| 312 |
|
| 313 | //-----------------------------------------------------------------------------
|
| 314 | // MurmurHashNeutral2, by Austin Appleby
|
| 315 |
|
| 316 | // Same as MurmurHash2, but endian- and alignment-neutral.
|
| 317 | // Half the speed though, alas.
|
| 318 |
|
| 319 | uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
|
| 320 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 321 | const uint32_t m = 0x5bd1e995;
|
| 322 | const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 323 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 324 | uint32_t h = seed ^ len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 325 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 326 | const unsigned char * data = (const unsigned char *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 327 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 328 | while(len >= 4)
|
| 329 | {
|
| 330 | uint32_t k;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 331 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 332 | k = data[0];
|
| 333 | k |= data[1] << 8;
|
| 334 | k |= data[2] << 16;
|
| 335 | k |= data[3] << 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 336 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 337 | k *= m;
|
| 338 | k ^= k >> r;
|
| 339 | k *= m;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 340 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 341 | h *= m;
|
| 342 | h ^= k;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 343 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 344 | data += 4;
|
| 345 | len -= 4;
|
| 346 | }
|
| 347 |
|
| 348 | switch(len)
|
| 349 | {
|
| 350 | case 3: h ^= data[2] << 16;
|
| 351 | case 2: h ^= data[1] << 8;
|
| 352 | case 1: h ^= data[0];
|
| 353 | h *= m;
|
| 354 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 355 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 356 | h ^= h >> 13;
|
| 357 | h *= m;
|
| 358 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 359 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 360 | return h;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 361 | }
|
| 362 |
|
| 363 | //-----------------------------------------------------------------------------
|
| 364 | // MurmurHashAligned2, by Austin Appleby
|
| 365 |
|
| 366 | // Same algorithm as MurmurHash2, but only does aligned reads - should be safer
|
| 367 | // on certain platforms.
|
| 368 |
|
| 369 | // Performance will be lower than MurmurHash2
|
| 370 |
|
| 371 | #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
|
| 372 |
|
| 373 | uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
|
| 374 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 375 | const uint32_t m = 0x5bd1e995;
|
| 376 | const int r = 24;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 377 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 378 | const unsigned char * data = (const unsigned char *)key;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 379 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 380 | uint32_t h = seed ^ len;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 381 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 382 | int align = (uint64_t)data & 3;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 383 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 384 | if(align && (len >= 4))
|
| 385 | {
|
| 386 | // Pre-load the temp registers
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 387 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 388 | uint32_t t = 0, d = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 389 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 390 | switch(align)
|
| 391 | {
|
| 392 | case 1: t |= data[2] << 16;
|
| 393 | case 2: t |= data[1] << 8;
|
| 394 | case 3: t |= data[0];
|
| 395 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 396 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 397 | t <<= (8 * align);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 398 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 399 | data += 4-align;
|
| 400 | len -= 4-align;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 401 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 402 | int sl = 8 * (4-align);
|
| 403 | int sr = 8 * align;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 404 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 405 | // Mix
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 406 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 407 | while(len >= 4)
|
| 408 | {
|
| 409 | d = *(uint32_t *)data;
|
| 410 | t = (t >> sr) | (d << sl);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 411 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 412 | uint32_t k = t;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 413 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 414 | MIX(h,k,m);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 415 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 416 | t = d;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 417 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 418 | data += 4;
|
| 419 | len -= 4;
|
| 420 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 421 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 422 | // Handle leftover data in temp registers
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 423 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 424 | d = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 425 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 426 | if(len >= align)
|
| 427 | {
|
| 428 | switch(align)
|
| 429 | {
|
| 430 | case 3: d |= data[2] << 16;
|
| 431 | case 2: d |= data[1] << 8;
|
| 432 | case 1: d |= data[0];
|
| 433 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 434 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 435 | uint32_t k = (t >> sr) | (d << sl);
|
| 436 | MIX(h,k,m);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 437 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 438 | data += align;
|
| 439 | len -= align;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 440 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 441 | //----------
|
| 442 | // Handle tail bytes
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 443 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 444 | switch(len)
|
| 445 | {
|
| 446 | case 3: h ^= data[2] << 16;
|
| 447 | case 2: h ^= data[1] << 8;
|
| 448 | case 1: h ^= data[0];
|
| 449 | h *= m;
|
| 450 | };
|
| 451 | }
|
| 452 | else
|
| 453 | {
|
| 454 | switch(len)
|
| 455 | {
|
| 456 | case 3: d |= data[2] << 16;
|
| 457 | case 2: d |= data[1] << 8;
|
| 458 | case 1: d |= data[0];
|
| 459 | case 0: h ^= (t >> sr) | (d << sl);
|
| 460 | h *= m;
|
| 461 | }
|
| 462 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 463 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 464 | h ^= h >> 13;
|
| 465 | h *= m;
|
| 466 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 467 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 468 | return h;
|
| 469 | }
|
| 470 | else
|
| 471 | {
|
| 472 | while(len >= 4)
|
| 473 | {
|
| 474 | uint32_t k = *(uint32_t *)data;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 475 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 476 | MIX(h,k,m);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 477 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 478 | data += 4;
|
| 479 | len -= 4;
|
| 480 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 481 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 482 | //----------
|
| 483 | // Handle tail bytes
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 484 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 485 | switch(len)
|
| 486 | {
|
| 487 | case 3: h ^= data[2] << 16;
|
| 488 | case 2: h ^= data[1] << 8;
|
| 489 | case 1: h ^= data[0];
|
| 490 | h *= m;
|
| 491 | };
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 492 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 493 | h ^= h >> 13;
|
| 494 | h *= m;
|
| 495 | h ^= h >> 15;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 496 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 497 | return h;
|
| 498 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 499 | }
|
| 500 |
|
| 501 | //-----------------------------------------------------------------------------
|
| 502 |
|