blob: b21e9f785ddec99f95e0afd10c9e2de20a9fc9b1 [file] [log] [blame]
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +00001//-----------------------------------------------------------------------------
aappleby@google.comc8e8bf82011-04-08 21:07:48 +00002// MurmurHash was written by Austin Appleby, and is placed in the public
3// domain. The author hereby disclaims copyright to this source code.
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +00004
5// Note - This code makes a few assumptions about how your machine behaves -
6
7// 1. We can read a 4-byte value from any address without crashing
8// 2. sizeof(int) == 4
9
10// And it has a few limitations -
11
12// 1. It will not work incrementally.
13// 2. It will not produce the same results on little-endian and big-endian
14// machines.
15
aappleby@google.comc8e8bf82011-04-08 21:07:48 +000016#include "MurmurHash1.h"
17
18//-----------------------------------------------------------------------------
19
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000020uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
21{
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000022 const unsigned int m = 0xc6a4a793;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000023
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000024 const int r = 16;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000025
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000026 unsigned int h = seed ^ (len * m);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000027
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000028 //----------
29
30 const unsigned char * data = (const unsigned char *)key;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000031
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000032 while(len >= 4)
33 {
34 unsigned int k = *(unsigned int *)data;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000035
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000036 h += k;
37 h *= m;
38 h ^= h >> 16;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000039
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000040 data += 4;
41 len -= 4;
42 }
43
44 //----------
45
46 switch(len)
47 {
48 case 3:
49 h += data[2] << 16;
50 case 2:
51 h += data[1] << 8;
52 case 1:
53 h += data[0];
54 h *= m;
55 h ^= h >> r;
56 };
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000057
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000058 //----------
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000059
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000060 h *= m;
61 h ^= h >> 10;
62 h *= m;
63 h ^= h >> 17;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000064
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000065 return h;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000066}
67
68//-----------------------------------------------------------------------------
69// MurmurHash1Aligned, by Austin Appleby
70
71// Same algorithm as MurmurHash1, but only does aligned reads - should be safer
72// on certain platforms.
73
74// Performance should be equal to or better than the simple version.
75
76unsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed )
77{
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000078 const unsigned int m = 0xc6a4a793;
79 const int r = 16;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000080
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000081 const unsigned char * data = (const unsigned char *)key;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000082
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000083 unsigned int h = seed ^ (len * m);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000084
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000085 int align = (uint64_t)data & 3;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000086
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000087 if(align && (len >= 4))
88 {
89 // Pre-load the temp registers
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000090
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000091 unsigned int t = 0, d = 0;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000092
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000093 switch(align)
94 {
95 case 1: t |= data[2] << 16;
96 case 2: t |= data[1] << 8;
97 case 3: t |= data[0];
98 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000099
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000100 t <<= (8 * align);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000101
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000102 data += 4-align;
103 len -= 4-align;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000104
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000105 int sl = 8 * (4-align);
106 int sr = 8 * align;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000107
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000108 // Mix
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000109
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000110 while(len >= 4)
111 {
112 d = *(unsigned int *)data;
113 t = (t >> sr) | (d << sl);
114 h += t;
115 h *= m;
116 h ^= h >> r;
117 t = d;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000118
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000119 data += 4;
120 len -= 4;
121 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000122
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000123 // Handle leftover data in temp registers
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000124
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000125 int pack = len < align ? len : align;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000126
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000127 d = 0;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000128
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000129 switch(pack)
130 {
131 case 3: d |= data[2] << 16;
132 case 2: d |= data[1] << 8;
133 case 1: d |= data[0];
134 case 0: h += (t >> sr) | (d << sl);
135 h *= m;
136 h ^= h >> r;
137 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000138
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000139 data += pack;
140 len -= pack;
141 }
142 else
143 {
144 while(len >= 4)
145 {
146 h += *(unsigned int *)data;
147 h *= m;
148 h ^= h >> r;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000149
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000150 data += 4;
151 len -= 4;
152 }
153 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000154
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000155 //----------
156 // Handle tail bytes
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000157
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000158 switch(len)
159 {
160 case 3: h += data[2] << 16;
161 case 2: h += data[1] << 8;
162 case 1: h += data[0];
163 h *= m;
164 h ^= h >> r;
165 };
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000166
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000167 h *= m;
168 h ^= h >> 10;
169 h *= m;
170 h ^= h >> 17;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000171
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000172 return h;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000173}
174