Blame - MurmurHash3.cpp - chromium.googlesource.com/external/smhasher

blob: 88d08cbd2de1af831925b2530b6d7f8a150bc32f [file] [log] [blame]

tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	1	#include "MurmurHash3.h"
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	2	#include <stdlib.h> // for _rotl
				3
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	4	// Note - The x86 and x64 versions do _not_ produce the same results, as the
				5	// algorithms are optimized for their respective platforms. You can still
				6	// compile and run any of them on any platform, but your performance with the
				7	// non-native version will be less than optimal.
				8
				9
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	10	//-----------------------------------------------------------------------------
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	11	// Block read - if your platform needs to do endian-swapping or can only
				12	// handle aligned reads, do the conversion here
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	13
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	14	inline uint32_t getblock ( const uint32_t * p, int i )
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	15	{
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	16	return p[i];
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	17	}
				18
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	19	//----------
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	20
				21	inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
				22	{
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	23	c1 = c1*5+0x7b7d159c;
				24	c2 = c2*5+0x6bce6396;
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	25
				26	k1 *= c1;
				27	k1 = _rotl(k1,11);
				28	k1 *= c2;
				29
				30	h1 = _rotl(h1,13);
				31	h1 = h1*5+0x52dce729;
				32	h1 ^= k1;
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	33	}
				34
				35	//----------
				36
				37	void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
				38	{
				39	const uint8_t * data = (const uint8_t*)key;
				40	const int nblocks = len / 4;
				41
				42	uint32_t h1 = 0x971e137b ^ seed;
				43
				44	uint32_t c1 = 0x95543787;
				45	uint32_t c2 = 0x2ad7eb25;
				46
				47	//----------
				48	// body
				49
				50	const uint32_t * blocks = (const uint32_t )(data + nblocks4);
				51
				52	for(int i = -nblocks; i; i++)
				53	{
				54	uint32_t k1 = getblock(blocks,i);
				55
				56	bmix32(h1,k1,c1,c2);
				57	}
				58
				59	//----------
				60	// tail
				61
				62	const uint8_t * tail = (const uint8_t)(data + nblocks4);
				63
				64	uint32_t k1 = 0;
				65
				66	switch(len & 3)
				67	{
				68	case 3: k1 ^= tail[2] << 16;
				69	case 2: k1 ^= tail[1] << 8;
				70	case 1: k1 ^= tail[0];
				71	bmix32(h1,k1,c1,c2);
				72	};
				73
				74	//----------
				75	// finalization
				76
				77	h1 ^= len;
				78
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	79	h1 *= 0x85ebca6b;
				80	h1 ^= h1 >> 13;
				81	h1 *= 0xc2b2ae35;
				82	h1 ^= h1 >> 16;
				83
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	84	h1 ^= seed;
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	85
				86	(uint32_t)out = h1;
				87	}
				88
				89	//-----------------------------------------------------------------------------
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	90	// This mix is large enough that VC++ refuses to inline it unless we use
				91	// __forceinline. It's also not all that fast due to register spillage.
				92
				93	__forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_t & h4,
				94	uint32_t & k1, uint32_t & k2, uint32_t & k3, uint32_t & k4,
				95	uint32_t & c1, uint32_t & c2 )
				96	{
				97	k1 *= c1;
				98	k1 = _rotl(k1,11);
				99	k1 *= c2;
				100	h1 ^= k1;
				101	h1 += h2;
				102	h1 += h3;
				103	h1 += h4;
				104
tanjent@gmail.com	31a9e8e	2010-11-09 20:29:19 +0000	[diff] [blame]	105	h1 = _rotl(h1,17);
				106
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	107	k2 *= c2;
				108	k2 = _rotl(k2,11);
				109	k2 *= c1;
				110	h2 ^= k2;
				111	h2 += h1;
				112
				113	h1 = h1*3+0x52dce729;
				114	h2 = h2*3+0x38495ab5;
				115
				116	c1 = c1*5+0x7b7d159c;
				117	c2 = c2*5+0x6bce6396;
				118
				119	k3 *= c1;
				120	k3 = _rotl(k3,11);
				121	k3 *= c2;
				122	h3 ^= k3;
				123	h3 += h1;
				124
				125	k4 *= c2;
				126	k4 = _rotl(k4,11);
				127	k4 *= c1;
				128	h4 ^= k4;
				129	h4 += h1;
				130
				131	h3 = h3*3+0x52dce729;
				132	h4 = h4*3+0x38495ab5;
				133
				134	c1 = c1*5+0x7b7d159c;
				135	c2 = c2*5+0x6bce6396;
				136	}
				137
				138	//----------
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	139	// Finalization mix - force all bits of a hash block to avalanche
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	140
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	141	// avalanches all bits to within 0.25% bias
				142
				143	inline uint32_t fmix32 ( uint32_t h )
				144	{
				145	h ^= h >> 16;
				146	h *= 0x85ebca6b;
				147	h ^= h >> 13;
				148	h *= 0xc2b2ae35;
				149	h ^= h >> 16;
				150
				151	return h;
				152	}
				153
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	154	void MurmurHash3_x86_128 ( const void * key, const int len, uint32_t seed, void * out )
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	155	{
				156	const uint8_t * data = (const uint8_t*)key;
				157	const int nblocks = len / 16;
				158
				159	uint32_t h1 = 0x8de1c3ac ^ seed;
				160	uint32_t h2 = 0xbab98226 ^ seed;
				161	uint32_t h3 = 0xfcba5b2d ^ seed;
				162	uint32_t h4 = 0x32452e3e ^ seed;
				163
				164	uint32_t c1 = 0x95543787;
				165	uint32_t c2 = 0x2ad7eb25;
				166
				167	//----------
				168	// body
				169
				170	const uint32_t * blocks = (const uint32_t *)(data);
				171
				172	for(int i = 0; i < nblocks; i++)
				173	{
				174	uint32_t k1 = getblock(blocks,i*4+0);
				175	uint32_t k2 = getblock(blocks,i*4+1);
				176	uint32_t k3 = getblock(blocks,i*4+2);
				177	uint32_t k4 = getblock(blocks,i*4+3);
				178
				179	bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
				180	}
				181
				182	//----------
				183	// tail
				184
				185	const uint8_t * tail = (const uint8_t)(data + nblocks16);
				186
				187	uint32_t k1 = 0;
				188	uint32_t k2 = 0;
				189	uint32_t k3 = 0;
				190	uint32_t k4 = 0;
				191
				192	switch(len & 15)
				193	{
				194	case 15: k4 ^= tail[14] << 16;
				195	case 14: k4 ^= tail[13] << 8;
				196	case 13: k4 ^= tail[12] << 0;
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	197
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	198	case 12: k3 ^= tail[11] << 24;
				199	case 11: k3 ^= tail[10] << 16;
				200	case 10: k3 ^= tail[ 9] << 8;
				201	case 9: k3 ^= tail[ 8] << 0;
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	202
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	203	case 8: k2 ^= tail[ 7] << 24;
				204	case 7: k2 ^= tail[ 6] << 16;
				205	case 6: k2 ^= tail[ 5] << 8;
				206	case 5: k2 ^= tail[ 4] << 0;
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	207
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	208	case 4: k1 ^= tail[ 3] << 24;
				209	case 3: k1 ^= tail[ 2] << 16;
				210	case 2: k1 ^= tail[ 1] << 8;
				211	case 1: k1 ^= tail[ 0] << 0;
				212	bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
				213	};
				214
				215	//----------
				216	// finalization
				217
				218	h4 ^= len;
				219
				220	h1 += h2; h1 += h3; h1 += h4;
				221	h2 += h1; h3 += h1; h4 += h1;
				222
				223	h1 = fmix32(h1);
				224	h2 = fmix32(h2);
				225	h3 = fmix32(h3);
				226	h4 = fmix32(h4);
				227
				228	h1 += h2; h1 += h3; h1 += h4;
				229	h2 += h1; h3 += h1; h4 += h1;
				230
				231	((uint32_t*)out)[0] = h1;
				232	((uint32_t*)out)[1] = h2;
				233	((uint32_t*)out)[2] = h3;
				234	((uint32_t*)out)[3] = h4;
				235	}
				236
				237	//-----------------------------------------------------------------------------
				238	// Block read - if your platform needs to do endian-swapping or can only
				239	// handle aligned reads, do the conversion here
				240
				241	inline uint64_t getblock ( const uint64_t * p, int i )
				242	{
				243	return p[i];
				244	}
				245
				246	//----------
				247	// Block mix - combine the key bits with the hash bits and scramble everything
				248
				249	inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2, uint64_t & c1, uint64_t & c2 )
				250	{
				251	k1 *= c1;
				252	k1 = _rotl64(k1,23);
				253	k1 *= c2;
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	254
				255	k2 *= c2;
				256	k2 = _rotl64(k2,23);
				257	k2 *= c1;
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	258
				259	h1 = _rotl64(h1,17);
				260	h1 += h2;
				261	h1 ^= k1;
				262
				263	h2 = _rotl64(h2,41);
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	264	h2 += h1;
tanjent@gmail.com	babb553	2011-02-28 06:03:12 +0000	[diff] [blame]	265	h2 ^= k2;
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	266
				267	h1 = h1*3+0x52dce729;
				268	h2 = h2*3+0x38495ab5;
				269
				270	c1 = c1*5+0x7b7d159c;
				271	c2 = c2*5+0x6bce6396;
				272	}
				273
				274	//----------
				275	// Finalization mix - avalanches all bits to within 0.05% bias
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	276
				277	inline uint64_t fmix64 ( uint64_t k )
				278	{
				279	k ^= k >> 33;
				280	k *= 0xff51afd7ed558ccd;
				281	k ^= k >> 33;
				282	k *= 0xc4ceb9fe1a85ec53;
				283	k ^= k >> 33;
				284
				285	return k;
				286	}
				287
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	288	//----------
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	289
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	290	void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, void * out )
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	291	{
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	292	const uint8_t * data = (const uint8_t*)key;
				293	const int nblocks = len / 16;
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	294
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	295	uint64_t h1 = 0x9368e53c2f6af274 ^ seed;
				296	uint64_t h2 = 0x586dcd208f7cd3fd ^ seed;
				297
				298	uint64_t c1 = 0x87c37b91114253d5;
				299	uint64_t c2 = 0x4cf5ad432745937f;
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	300
				301	//----------
				302	// body
				303
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	304	const uint64_t * blocks = (const uint64_t *)(data);
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	305
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	306	for(int i = 0; i < nblocks; i++)
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	307	{
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	308	uint64_t k1 = getblock(blocks,i*2+0);
				309	uint64_t k2 = getblock(blocks,i*2+1);
				310
				311	bmix64(h1,h2,k1,k2,c1,c2);
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	312	}
				313
				314	//----------
				315	// tail
				316
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	317	const uint8_t * tail = (const uint8_t)(data + nblocks16);
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	318
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	319	uint64_t k1 = 0;
				320	uint64_t k2 = 0;
				321
				322	switch(len & 15)
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	323	{
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	324	case 15: k2 ^= uint64_t(tail[14]) << 48;
				325	case 14: k2 ^= uint64_t(tail[13]) << 40;
				326	case 13: k2 ^= uint64_t(tail[12]) << 32;
				327	case 12: k2 ^= uint64_t(tail[11]) << 24;
				328	case 11: k2 ^= uint64_t(tail[10]) << 16;
				329	case 10: k2 ^= uint64_t(tail[ 9]) << 8;
				330	case 9: k2 ^= uint64_t(tail[ 8]) << 0;
				331
				332	case 8: k1 ^= uint64_t(tail[ 7]) << 56;
				333	case 7: k1 ^= uint64_t(tail[ 6]) << 48;
				334	case 6: k1 ^= uint64_t(tail[ 5]) << 40;
				335	case 5: k1 ^= uint64_t(tail[ 4]) << 32;
				336	case 4: k1 ^= uint64_t(tail[ 3]) << 24;
				337	case 3: k1 ^= uint64_t(tail[ 2]) << 16;
				338	case 2: k1 ^= uint64_t(tail[ 1]) << 8;
				339	case 1: k1 ^= uint64_t(tail[ 0]) << 0;
				340	bmix64(h1,h2,k1,k2,c1,c2);
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	341	};
				342
				343	//----------
				344	// finalization
				345
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	346	h2 ^= len;
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	347
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	348	h1 += h2;
				349	h2 += h1;
tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame]	350
tanjent@gmail.com	ad4b363	2010-11-05 01:20:58 +0000	[diff] [blame]	351	h1 = fmix64(h1);
				352	h2 = fmix64(h2);
				353
				354	h1 += h2;
				355	h2 += h1;
				356
				357	((uint64_t*)out)[0] = h1;
				358	((uint64_t*)out)[1] = h2;
				359	}
				360
				361	//-----------------------------------------------------------------------------