//-----------------------------------------------------------------------------
#include "MurmurHash3.h"

#include <stdlib.h> // for _rotl
4
//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

inline uint32_t getblock ( const uint32_t * p, int i )
{
  // Plain aligned load; platforms that need byte-swapping or that cannot do
  // unaligned reads should adapt this accessor (see comment above).
  return *(p + i);
}
13
//----------
// Finalization mix - force all bits of a hash block to avalanche

// avalanches all bits to within 0.25% bias

inline uint32_t fmix32 ( uint32_t h )
{
  // Three xorshift/odd-multiply rounds; each step is a bijection on uint32_t,
  // so distinct inputs always map to distinct outputs.
  uint32_t x = h;

  x = (x ^ (x >> 16)) * 0x85ebca6b;
  x = (x ^ (x >> 13)) * 0xc2b2ae35;
  x = x ^ (x >> 16);

  return x;
}
29
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000030//-----------------------------------------------------------------------------
31
// Block mix - combine one key word with the hash lane, then evolve the two
// multiplicative constants so the next block is mixed differently.
//
// FIX(review): the MSVC-only _rotl intrinsic is replaced with the portable
// shift/or rotate form; modern compilers lower it to a single rotate
// instruction and the result is bit-identical.
inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
{
  k1 *= c1;
  k1 = (k1 << 11) | (k1 >> 21); // rotl32(k1,11)
  k1 *= c2;
  h1 ^= k1;

  h1 = h1*3+0x52dce729;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}
44
45//----------
46
47void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
48{
49 const uint8_t * data = (const uint8_t*)key;
50 const int nblocks = len / 4;
51
52 uint32_t h1 = 0x971e137b ^ seed;
53
54 uint32_t c1 = 0x95543787;
55 uint32_t c2 = 0x2ad7eb25;
56
57 //----------
58 // body
59
60 const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
61
62 for(int i = -nblocks; i; i++)
63 {
64 uint32_t k1 = getblock(blocks,i);
65
66 bmix32(h1,k1,c1,c2);
67 }
68
69 //----------
70 // tail
71
72 const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
73
74 uint32_t k1 = 0;
75
76 switch(len & 3)
77 {
78 case 3: k1 ^= tail[2] << 16;
79 case 2: k1 ^= tail[1] << 8;
80 case 1: k1 ^= tail[0];
81 bmix32(h1,k1,c1,c2);
82 };
83
84 //----------
85 // finalization
86
87 h1 ^= len;
88
89 h1 = fmix32(h1);
90
91 *(uint32_t*)out = h1;
92}
93
94//-----------------------------------------------------------------------------
95
// Block mix for the two-lane x86 hashes: each key word is mixed into one
// lane, the lanes are cross-fed, and the constants are evolved per block.
//
// FIX(review): the MSVC-only _rotl intrinsic is replaced with the portable
// shift/or rotate form; compilers lower it to a single rotate instruction
// and the result is bit-identical.
inline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & k1, uint32_t & k2, uint32_t & c1, uint32_t & c2 )
{
  k1 *= c1;
  k1 = (k1 << 11) | (k1 >> 21); // rotl32(k1,11)
  k1 *= c2;
  h1 ^= k1;
  h1 += h2;

  h2 = (h2 << 17) | (h2 >> 15); // rotl32(h2,17)

  k2 *= c2;
  k2 = (k2 << 11) | (k2 >> 21); // rotl32(k2,11)
  k2 *= c1;
  h2 ^= k2;
  h2 += h1;

  h1 = h1*3+0x52dce729;
  h2 = h2*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}
118
119//----------
120
121void MurmurHash3_x86_64 ( const void * key, const int len, const uint32_t seed, void * out )
122{
123 const uint8_t * data = (const uint8_t*)key;
124 const int nblocks = len / 8;
125
126 uint32_t h1 = 0x8de1c3ac ^ seed;
127 uint32_t h2 = 0xbab98226 ^ seed;
128
129 uint32_t c1 = 0x95543787;
130 uint32_t c2 = 0x2ad7eb25;
131
132 //----------
133 // body
134
135 const uint32_t * blocks = (const uint32_t *)(data + nblocks*8);
136
137 for(int i = -nblocks; i; i++)
138 {
139 uint32_t k1 = getblock(blocks,i*2+0);
140 uint32_t k2 = getblock(blocks,i*2+1);
141
142 bmix32(h1,h2,k1,k2,c1,c2);
143 }
144
145 //----------
146 // tail
147
148 const uint8_t * tail = (const uint8_t*)(data + nblocks*8);
149
150 uint32_t k1 = 0;
151 uint32_t k2 = 0;
152
153 switch(len & 7)
154 {
155 case 7: k2 ^= tail[6] << 16;
156 case 6: k2 ^= tail[5] << 8;
157 case 5: k2 ^= tail[4] << 0;
158 case 4: k1 ^= tail[3] << 24;
159 case 3: k1 ^= tail[2] << 16;
160 case 2: k1 ^= tail[1] << 8;
161 case 1: k1 ^= tail[0] << 0;
162 bmix32(h1,h2,k1,k2,c1,c2);
163 };
164
165 //----------
166 // finalization
167
168 h2 ^= len;
169
170 h1 += h2;
171 h2 += h1;
172
173 h1 = fmix32(h1);
174 h2 = fmix32(h2);
175
176 h1 += h2;
177 h2 += h1;
178
179 ((uint32_t*)out)[0] = h1;
180 ((uint32_t*)out)[1] = h2;
181}
182
183//-----------------------------------------------------------------------------
// Block mix for the four-lane x86 128-bit hash. It is large enough that
// compilers may decline to inline it, and it is not all that fast due to
// register spillage.
//
// FIX(review): the MSVC-only __forceinline and _rotl intrinsics are replaced
// with portable `inline` and shift/or rotate forms; results are bit-identical.

inline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_t & h4, 
                     uint32_t & k1, uint32_t & k2, uint32_t & k3, uint32_t & k4, 
                     uint32_t & c1, uint32_t & c2 )
{
  k1 *= c1;
  k1 = (k1 << 11) | (k1 >> 21); // rotl32(k1,11)
  k1 *= c2;
  h1 ^= k1;
  h1 += h2;
  h1 += h3;
  h1 += h4;

  h1 = (h1 << 17) | (h1 >> 15); // rotl32(h1,17)

  k2 *= c2;
  k2 = (k2 << 11) | (k2 >> 21); // rotl32(k2,11)
  k2 *= c1;
  h2 ^= k2;
  h2 += h1;

  h1 = h1*3+0x52dce729;
  h2 = h2*3+0x38495ab5;

  // Constants evolve mid-block so lanes 3/4 use different multipliers.
  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;

  k3 *= c1;
  k3 = (k3 << 11) | (k3 >> 21); // rotl32(k3,11)
  k3 *= c2;
  h3 ^= k3;
  h3 += h1;

  k4 *= c2;
  k4 = (k4 << 11) | (k4 >> 21); // rotl32(k4,11)
  k4 *= c1;
  h4 ^= k4;
  h4 += h1;

  h3 = h3*3+0x52dce729;
  h4 = h4*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}
231
232//----------
233
234void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed, void * out )
235{
236 const uint8_t * data = (const uint8_t*)key;
237 const int nblocks = len / 16;
238
239 uint32_t h1 = 0x8de1c3ac ^ seed;
240 uint32_t h2 = 0xbab98226 ^ seed;
241 uint32_t h3 = 0xfcba5b2d ^ seed;
242 uint32_t h4 = 0x32452e3e ^ seed;
243
244 uint32_t c1 = 0x95543787;
245 uint32_t c2 = 0x2ad7eb25;
246
247 //----------
248 // body
249
250 const uint32_t * blocks = (const uint32_t *)(data);
251
252 for(int i = 0; i < nblocks; i++)
253 {
254 uint32_t k1 = getblock(blocks,i*4+0);
255 uint32_t k2 = getblock(blocks,i*4+1);
256 uint32_t k3 = getblock(blocks,i*4+2);
257 uint32_t k4 = getblock(blocks,i*4+3);
258
259 bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
260 }
261
262 //----------
263 // tail
264
265 const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
266
267 uint32_t k1 = 0;
268 uint32_t k2 = 0;
269 uint32_t k3 = 0;
270 uint32_t k4 = 0;
271
272 switch(len & 15)
273 {
274 case 15: k4 ^= tail[14] << 16;
275 case 14: k4 ^= tail[13] << 8;
276 case 13: k4 ^= tail[12] << 0;
277 case 12: k3 ^= tail[11] << 24;
278 case 11: k3 ^= tail[10] << 16;
279 case 10: k3 ^= tail[ 9] << 8;
280 case 9: k3 ^= tail[ 8] << 0;
281 case 8: k2 ^= tail[ 7] << 24;
282 case 7: k2 ^= tail[ 6] << 16;
283 case 6: k2 ^= tail[ 5] << 8;
284 case 5: k2 ^= tail[ 4] << 0;
285 case 4: k1 ^= tail[ 3] << 24;
286 case 3: k1 ^= tail[ 2] << 16;
287 case 2: k1 ^= tail[ 1] << 8;
288 case 1: k1 ^= tail[ 0] << 0;
289 bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
290 };
291
292 //----------
293 // finalization
294
295 h4 ^= len;
296
297 h1 += h2; h1 += h3; h1 += h4;
298 h2 += h1; h3 += h1; h4 += h1;
299
300 h1 = fmix32(h1);
301 h2 = fmix32(h2);
302 h3 = fmix32(h3);
303 h4 = fmix32(h4);
304
305 h1 += h2; h1 += h3; h1 += h4;
306 h2 += h1; h3 += h1; h4 += h1;
307
308 ((uint32_t*)out)[0] = h1;
309 ((uint32_t*)out)[1] = h2;
310 ((uint32_t*)out)[2] = h3;
311 ((uint32_t*)out)[3] = h4;
312}
313
314//-----------------------------------------------------------------------------
315// Block read - if your platform needs to do endian-swapping or can only
316// handle aligned reads, do the conversion here
317
inline uint64_t getblock ( const uint64_t * p, int i )
{
  // Plain aligned load; platforms that need byte-swapping or that cannot do
  // unaligned reads should adapt this accessor (see comment above).
  return *(p + i);
}
322
323//----------
324// Block mix - combine the key bits with the hash bits and scramble everything
325
inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2, uint64_t & c1, uint64_t & c2 )
{
  // FIX(review): the MSVC-only _rotl64 intrinsic is replaced with the
  // portable shift/or rotate form; compilers lower it to a single rotate
  // instruction and the result is bit-identical.

  k1 *= c1;
  k1 = (k1 << 23) | (k1 >> 41); // rotl64(k1,23)
  k1 *= c2;
  h1 ^= k1;
  h1 += h2;

  h2 = (h2 << 41) | (h2 >> 23); // rotl64(h2,41)

  k2 *= c2;
  k2 = (k2 << 23) | (k2 >> 41); // rotl64(k2,23)
  k2 *= c1;
  h2 ^= k2;
  h2 += h1;

  // Linear post-mix of each lane, then evolve the constants so the next
  // block is mixed with different multipliers.
  h1 = h1*3+0x52dce729;
  h2 = h2*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}
348
349//----------
350// Finalization mix - avalanches all bits to within 0.05% bias
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000351
inline uint64_t fmix64 ( uint64_t k )
{
  // Three xorshift/odd-multiply rounds; each step is a bijection on uint64_t,
  // so distinct inputs always map to distinct outputs.
  uint64_t x = k;

  x = (x ^ (x >> 33)) * 0xff51afd7ed558ccd;
  x = (x ^ (x >> 33)) * 0xc4ceb9fe1a85ec53;
  x = x ^ (x >> 33);

  return x;
}
362
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000363//----------
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000364
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000365void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, void * out )
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000366{
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000367 const uint8_t * data = (const uint8_t*)key;
368 const int nblocks = len / 16;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000369
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000370 uint64_t h1 = 0x9368e53c2f6af274 ^ seed;
371 uint64_t h2 = 0x586dcd208f7cd3fd ^ seed;
372
373 uint64_t c1 = 0x87c37b91114253d5;
374 uint64_t c2 = 0x4cf5ad432745937f;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000375
376 //----------
377 // body
378
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000379 const uint64_t * blocks = (const uint64_t *)(data);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000380
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000381 for(int i = 0; i < nblocks; i++)
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000382 {
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000383 uint64_t k1 = getblock(blocks,i*2+0);
384 uint64_t k2 = getblock(blocks,i*2+1);
385
386 bmix64(h1,h2,k1,k2,c1,c2);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000387 }
388
389 //----------
390 // tail
391
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000392 const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000393
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000394 uint64_t k1 = 0;
395 uint64_t k2 = 0;
396
397 switch(len & 15)
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000398 {
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000399 case 15: k2 ^= uint64_t(tail[14]) << 48;
400 case 14: k2 ^= uint64_t(tail[13]) << 40;
401 case 13: k2 ^= uint64_t(tail[12]) << 32;
402 case 12: k2 ^= uint64_t(tail[11]) << 24;
403 case 11: k2 ^= uint64_t(tail[10]) << 16;
404 case 10: k2 ^= uint64_t(tail[ 9]) << 8;
405 case 9: k2 ^= uint64_t(tail[ 8]) << 0;
406
407 case 8: k1 ^= uint64_t(tail[ 7]) << 56;
408 case 7: k1 ^= uint64_t(tail[ 6]) << 48;
409 case 6: k1 ^= uint64_t(tail[ 5]) << 40;
410 case 5: k1 ^= uint64_t(tail[ 4]) << 32;
411 case 4: k1 ^= uint64_t(tail[ 3]) << 24;
412 case 3: k1 ^= uint64_t(tail[ 2]) << 16;
413 case 2: k1 ^= uint64_t(tail[ 1]) << 8;
414 case 1: k1 ^= uint64_t(tail[ 0]) << 0;
415 bmix64(h1,h2,k1,k2,c1,c2);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000416 };
417
418 //----------
419 // finalization
420
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000421 h2 ^= len;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000422
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000423 h1 += h2;
424 h2 += h1;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000425
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000426 h1 = fmix64(h1);
427 h2 = fmix64(h2);
428
429 h1 += h2;
430 h2 += h1;
431
432 ((uint64_t*)out)[0] = h1;
433 ((uint64_t*)out)[1] = h2;
434}
435
436//-----------------------------------------------------------------------------
437// If we need a smaller hash value, it's faster to just use a portion of the
438// 128-bit hash
439
440void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out )
441{
442 uint32_t temp[4];
443
444 MurmurHash3_x64_128(key,len,seed,temp);
445
446 *(uint32_t*)out = temp[0];
447}
448
449//----------
450
451void MurmurHash3_x64_64 ( const void * key, int len, uint32_t seed, void * out )
452{
453 uint64_t temp[2];
454
455 MurmurHash3_x64_128(key,len,seed,temp);
456
457 *(uint64_t*)out = temp[0];
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000458}
459
460//-----------------------------------------------------------------------------