#include "MurmurHash3.h"

#include <stdlib.h> // for _rotl

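//-----------------------------------------------------------------------------
// Portability note: _rotl, _rotl64, and __forceinline are MSVC-specific
// (<stdlib.h> declares the rotate intrinsics there). A minimal fallback
// sketch for other compilers, assuming no equivalent intrinsics are
// available, could look like this:

#if !defined(_MSC_VER)

inline uint32_t _rotl ( uint32_t x, int r )
{
  // rotate left by r bits; all call sites below use 0 < r < 32
  return (x << r) | (x >> (32 - r));
}

inline uint64_t _rotl64 ( uint64_t x, int r )
{
  // rotate left by r bits; all call sites below use 0 < r < 64
  return (x << r) | (x >> (64 - r));
}

#define __forceinline inline

#endif
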
//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

inline uint32_t getblock ( const uint32_t * p, int i )
{
  return p[i];
}

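//----------
// A sketch of the conversion mentioned above: a hypothetical byte-wise
// helper (not used by the functions below) that decodes each block as
// little-endian regardless of host endianness or alignment.

inline uint32_t getblock32_le ( const uint8_t * p, int i )
{
  // assemble the i'th 4-byte block from individual bytes, LSB first
  return uint32_t(p[i*4+0])       |
         uint32_t(p[i*4+1]) <<  8 |
         uint32_t(p[i*4+2]) << 16 |
         uint32_t(p[i*4+3]) << 24;
}
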
//----------
// Finalization mix - force all bits of a hash block to avalanche;
// avalanches all bits to within 0.25% bias

inline uint32_t fmix32 ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//-----------------------------------------------------------------------------

inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
{
  k1 *= c1;
  k1 = _rotl(k1,11);
  k1 *= c2;
  h1 ^= k1;

  h1 = h1*3+0x52dce729;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}

//----------

void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 4;

  uint32_t h1 = 0x971e137b ^ seed;

  uint32_t c1 = 0x95543787;
  uint32_t c2 = 0x2ad7eb25;

  //----------
  // body

  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);

  for(int i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock(blocks,i);

    bmix32(h1,k1,c1,c2);
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);

  uint32_t k1 = 0;

  switch(len & 3)
  {
  case 3: k1 ^= tail[2] << 16; // fall through
  case 2: k1 ^= tail[1] << 8;  // fall through
  case 1: k1 ^= tail[0];
          bmix32(h1,k1,c1,c2);
  }

  //----------
  // finalization

  h1 ^= len;

  h1 = fmix32(h1);

  *(uint32_t*)out = h1;
}

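//----------
// Example usage (a sketch; ExampleHash32 is a hypothetical wrapper, not part
// of the original API): the hash is written through the 'out' pointer, so a
// caller typically passes the address of a local uint32_t.

inline uint32_t ExampleHash32 ( const void * key, int len, uint32_t seed )
{
  uint32_t hash;

  MurmurHash3_x86_32(key, len, seed, &hash);

  return hash;
}
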
//-----------------------------------------------------------------------------

inline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & k1, uint32_t & k2, uint32_t & c1, uint32_t & c2 )
{
  k1 *= c1;
  k1 = _rotl(k1,11);
  k1 *= c2;
  h1 ^= k1;
  h1 += h2;

  h2 = _rotl(h2,17);

  k2 *= c2;
  k2 = _rotl(k2,11);
  k2 *= c1;
  h2 ^= k2;
  h2 += h1;

  h1 = h1*3+0x52dce729;
  h2 = h2*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}

//----------

void MurmurHash3_x86_64 ( const void * key, const int len, const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 8;

  uint32_t h1 = 0x8de1c3ac ^ seed;
  uint32_t h2 = 0xbab98226 ^ seed;

  uint32_t c1 = 0x95543787;
  uint32_t c2 = 0x2ad7eb25;

  //----------
  // body

  const uint32_t * blocks = (const uint32_t *)(data + nblocks*8);

  for(int i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock(blocks,i*2+0);
    uint32_t k2 = getblock(blocks,i*2+1);

    bmix32(h1,h2,k1,k2,c1,c2);
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*8);

  uint32_t k1 = 0;
  uint32_t k2 = 0;

  switch(len & 7)
  {
  case 7: k2 ^= tail[6] << 16;           // fall through
  case 6: k2 ^= tail[5] << 8;            // fall through
  case 5: k2 ^= tail[4] << 0;            // fall through
  case 4: k1 ^= uint32_t(tail[3]) << 24; // fall through
  case 3: k1 ^= tail[2] << 16;           // fall through
  case 2: k1 ^= tail[1] << 8;            // fall through
  case 1: k1 ^= tail[0] << 0;
          bmix32(h1,h2,k1,k2,c1,c2);
  }

  //----------
  // finalization

  h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);

  h1 += h2;
  h2 += h1;

  ((uint32_t*)out)[0] = h1;
  ((uint32_t*)out)[1] = h2;
}

//-----------------------------------------------------------------------------
// This mix is large enough that VC++ refuses to inline it unless we use
// __forceinline. It's also not all that fast due to register spillage.

__forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_t & h4,
                            uint32_t & k1, uint32_t & k2, uint32_t & k3, uint32_t & k4,
                            uint32_t & c1, uint32_t & c2 )
{
  k1 *= c1;
  k1 = _rotl(k1,11);
  k1 *= c2;
  h1 ^= k1;
  h1 += h2;
  h1 += h3;
  h1 += h4;

  k2 *= c2;
  k2 = _rotl(k2,11);
  k2 *= c1;
  h2 ^= k2;
  h2 += h1;

  h1 = h1*3+0x52dce729;
  h2 = h2*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;

  k3 *= c1;
  k3 = _rotl(k3,11);
  k3 *= c2;
  h3 ^= k3;
  h3 += h1;

  k4 *= c2;
  k4 = _rotl(k4,11);
  k4 *= c1;
  h4 ^= k4;
  h4 += h1;

  h3 = h3*3+0x52dce729;
  h4 = h4*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}

//----------

void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint32_t h1 = 0x8de1c3ac ^ seed;
  uint32_t h2 = 0xbab98226 ^ seed;
  uint32_t h3 = 0xfcba5b2d ^ seed;
  uint32_t h4 = 0x32452e3e ^ seed;

  uint32_t c1 = 0x95543787;
  uint32_t c2 = 0x2ad7eb25;

  //----------
  // body

  const uint32_t * blocks = (const uint32_t *)(data);

  for(int i = 0; i < nblocks; i++)
  {
    uint32_t k1 = getblock(blocks,i*4+0);
    uint32_t k2 = getblock(blocks,i*4+1);
    uint32_t k3 = getblock(blocks,i*4+2);
    uint32_t k4 = getblock(blocks,i*4+3);

    bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;

  switch(len & 15)
  {
  case 15: k4 ^= tail[14] << 16;           // fall through
  case 14: k4 ^= tail[13] << 8;            // fall through
  case 13: k4 ^= tail[12] << 0;            // fall through
  case 12: k3 ^= uint32_t(tail[11]) << 24; // fall through
  case 11: k3 ^= tail[10] << 16;           // fall through
  case 10: k3 ^= tail[ 9] << 8;            // fall through
  case  9: k3 ^= tail[ 8] << 0;            // fall through
  case  8: k2 ^= uint32_t(tail[ 7]) << 24; // fall through
  case  7: k2 ^= tail[ 6] << 16;           // fall through
  case  6: k2 ^= tail[ 5] << 8;            // fall through
  case  5: k2 ^= tail[ 4] << 0;            // fall through
  case  4: k1 ^= uint32_t(tail[ 3]) << 24; // fall through
  case  3: k1 ^= tail[ 2] << 16;           // fall through
  case  2: k1 ^= tail[ 1] << 8;            // fall through
  case  1: k1 ^= tail[ 0] << 0;
           bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
  }

  //----------
  // finalization

  h4 ^= len;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  ((uint32_t*)out)[0] = h1;
  ((uint32_t*)out)[1] = h2;
  ((uint32_t*)out)[2] = h3;
  ((uint32_t*)out)[3] = h4;
}

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

inline uint64_t getblock ( const uint64_t * p, int i )
{
  return p[i];
}

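//----------
// As with the 32-bit path, a byte-wise little-endian decode is one way to do
// that conversion (a hypothetical helper, not used by the code below):

inline uint64_t getblock64_le ( const uint8_t * p, int i )
{
  uint64_t b = 0;

  // assemble the i'th 8-byte block from individual bytes, starting at the
  // most significant byte so the shifts accumulate little-endian order
  for(int j = 7; j >= 0; j--) b = (b << 8) | p[i*8+j];

  return b;
}
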
//----------
// Block mix - combine the key bits with the hash bits and scramble everything

inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2, uint64_t & c1, uint64_t & c2 )
{
  k1 *= c1;
  k1 = _rotl64(k1,23);
  k1 *= c2;
  h1 ^= k1;
  h1 += h2;

  h2 = _rotl64(h2,41);

  k2 *= c2;
  k2 = _rotl64(k2,23);
  k2 *= c1;
  h2 ^= k2;
  h2 += h1;

  h1 = h1*3+0x52dce729;
  h2 = h2*3+0x38495ab5;

  c1 = c1*5+0x7b7d159c;
  c2 = c2*5+0x6bce6396;
}

//----------
// Finalization mix - avalanches all bits to within 0.05% bias

inline uint64_t fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= 0xff51afd7ed558ccdULL;
  k ^= k >> 33;
  k *= 0xc4ceb9fe1a85ec53ULL;
  k ^= k >> 33;

  return k;
}

//----------

void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint64_t h1 = 0x9368e53c2f6af274ULL ^ seed;
  uint64_t h2 = 0x586dcd208f7cd3fdULL ^ seed;

  uint64_t c1 = 0x87c37b91114253d5ULL;
  uint64_t c2 = 0x4cf5ad432745937fULL;

  //----------
  // body

  const uint64_t * blocks = (const uint64_t *)(data);

  for(int i = 0; i < nblocks; i++)
  {
    uint64_t k1 = getblock(blocks,i*2+0);
    uint64_t k2 = getblock(blocks,i*2+1);

    bmix64(h1,h2,k1,k2,c1,c2);
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  switch(len & 15)
  {
  case 15: k2 ^= uint64_t(tail[14]) << 48; // fall through
  case 14: k2 ^= uint64_t(tail[13]) << 40; // fall through
  case 13: k2 ^= uint64_t(tail[12]) << 32; // fall through
  case 12: k2 ^= uint64_t(tail[11]) << 24; // fall through
  case 11: k2 ^= uint64_t(tail[10]) << 16; // fall through
  case 10: k2 ^= uint64_t(tail[ 9]) << 8;  // fall through
  case  9: k2 ^= uint64_t(tail[ 8]) << 0;  // fall through

  case  8: k1 ^= uint64_t(tail[ 7]) << 56; // fall through
  case  7: k1 ^= uint64_t(tail[ 6]) << 48; // fall through
  case  6: k1 ^= uint64_t(tail[ 5]) << 40; // fall through
  case  5: k1 ^= uint64_t(tail[ 4]) << 32; // fall through
  case  4: k1 ^= uint64_t(tail[ 3]) << 24; // fall through
  case  3: k1 ^= uint64_t(tail[ 2]) << 16; // fall through
  case  2: k1 ^= uint64_t(tail[ 1]) << 8;  // fall through
  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
           bmix64(h1,h2,k1,k2,c1,c2);
  }

  //----------
  // finalization

  h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t*)out)[0] = h1;
  ((uint64_t*)out)[1] = h2;
}

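//----------
// Example usage (a sketch; ExampleHash128 is a hypothetical wrapper, not
// part of the original API): the 128-bit result comes back as two 64-bit
// words, so 'out' must point at 16 writable bytes.

inline void ExampleHash128 ( const void * key, int len, uint32_t seed,
                             uint64_t & lo, uint64_t & hi )
{
  uint64_t hash[2];

  MurmurHash3_x64_128(key, len, seed, hash);

  lo = hash[0]; // low 64 bits of the 128-bit hash
  hi = hash[1]; // high 64 bits
}
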
//-----------------------------------------------------------------------------
// If we need a smaller hash value, it's faster to just use a portion of the
// 128-bit hash

void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out )
{
  uint32_t temp[4];

  MurmurHash3_x64_128(key,len,seed,temp);

  *(uint32_t*)out = temp[0];
}

//----------

void MurmurHash3_x64_64 ( const void * key, int len, uint32_t seed, void * out )
{
  uint64_t temp[2];

  MurmurHash3_x64_128(key,len,seed,temp);

  *(uint64_t*)out = temp[0];
}

//-----------------------------------------------------------------------------