#include "MurmurHash2.h"

//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby

// Note - This code makes a few assumptions about how your machine behaves -

// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4

// And it has a few limitations -

// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines.
uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
{
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.

  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  // Initialize the hash to a 'random' value

  uint32_t h = seed ^ len;

  // Mix 4 bytes at a time into the hash

  const unsigned char * data = (const unsigned char *)key;

  while(len >= 4)
  {
    uint32_t k = *(uint32_t*)data;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  // Handle the last few bytes of the input array

  switch(len)
  {
  case 3: h ^= data[2] << 16;
  case 2: h ^= data[1] << 8;
  case 1: h ^= data[0];
          h *= m;
  };

  // Do a few final mixes of the hash to ensure the last few
  // bytes are well-incorporated.

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
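
//-----------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original file): a one-shot hash of
// a C string. The seed value and the strlen()-based length are arbitrary choices
// made for this example; strlen() would come from <string.h>, which this file
// does not include itself.

// #include <string.h>
//
// const char * str = "hello world";
// uint32_t hash = MurmurHash2 ( str, (int)strlen(str), 0x9747b28c );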

//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby

// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.

// 64-bit hash for 64-bit platforms

uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
{
  const uint64_t m = 0xc6a4a7935bd1e995ULL;
  const int r = 47;

  uint64_t h = seed ^ (len * m);

  const uint64_t * data = (const uint64_t *)key;
  const uint64_t * end = data + (len/8);

  while(data != end)
  {
    uint64_t k = *data++;

    k *= m;
    k ^= k >> r;
    k *= m;

    h ^= k;
    h *= m;
  }

  const unsigned char * data2 = (const unsigned char*)data;

  switch(len & 7)
  {
  case 7: h ^= uint64_t(data2[6]) << 48;
  case 6: h ^= uint64_t(data2[5]) << 40;
  case 5: h ^= uint64_t(data2[4]) << 32;
  case 4: h ^= uint64_t(data2[3]) << 24;
  case 3: h ^= uint64_t(data2[2]) << 16;
  case 2: h ^= uint64_t(data2[1]) << 8;
  case 1: h ^= uint64_t(data2[0]);
          h *= m;
  };

  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
}


// 64-bit hash for 32-bit platforms

uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  uint32_t h1 = uint32_t(seed) ^ len;
  uint32_t h2 = uint32_t(seed >> 32);

  const uint32_t * data = (const uint32_t *)key;

  while(len >= 8)
  {
    uint32_t k1 = *data++;
    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;
    len -= 4;

    uint32_t k2 = *data++;
    k2 *= m; k2 ^= k2 >> r; k2 *= m;
    h2 *= m; h2 ^= k2;
    len -= 4;
  }

  if(len >= 4)
  {
    uint32_t k1 = *data++;
    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;
    len -= 4;
  }

  switch(len)
  {
  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
  case 1: h2 ^= ((unsigned char*)data)[0];
          h2 *= m;
  };

  h1 ^= h2 >> 18; h1 *= m;
  h2 ^= h1 >> 22; h2 *= m;
  h1 ^= h2 >> 17; h1 *= m;
  h2 ^= h1 >> 19; h2 *= m;

  uint64_t h = h1;

  h = (h << 32) | h2;

  return h;
}
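
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): picking between the two
// 64-bit variants at compile time. Note that MurmurHash64A and MurmurHash64B do
// NOT produce the same values for the same input, so whichever variant is chosen
// must be used consistently. The pointer-size test below is one possible way to
// detect the platform; it is an assumption of this example, not of the original
// code.

// uint64_t Hash64_Example ( const void * key, int len, uint64_t seed )
// {
//   if(sizeof(void*) == 8)
//     return MurmurHash64A(key,len,seed); // 64-bit platforms
//   else
//     return MurmurHash64B(key,len,seed); // 32-bit platforms
// }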

//-----------------------------------------------------------------------------
// MurmurHash2A, by Austin Appleby

// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.

// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.

#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;
  uint32_t l = len;

  const unsigned char * data = (const unsigned char *)key;

  uint32_t h = seed;

  while(len >= 4)
  {
    uint32_t k = *(uint32_t*)data;

    mmix(h,k);

    data += 4;
    len -= 4;
  }

  uint32_t t = 0;

  switch(len)
  {
  case 3: t ^= data[2] << 16;
  case 2: t ^= data[1] << 8;
  case 1: t ^= data[0];
  };

  mmix(h,t);
  mmix(h,l);

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
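
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): MurmurHash2A uses a
// different construction than MurmurHash2, so the two functions generally return
// different values for the same key and seed - they are not interchangeable once
// hashes have been stored. The buffer and seed below are arbitrary example
// values.

// const unsigned char buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
//
// uint32_t h_plain   = MurmurHash2  ( buf, sizeof(buf), 0 );
// uint32_t h_variant = MurmurHash2A ( buf, sizeof(buf), 0 );
//
// // h_plain and h_variant will, in general, differ.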

//-----------------------------------------------------------------------------
// CMurmurHash2A, by Austin Appleby

// This is a sample implementation of MurmurHash2A designed to work
// incrementally.

// Usage -

// CMurmurHash2A hasher;
// hasher.Begin(seed);
// hasher.Add(data1,size1);
// hasher.Add(data2,size2);
// ...
// hasher.Add(dataN,sizeN);
// uint32_t hash = hasher.End();

class CMurmurHash2A
{
public:

  void Begin ( uint32_t seed = 0 )
  {
    m_hash  = seed;
    m_tail  = 0;
    m_count = 0;
    m_size  = 0;
  }

  void Add ( const unsigned char * data, int len )
  {
    m_size += len;

    MixTail(data,len);

    while(len >= 4)
    {
      uint32_t k = *(uint32_t*)data;

      mmix(m_hash,k);

      data += 4;
      len -= 4;
    }

    MixTail(data,len);
  }

  uint32_t End ( void )
  {
    mmix(m_hash,m_tail);
    mmix(m_hash,m_size);

    m_hash ^= m_hash >> 13;
    m_hash *= m;
    m_hash ^= m_hash >> 15;

    return m_hash;
  }

private:

  static const uint32_t m = 0x5bd1e995;
  static const int r = 24;

  void MixTail ( const unsigned char * & data, int & len )
  {
    while( len && ((len<4) || m_count) )
    {
      m_tail |= (*data++) << (m_count * 8);

      m_count++;
      len--;

      if(m_count == 4)
      {
        mmix(m_hash,m_tail);
        m_tail = 0;
        m_count = 0;
      }
    }
  }

  uint32_t m_hash;
  uint32_t m_tail;
  uint32_t m_count;
  uint32_t m_size;
};
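
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): because CMurmurHash2A is
// an incremental implementation of MurmurHash2A, feeding a buffer in pieces
// should give the same result as hashing it in one call (assuming a
// little-endian machine, consistent with the endianness caveats at the top of
// this file). The split point, buffer, and seed below are arbitrary example
// values.

// const unsigned char buf[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
//
// CMurmurHash2A hasher;
// hasher.Begin(42);
// hasher.Add(buf, 3);                   // first chunk
// hasher.Add(buf + 3, sizeof(buf) - 3); // remainder
// uint32_t incremental = hasher.End();
//
// uint32_t oneshot = MurmurHash2A(buf, sizeof(buf), 42);
//
// // incremental and oneshot should be equal.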

//-----------------------------------------------------------------------------
// MurmurHashNeutral2, by Austin Appleby

// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.

uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  uint32_t h = seed ^ len;

  const unsigned char * data = (const unsigned char *)key;

  while(len >= 4)
  {
    uint32_t k;

    k  = data[0];
    k |= data[1] << 8;
    k |= data[2] << 16;
    k |= data[3] << 24;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  switch(len)
  {
  case 3: h ^= data[2] << 16;
  case 2: h ^= data[1] << 8;
  case 1: h ^= data[0];
          h *= m;
  };

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
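
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): MurmurHashNeutral2 is the
// variant to reach for when hash values are persisted or exchanged across
// machines, since it assembles each word byte-by-byte instead of doing a raw
// 4-byte load. The buffer and seed below are arbitrary example values.

// const unsigned char record[6] = { 'a', 'b', 'c', 'd', 'e', 'f' };
//
// uint32_t portable = MurmurHashNeutral2 ( record, sizeof(record), 0 );
// // 'portable' can be stored on disk or sent over the network, because every
// // platform computes it from the bytes in the same order.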

//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby

// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.

// Performance will be lower than MurmurHash2

#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  const unsigned char * data = (const unsigned char *)key;

  uint32_t h = seed ^ len;

  int align = (int)((uintptr_t)data & 3); // cast via uintptr_t so the pointer is not truncated on 64-bit platforms

  if(align && (len >= 4))
  {
    // Pre-load the temp registers

    uint32_t t = 0, d = 0;

    switch(align)
    {
    case 1: t |= data[2] << 16;
    case 2: t |= data[1] << 8;
    case 3: t |= data[0];
    }

    t <<= (8 * align);

    data += 4-align;
    len -= 4-align;

    int sl = 8 * (4-align);
    int sr = 8 * align;

    // Mix

    while(len >= 4)
    {
      d = *(uint32_t *)data;
      t = (t >> sr) | (d << sl);

      uint32_t k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    // Handle leftover data in temp registers

    d = 0;

    if(len >= align)
    {
      switch(align)
      {
      case 3: d |= data[2] << 16;
      case 2: d |= data[1] << 8;
      case 1: d |= data[0];
      }

      uint32_t k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      //----------
      // Handle tail bytes

      switch(len)
      {
      case 3: h ^= data[2] << 16;
      case 2: h ^= data[1] << 8;
      case 1: h ^= data[0];
              h *= m;
      };
    }
    else
    {
      switch(len)
      {
      case 3: d |= data[2] << 16;
      case 2: d |= data[1] << 8;
      case 1: d |= data[0];
      case 0: h ^= (t >> sr) | (d << sl);
              h *= m;
      }
    }

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
  else
  {
    while(len >= 4)
    {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    //----------
    // Handle tail bytes

    switch(len)
    {
    case 3: h ^= data[2] << 16;
    case 2: h ^= data[1] << 8;
    case 1: h ^= data[0];
            h *= m;
    };

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
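
//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): MurmurHashAligned2 exists
// for platforms where unaligned 4-byte loads fault, so it is the safe choice
// when the key may start at an odd address. The buffer, offset, and seed below
// are arbitrary example values.

// unsigned char packet[16] = { 0 }; // example data; contents are arbitrary
//
// // Start hashing at byte 1 - a deliberately misaligned address that a plain
// // 4-byte load could fault on with some platforms.
// uint32_t h = MurmurHashAligned2 ( packet + 1, (int)sizeof(packet) - 1, 0 );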

//-----------------------------------------------------------------------------