Blame - AvalancheTest.cpp - chromium.googlesource.com/external/smhasher

blob: 25ee86cefb2f5c37fcbcd3973eac7e2cc37300cf [file] [log] [blame]

tanjent@gmail.com	7e5c363	2010-11-02 00:50:04 +0000	[diff] [blame^]	1	//-----------------------------------------------------------------------------
				2	// Flipping a single bit of a key should cause an "avalanche" of changes in
				3	// the hash function's output. Ideally, each output bits should flip 50% of
				4	// the time - if the probability of an output bit flipping is not 50%, that bit
				5	// is "biased". Too much bias means that patterns applied to the input will
				6	// cause "echoes" of the patterns in the output, which in turn can cause the
				7	// hash function to fail to create an even, random distribution of hash values.
				8
				9	#include "AvalancheTest.h"
				10
				11	#include "Bitvec.h"
				12	#include "Random.h"
				13
				14	#include <math.h>
				15
				16	// Avalanche fails if a bit is biased by more than 1%
				17
				18	double gc_avalancheFail = 0.01;
				19
				20	//-----------------------------------------------------------------------------
				21
				22	void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
				23	{
				24	const char * symbols = ".123456789X";
				25
				26	for(int i = 0; i < y; i++)
				27	{
				28	printf("[");
				29	for(int j = 0; j < x; j++)
				30	{
				31	int k = (y - i) -1;
				32
				33	int bin = bins[k + (j*y)];
				34
				35	double b = double(bin) / double(reps);
				36	b = fabs(b*2 - 1);
				37
				38	b *= scale;
				39
				40	int s = (int)floor(b*10);
				41
				42	if(s > 10) s = 10;
				43	if(s < 0) s = 0;
				44
				45	printf("%c",symbols[s]);
				46	}
				47
				48	printf("]\n");
				49	}
				50	}
				51
				52	//----------------------------------------------------------------------------
				53
				54	double maxBias ( std::vector<int> & counts, int reps )
				55	{
				56	double worst = 0;
				57
				58	for(int i = 0; i < (int)counts.size(); i++)
				59	{
				60	double c = double(counts[i]) / double(reps);
				61
				62	double d = fabs(c * 2 - 1);
				63
				64	if(d > worst)
				65	{
				66	worst = d;
				67	}
				68	}
				69
				70	return worst;
				71	}
				72
				73	double rmsBias ( std::vector<int> & counts, int reps )
				74	{
				75	double rms = 0;
				76
				77	for(int i = 0; i < (int)counts.size(); i++)
				78	{
				79	double d = double(counts[i]) / reps;
				80
				81	d = fabs(d * 2 - 1);
				82
				83	rms += d*d;
				84	}
				85
				86	rms /= counts.size();
				87	rms = sqrt(rms);
				88
				89	return rms;
				90	}
				91
				92	//-----------------------------------------------------------------------------
				93
				94	void calcBias ( pfHash hash, const int nbitsIn, const int nbitsOut, std::vector<int> & counts, int reps )
				95	{
				96	const int nbytesIn = nbitsIn / 8;
				97	const int nbytesOut = nbitsOut / 8;
				98
				99	uint8_t * K = new uint8_t[nbytesIn];
				100	uint8_t * A = new uint8_t[nbytesIn];
				101	uint8_t * B = new uint8_t[nbytesIn];
				102
				103	Rand r(378473);
				104
				105	for(int irep = 0; irep < reps; irep++)
				106	{
				107	r.rand_p(K,nbytesIn);
				108
				109	hash(K,nbytesIn,0,A);
				110
				111	int * cursor = &counts[0];
				112
				113	for(int iBit = 0; iBit < nbitsIn; iBit++)
				114	{
				115	flipbit(K,nbytesIn,iBit);
				116	hash(K,nbytesIn,0,B);
				117	flipbit(K,nbytesIn,iBit);
				118
				119	for(int iOut = 0; iOut < nbitsOut; iOut++)
				120	{
				121	int bitA = getbit(A,nbytesOut,iOut);
				122	int bitB = getbit(B,nbytesOut,iOut);
				123
				124	(*cursor++) += (bitA ^ bitB);
				125	}
				126	}
				127	}
				128
				129	delete [] K;
				130	delete [] A;
				131	delete [] B;
				132	}
				133
				134	//-----------------------------------------------------------------------------
				135
				136	bool AvalancheTest ( pfHash hash, const int keybits, const int hashbits, const int reps )
				137	{
				138	printf("Avalanche for %3d-bit keys -> %3d-bit hashes, %8d reps - ",keybits,hashbits,reps);
				139
				140	std::vector<int> bins(keybits*hashbits,0);
				141
				142	calcBias(hash,keybits,hashbits,bins,reps);
				143
				144	double b = maxBias(bins,reps);
				145
				146	printf("Max avalanche bias is %f\n",b);
				147
				148	if(b > gc_avalancheFail)
				149	{
				150	return false;
				151	}
				152	else
				153	{
				154	return true;
				155	}
				156	}
				157
				158	//----------------------------------------------------------------------------
				159	// Computing whether a given mix function produces a low bias can take many
				160	// millions of tests when the bias is low. This code tries to speed up the
				161	// process by early-outing if the probability that the bias will fall outside
				162	// the given range is over 99%
				163
				164	/*
				165	bool testMixAvalanche32_Fast ( pfMix32 mix, double cutmin, double cutmax, bool winlose )
				166	{
				167	int counts[32*32];
				168
				169	memset(counts,0,sizeof(counts));
				170
				171	double pmin = 0;
				172	double pmax = 0;
				173	double n = 0;
				174	double s = 4.75;
				175	int w = 0;
				176
				177	int batchsize = 512;
				178
				179	for(int iBatch = 0; iBatch < 1024 * 1024; iBatch++)
				180	{
				181	calcMixBias<uint32_t>(mix,counts,batchsize);
				182
				183	n = (iBatch+1) * batchsize;
				184	w = maxIntBias(32,32,counts,(int)n);
				185
				186	// compute p such that w is at the bottom of the confidence interval
				187
				188	double a = ssn + n*n;
				189	double b = -2.0double(w)n - ssn;
				190	double c = double(w)*double(w);
				191
				192	SolveQuadratic(a,b,c,pmin,pmax);
				193
				194	double win = 0;
				195	double tie = 0;
				196	double lose = 0;
				197
				198	if(winlose)
				199	{
				200	if(pmax < cutmax)
				201	{
				202	printf("\n+!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
				203	return true;
				204	}
				205
				206	if(pmin > cutmax)
				207	{
				208	//printf("\n-!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
				209	return false;
				210	}
				211
				212	// doesn't fail or win outright. does it have a chance of winning?
				213
				214	if(pmin < cutmin)
				215	{
				216	// pmin:pmax contains cutmin:cutmax
				217
				218	assert(cutmin > pmin);
				219	assert(cutmax < pmax);
				220
				221	win = (cutmin-pmin) / (pmax-pmin);
				222	tie = (cutmax-cutmin) / (pmax-pmin);
				223	lose = (pmax-cutmax) / (pmax-pmin);
				224	}
				225	else
				226	{
				227	// pmin:pmax overlaps above cutmin:cutmax
				228
				229	assert(cutmin < pmin);
				230
				231	win = 0;
				232	tie = ((cutmax - pmin) / (pmax-pmin)) * ((cutmax-pmin) / (cutmax-cutmin));
				233	lose = (pmax-cutmax) / (pmax-pmin);
				234
				235	return false;
				236	}
				237
				238	double frac = win + tie*0.5;
				239
				240	if((pmax-pmin)/(cutmax-cutmin) < 5)
				241	{
				242	if(frac < 0.20)
				243	{
				244	// 99% chance of loss
				245	//printf("\n--- %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
				246	return false;
				247	}
				248
				249	if(frac > 0.80)
				250	{
				251	// 99% chance of win
				252	printf("\n+++ %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
				253	return true;
				254	}
				255	}
				256	}
				257
				258	if(!winlose && (n > 0) && ((int)n % (128 * 1024) == 0))
				259	{
				260	printf("%f - %f : %f - %d - %f : %f : %f\n",double(w)/n,pmin,pmax,int(n),win,tie,lose);
				261	}
				262
				263	}
				264
				265	// We failed to determine whether this mix function passes or fails
				266
				267	printf("\n??? %f - %f : %f",double(w)/n,pmin,pmax);
				268
				269	return true;
				270	}
				271	*/
				272
				273	//-----------------------------------------------------------------------------