blob: 25ee86cefb2f5c37fcbcd3973eac7e2cc37300cf [file] [log] [blame]
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +00001//-----------------------------------------------------------------------------
2// Flipping a single bit of a key should cause an "avalanche" of changes in
3// the hash function's output. Ideally, each output bits should flip 50% of
4// the time - if the probability of an output bit flipping is not 50%, that bit
5// is "biased". Too much bias means that patterns applied to the input will
6// cause "echoes" of the patterns in the output, which in turn can cause the
7// hash function to fail to create an even, random distribution of hash values.
8
9#include "AvalancheTest.h"
10
11#include "Bitvec.h"
12#include "Random.h"
13
14#include <math.h>
15
16// Avalanche fails if a bit is biased by more than 1%
17
18double gc_avalancheFail = 0.01;
19
20//-----------------------------------------------------------------------------
21
22void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
23{
24 const char * symbols = ".123456789X";
25
26 for(int i = 0; i < y; i++)
27 {
28 printf("[");
29 for(int j = 0; j < x; j++)
30 {
31 int k = (y - i) -1;
32
33 int bin = bins[k + (j*y)];
34
35 double b = double(bin) / double(reps);
36 b = fabs(b*2 - 1);
37
38 b *= scale;
39
40 int s = (int)floor(b*10);
41
42 if(s > 10) s = 10;
43 if(s < 0) s = 0;
44
45 printf("%c",symbols[s]);
46 }
47
48 printf("]\n");
49 }
50}
51
52//----------------------------------------------------------------------------
53
54double maxBias ( std::vector<int> & counts, int reps )
55{
56 double worst = 0;
57
58 for(int i = 0; i < (int)counts.size(); i++)
59 {
60 double c = double(counts[i]) / double(reps);
61
62 double d = fabs(c * 2 - 1);
63
64 if(d > worst)
65 {
66 worst = d;
67 }
68 }
69
70 return worst;
71}
72
73double rmsBias ( std::vector<int> & counts, int reps )
74{
75 double rms = 0;
76
77 for(int i = 0; i < (int)counts.size(); i++)
78 {
79 double d = double(counts[i]) / reps;
80
81 d = fabs(d * 2 - 1);
82
83 rms += d*d;
84 }
85
86 rms /= counts.size();
87 rms = sqrt(rms);
88
89 return rms;
90}
91
92//-----------------------------------------------------------------------------
93
94void calcBias ( pfHash hash, const int nbitsIn, const int nbitsOut, std::vector<int> & counts, int reps )
95{
96 const int nbytesIn = nbitsIn / 8;
97 const int nbytesOut = nbitsOut / 8;
98
99 uint8_t * K = new uint8_t[nbytesIn];
100 uint8_t * A = new uint8_t[nbytesIn];
101 uint8_t * B = new uint8_t[nbytesIn];
102
103 Rand r(378473);
104
105 for(int irep = 0; irep < reps; irep++)
106 {
107 r.rand_p(K,nbytesIn);
108
109 hash(K,nbytesIn,0,A);
110
111 int * cursor = &counts[0];
112
113 for(int iBit = 0; iBit < nbitsIn; iBit++)
114 {
115 flipbit(K,nbytesIn,iBit);
116 hash(K,nbytesIn,0,B);
117 flipbit(K,nbytesIn,iBit);
118
119 for(int iOut = 0; iOut < nbitsOut; iOut++)
120 {
121 int bitA = getbit(A,nbytesOut,iOut);
122 int bitB = getbit(B,nbytesOut,iOut);
123
124 (*cursor++) += (bitA ^ bitB);
125 }
126 }
127 }
128
129 delete [] K;
130 delete [] A;
131 delete [] B;
132}
133
134//-----------------------------------------------------------------------------
135
136bool AvalancheTest ( pfHash hash, const int keybits, const int hashbits, const int reps )
137{
138 printf("Avalanche for %3d-bit keys -> %3d-bit hashes, %8d reps - ",keybits,hashbits,reps);
139
140 std::vector<int> bins(keybits*hashbits,0);
141
142 calcBias(hash,keybits,hashbits,bins,reps);
143
144 double b = maxBias(bins,reps);
145
146 printf("Max avalanche bias is %f\n",b);
147
148 if(b > gc_avalancheFail)
149 {
150 return false;
151 }
152 else
153 {
154 return true;
155 }
156}
157
158//----------------------------------------------------------------------------
159// Computing whether a given mix function produces a low bias can take many
160// millions of tests when the bias is low. This code tries to speed up the
161// process by early-outing if the probability that the bias will fall outside
162// the given range is over 99%
163
164/*
165bool testMixAvalanche32_Fast ( pfMix32 mix, double cutmin, double cutmax, bool winlose )
166{
167 int counts[32*32];
168
169 memset(counts,0,sizeof(counts));
170
171 double pmin = 0;
172 double pmax = 0;
173 double n = 0;
174 double s = 4.75;
175 int w = 0;
176
177 int batchsize = 512;
178
179 for(int iBatch = 0; iBatch < 1024 * 1024; iBatch++)
180 {
181 calcMixBias<uint32_t>(mix,counts,batchsize);
182
183 n = (iBatch+1) * batchsize;
184 w = maxIntBias(32,32,counts,(int)n);
185
186 // compute p such that w is at the bottom of the confidence interval
187
188 double a = s*s*n + n*n;
189 double b = -2.0*double(w)*n - s*s*n;
190 double c = double(w)*double(w);
191
192 SolveQuadratic(a,b,c,pmin,pmax);
193
194 double win = 0;
195 double tie = 0;
196 double lose = 0;
197
198 if(winlose)
199 {
200 if(pmax < cutmax)
201 {
202 printf("\n+!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
203 return true;
204 }
205
206 if(pmin > cutmax)
207 {
208 //printf("\n-!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
209 return false;
210 }
211
212 // doesn't fail or win outright. does it have a chance of winning?
213
214 if(pmin < cutmin)
215 {
216 // pmin:pmax contains cutmin:cutmax
217
218 assert(cutmin > pmin);
219 assert(cutmax < pmax);
220
221 win = (cutmin-pmin) / (pmax-pmin);
222 tie = (cutmax-cutmin) / (pmax-pmin);
223 lose = (pmax-cutmax) / (pmax-pmin);
224 }
225 else
226 {
227 // pmin:pmax overlaps above cutmin:cutmax
228
229 assert(cutmin < pmin);
230
231 win = 0;
232 tie = ((cutmax - pmin) / (pmax-pmin)) * ((cutmax-pmin) / (cutmax-cutmin));
233 lose = (pmax-cutmax) / (pmax-pmin);
234
235 return false;
236 }
237
238 double frac = win + tie*0.5;
239
240 if((pmax-pmin)/(cutmax-cutmin) < 5)
241 {
242 if(frac < 0.20)
243 {
244 // 99% chance of loss
245 //printf("\n--- %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
246 return false;
247 }
248
249 if(frac > 0.80)
250 {
251 // 99% chance of win
252 printf("\n+++ %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
253 return true;
254 }
255 }
256 }
257
258 if(!winlose && (n > 0) && ((int)n % (128 * 1024) == 0))
259 {
260 printf("%f - %f : %f - %d - %f : %f : %f\n",double(w)/n,pmin,pmax,int(n),win,tie,lose);
261 }
262
263 }
264
265 // We failed to determine whether this mix function passes or fails
266
267 printf("\n??? %f - %f : %f",double(w)/n,pmin,pmax);
268
269 return true;
270}
271*/
272
273//-----------------------------------------------------------------------------