blob: 4c2336974ed610ab95cecb5e571b29e1366f733d [file] [log] [blame]
tanjent@gmail.comad4b3632010-11-05 01:20:58 +00001//-----------------------------------------------------------------------------
2// Flipping a single bit of a key should cause an "avalanche" of changes in
3// the hash function's output. Ideally, each output bits should flip 50% of
4// the time - if the probability of an output bit flipping is not 50%, that bit
5// is "biased". Too much bias means that patterns applied to the input will
6// cause "echoes" of the patterns in the output, which in turn can cause the
7// hash function to fail to create an even, random distribution of hash values.
8
9
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000010#pragma once
11
12#include "Types.h"
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000013#include "Random.h"
tanjent@gmail.com2aa29c32011-03-19 08:53:53 +000014
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000015#include <vector>
tanjent@gmail.com2aa29c32011-03-19 08:53:53 +000016#include <stdio.h>
17#include <math.h>
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000018
19// Avalanche fails if a bit is biased by more than 1%
20
21#define AVALANCHE_FAIL 0.01
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000022
23double maxBias ( std::vector<int> & counts, int reps );
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000024
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000025//-----------------------------------------------------------------------------
26
27template < typename keytype, typename hashtype >
aappleby@google.com7f20a312011-03-21 20:55:06 +000028void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000029{
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000030 const int keybytes = sizeof(keytype);
31 const int hashbytes = sizeof(hashtype);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000032
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000033 const int keybits = keybytes * 8;
34 const int hashbits = hashbytes * 8;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000035
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000036 keytype K;
37 hashtype A,B;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000038
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000039 for(int irep = 0; irep < reps; irep++)
40 {
41 if(irep % (reps/10) == 0) printf(".");
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000042
aappleby@google.com7f20a312011-03-21 20:55:06 +000043 r.rand_p(&K,keybytes);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000044
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000045 hash(&K,keybytes,0,&A);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000046
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000047 int * cursor = &counts[0];
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000048
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000049 for(int iBit = 0; iBit < keybits; iBit++)
50 {
51 flipbit(&K,keybytes,iBit);
52 hash(&K,keybytes,0,&B);
53 flipbit(&K,keybytes,iBit);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000054
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000055 for(int iOut = 0; iOut < hashbits; iOut++)
56 {
57 int bitA = getbit(&A,hashbytes,iOut);
58 int bitB = getbit(&B,hashbytes,iOut);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000059
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000060 (*cursor++) += (bitA ^ bitB);
61 }
62 }
63 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000064}
65
66//-----------------------------------------------------------------------------
67
68template < typename keytype, typename hashtype >
69bool AvalancheTest ( pfHash hash, const int reps )
70{
aappleby@google.com7f20a312011-03-21 20:55:06 +000071 Rand r(48273);
72
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000073 const int keybytes = sizeof(keytype);
74 const int hashbytes = sizeof(hashtype);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000075
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000076 const int keybits = keybytes * 8;
77 const int hashbits = hashbytes * 8;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000078
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000079 printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000080
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000081 //----------
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000082
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000083 std::vector<int> bins(keybits*hashbits,0);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000084
aappleby@google.com7f20a312011-03-21 20:55:06 +000085 calcBias<keytype,hashtype>(hash,bins,reps,r);
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000086
87 //----------
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000088
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000089 bool result = true;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000090
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000091 double b = maxBias(bins,reps);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000092
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000093 printf(" worst bias is %f%%",b * 100.0);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000094
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000095 if(b > AVALANCHE_FAIL)
96 {
97 printf(" !!!!! ");
98 result = false;
99 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000100
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000101 printf("\n");
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000102
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000103 return result;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000104}
105
106//----------------------------------------------------------------------------
107// Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and
108// not really all that useful.
109
110template< typename keytype, typename hashtype >
111void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
112{
aappleby@google.com7f20a312011-03-21 20:55:06 +0000113 Rand r(11938);
114
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000115 const int keybytes = sizeof(keytype);
116 const int hashbytes = sizeof(hashtype);
117 const int hashbits = hashbytes * 8;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000118
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000119 std::vector<int> bins(hashbits*hashbits*4,0);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000120
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000121 keytype key;
122 hashtype h1,h2;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000123
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000124 for(int irep = 0; irep < reps; irep++)
125 {
126 if(verbose)
127 {
128 if(irep % (reps/10) == 0) printf(".");
129 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000130
aappleby@google.com7f20a312011-03-21 20:55:06 +0000131 r.rand_p(&key,keybytes);
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000132 hash(&key,keybytes,0,&h1);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000133
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000134 flipbit(key,keybit);
135 hash(&key,keybytes,0,&h2);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000136
tanjent@gmail.com623590d2011-03-28 18:19:31 +0000137 hashtype d = h1 ^ h2;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000138
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000139 for(int out1 = 0; out1 < hashbits; out1++)
140 for(int out2 = 0; out2 < hashbits; out2++)
141 {
142 if(out1 == out2) continue;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000143
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000144 uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000145
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000146 bins[(out1 * hashbits + out2) * 4 + b]++;
147 }
148 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000149
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000150 if(verbose) printf("\n");
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000151
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000152 maxBias = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000153
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000154 for(int out1 = 0; out1 < hashbits; out1++)
155 {
156 for(int out2 = 0; out2 < hashbits; out2++)
157 {
158 if(out1 == out2)
159 {
160 if(verbose) printf("\\");
161 continue;
162 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000163
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000164 double bias = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000165
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000166 for(int b = 0; b < 4; b++)
167 {
168 double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
169 b2 = fabs(b2 * 2 - 1);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000170
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000171 if(b2 > bias) bias = b2;
172 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000173
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000174 if(bias > maxBias)
175 {
176 maxBias = bias;
177 maxA = out1;
178 maxB = out2;
179 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000180
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000181 if(verbose)
182 {
183 if (bias < 0.01) printf(".");
184 else if(bias < 0.05) printf("o");
185 else if(bias < 0.33) printf("O");
186 else printf("X");
187 }
188 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000189
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000190 if(verbose) printf("\n");
191 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000192}
193
194//----------
195
196template< typename keytype, typename hashtype >
197bool BicTest ( pfHash hash, const int reps )
198{
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000199 const int keybytes = sizeof(keytype);
200 const int keybits = keybytes * 8;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000201
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000202 double maxBias = 0;
203 int maxK = 0;
204 int maxA = 0;
205 int maxB = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000206
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000207 for(int i = 0; i < keybits; i++)
208 {
209 if(i % (keybits/10) == 0) printf(".");
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000210
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000211 double bias;
212 int a,b;
213
tanjent@gmail.com623590d2011-03-28 18:19:31 +0000214 BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000215
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000216 if(bias > maxBias)
217 {
218 maxBias = bias;
219 maxK = i;
220 maxA = a;
221 maxB = b;
222 }
223 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000224
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000225 printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000226
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000227 // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000228
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000229 bool result = (maxBias < 0.05);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000230
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000231 return result;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000232}
233
234//-----------------------------------------------------------------------------
tanjent@gmail.com623590d2011-03-28 18:19:31 +0000235// BIC test variant - store all intermediate data in a table, draw diagram
236// afterwards (much faster)
237
238template< typename keytype, typename hashtype >
239void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
240{
241 const int keybytes = sizeof(keytype);
242 const int keybits = keybytes * 8;
243 const int hashbytes = sizeof(hashtype);
244 const int hashbits = hashbytes * 8;
245 const int pagesize = hashbits*hashbits*4;
246
247 Rand r(11938);
248
249 double maxBias = 0;
250 int maxK = 0;
251 int maxA = 0;
252 int maxB = 0;
253
254 keytype key;
255 hashtype h1,h2;
256
257 std::vector<int> bins(keybits*pagesize,0);
258
259 for(int keybit = 0; keybit < keybits; keybit++)
260 {
261 if(keybit % (keybits/10) == 0) printf(".");
262
263 int * page = &bins[keybit*pagesize];
264
265 for(int irep = 0; irep < reps; irep++)
266 {
267 r.rand_p(&key,keybytes);
268 hash(&key,keybytes,0,&h1);
269 flipbit(key,keybit);
270 hash(&key,keybytes,0,&h2);
271
272 hashtype d = h1 ^ h2;
273
274 for(int out1 = 0; out1 < hashbits-1; out1++)
275 for(int out2 = out1+1; out2 < hashbits; out2++)
276 {
277 int * b = &page[(out1*hashbits+out2)*4];
278
279 uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
280
281 b[x]++;
282 }
283 }
284 }
285
286 printf("\n");
287
288 for(int out1 = 0; out1 < hashbits-1; out1++)
289 {
290 for(int out2 = out1+1; out2 < hashbits; out2++)
291 {
292 if(verbose) printf("(%3d,%3d) - ",out1,out2);
293
294 for(int keybit = 0; keybit < keybits; keybit++)
295 {
296 int * page = &bins[keybit*pagesize];
297 int * bins = &page[(out1*hashbits+out2)*4];
298
299 double bias = 0;
300
301 for(int b = 0; b < 4; b++)
302 {
303 double b2 = double(bins[b]) / double(reps / 2);
304 b2 = fabs(b2 * 2 - 1);
305
306 if(b2 > bias) bias = b2;
307 }
308
309 if(bias > maxBias)
310 {
311 maxBias = bias;
312 maxK = keybit;
313 maxA = out1;
314 maxB = out2;
315 }
316
317 if(verbose)
318 {
319 if (bias < 0.01) printf(".");
320 else if(bias < 0.05) printf("o");
321 else if(bias < 0.33) printf("O");
322 else printf("X");
323 }
324 }
325
326 // Finished keybit
327
328 if(verbose) printf("\n");
329 }
330
331 if(verbose)
332 {
333 for(int i = 0; i < keybits+12; i++) printf("-");
334 printf("\n");
335 }
336 }
337
338 printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
339}
340
341
342//-----------------------------------------------------------------------------
343// BIC test variant - iterate over output bits, then key bits. No temp storage,
344// but slooooow
345
346template< typename keytype, typename hashtype >
347void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
348{
349 const int keybytes = sizeof(keytype);
350 const int keybits = keybytes * 8;
351 const int hashbytes = sizeof(hashtype);
352 const int hashbits = hashbytes * 8;
353
354 Rand r(11938);
355
356 double maxBias = 0;
357 int maxK = 0;
358 int maxA = 0;
359 int maxB = 0;
360
361 keytype key;
362 hashtype h1,h2;
363
364 for(int out1 = 0; out1 < hashbits-1; out1++)
365 for(int out2 = out1+1; out2 < hashbits; out2++)
366 {
367 if(verbose) printf("(%3d,%3d) - ",out1,out2);
368
369 for(int keybit = 0; keybit < keybits; keybit++)
370 {
371 int bins[4] = { 0, 0, 0, 0 };
372
373 for(int irep = 0; irep < reps; irep++)
374 {
375 r.rand_p(&key,keybytes);
376 hash(&key,keybytes,0,&h1);
377 flipbit(key,keybit);
378 hash(&key,keybytes,0,&h2);
379
380 hashtype d = h1 ^ h2;
381
382 uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
383
384 bins[b]++;
385 }
386
387 double bias = 0;
388
389 for(int b = 0; b < 4; b++)
390 {
391 double b2 = double(bins[b]) / double(reps / 2);
392 b2 = fabs(b2 * 2 - 1);
393
394 if(b2 > bias) bias = b2;
395 }
396
397 if(bias > maxBias)
398 {
399 maxBias = bias;
400 maxK = keybit;
401 maxA = out1;
402 maxB = out2;
403 }
404
405 if(verbose)
406 {
407 if (bias < 0.05) printf(".");
408 else if(bias < 0.10) printf("o");
409 else if(bias < 0.50) printf("O");
410 else printf("X");
411 }
412 }
413
414 // Finished keybit
415
416 if(verbose) printf("\n");
417 }
418
419 printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
420}
421
422//-----------------------------------------------------------------------------