blob: 3136cbb219fb20fe56fc957f0770b96000a3a483 [file] [log] [blame]
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +00001//-----------------------------------------------------------------------------
2// Differential collision & distribution tests - generate a bunch of random keys,
3// see what happens to the hash value when we flip a few bits of the key.
4
5#pragma once
tanjent@gmail.comad4b3632010-11-05 01:20:58 +00006
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +00007#include "Types.h"
tanjent@gmail.com2aa29c32011-03-19 08:53:53 +00008#include "Stats.h" // for chooseUpToK
9#include "KeysetTest.h" // for SparseKeygenRecurse
tanjent@gmail.com3ee45612011-03-21 19:33:01 +000010#include "Random.h"
tanjent@gmail.com2aa29c32011-03-19 08:53:53 +000011
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000012#include <vector>
tanjent@gmail.com2aa29c32011-03-19 08:53:53 +000013#include <algorithm>
14#include <stdio.h>
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000015
16//-----------------------------------------------------------------------------
17// Sort through the differentials, ignoring collisions that only occured once
18// (these could be false positives). If we find collisions of 3 or more, the
19// differential test fails.
20
21template < class keytype >
22bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
23{
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000024 std::sort(diffs.begin(), diffs.end());
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000025
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000026 int count = 1;
27 int ignore = 0;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000028
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000029 bool result = true;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000030
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000031 if(diffs.size())
32 {
33 keytype kp = diffs[0];
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000034
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000035 for(int i = 1; i < (int)diffs.size(); i++)
36 {
37 if(diffs[i] == kp)
38 {
39 count++;
40 continue;
41 }
42 else
43 {
44 if(count > 1)
45 {
46 result = false;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000047
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000048 double pct = 100 * (double(count) / double(reps));
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000049
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000050 if(dumpCollisions)
51 {
52 printbits((unsigned char*)&kp,sizeof(kp));
53 printf(" - %4.2f%%\n", pct );
54 }
55 }
56 else
57 {
58 ignore++;
59 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000060
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000061 kp = diffs[i];
62 count = 1;
63 }
64 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000065
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000066 if(count > 1)
67 {
68 double pct = 100 * (double(count) / double(reps));
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000069
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000070 if(dumpCollisions)
71 {
72 printbits((unsigned char*)&kp,sizeof(kp));
73 printf(" - %4.2f%%\n", pct );
74 }
75 }
76 else
77 {
78 ignore++;
79 }
80 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000081
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000082 printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000083
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000084 if(result == false)
85 {
86 printf(" !!!!! ");
87 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000088
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000089 printf("\n");
90 printf("\n");
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000091
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +000092 return result;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +000093}
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +000094
95//-----------------------------------------------------------------------------
96// Check all possible keybits-choose-N differentials for collisions, report
97// ones that occur significantly more often than expected.
98
99// Random collisions can happen with probability 1 in 2^32 - if we do more than
100// 2^32 tests, we'll probably see some spurious random collisions, so don't report
101// them.
102
103template < typename keytype, typename hashtype >
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000104void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
105{
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000106 const int bits = sizeof(keytype)*8;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000107
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000108 for(int i = start; i < bits; i++)
109 {
110 flipbit(&k2,sizeof(k2),i);
111 bitsleft--;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000112
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000113 hash(&k2,sizeof(k2),0,&h2);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000114
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000115 if(h1 == h2)
116 {
117 diffs.push_back(k1 ^ k2);
118 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000119
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000120 if(bitsleft)
121 {
122 DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
123 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000124
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000125 flipbit(&k2,sizeof(k2),i);
126 bitsleft++;
127 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000128}
129
130//----------
131
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000132template < typename keytype, typename hashtype >
133bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000134{
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000135 const int keybits = sizeof(keytype) * 8;
136 const int hashbits = sizeof(hashtype) * 8;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000137
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000138 double diffcount = chooseUpToK(keybits,diffbits);
139 double testcount = (diffcount * double(reps));
140 double expected = testcount / pow(2.0,double(hashbits));
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000141
tanjent@gmail.com3ee45612011-03-21 19:33:01 +0000142 Rand r(100);
143
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000144 std::vector<keytype> diffs;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000145
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000146 keytype k1,k2;
147 hashtype h1,h2;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000148
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000149 printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
150 printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000151
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000152 for(int i = 0; i < reps; i++)
153 {
154 if(i % (reps/10) == 0) printf(".");
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000155
tanjent@gmail.com3ee45612011-03-21 19:33:01 +0000156 r.rand_p(&k1,sizeof(keytype));
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000157 k2 = k1;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000158
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000159 hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000160
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000161 DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
162 }
163 printf("\n");
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000164
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000165 bool result = true;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000166
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000167 result &= ProcessDifferentials(diffs,reps,dumpCollisions);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000168
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000169 return result;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000170}
171
172//-----------------------------------------------------------------------------
173// Differential distribution test - for each N-bit input differential, generate
174// a large set of differential key pairs, hash them, and test the output
175// differentials using our distribution test code.
176
177// This is a very hard test to pass - even if the hash values are well-distributed,
178// the differences between hash values may not be. It's also not entirely relevant
179// for testing hash functions, but it's still interesting.
180
181// This test is a _lot_ of work, as it's essentially a full keyset test for
182// each of a potentially huge number of input differentials. To speed things
183// along, we do only a few distribution tests per keyset instead of the full
184// grid.
185
186// #TODO - put diagram drawing back on
187
188template < typename keytype, typename hashtype >
189void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
190{
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000191 std::vector<keytype> keys(trials);
192 std::vector<hashtype> A(trials),B(trials);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000193
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000194 for(int i = 0; i < trials; i++)
195 {
196 rand_p(&keys[i],sizeof(keytype));
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000197
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000198 hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
199 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000200
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000201 //----------
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000202
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000203 std::vector<keytype> diffs;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000204
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000205 keytype temp(0);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000206
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000207 SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000208
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000209 //----------
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000210
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000211 worst = 0;
212 avg = 0;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000213
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000214 hashtype h2;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000215
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000216 for(size_t j = 0; j < diffs.size(); j++)
217 {
218 keytype & d = diffs[j];
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000219
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000220 for(int i = 0; i < trials; i++)
221 {
222 keytype k2 = keys[i] ^ d;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000223
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000224 hash(&k2,sizeof(k2),0,&h2);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000225
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000226 B[i] = A[i] ^ h2;
227 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000228
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000229 double dworst,davg;
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000230
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000231 TestDistributionFast(B,dworst,davg);
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000232
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000233 avg += davg;
234 worst = (dworst > worst) ? dworst : worst;
235 }
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000236
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000237 avg /= double(diffs.size());
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000238}
239
tanjent@gmail.com623590d2011-03-28 18:19:31 +0000240//-----------------------------------------------------------------------------
241// Simpler differential-distribution test - for all 1-bit differentials,
242// generate random key pairs and run full distribution/collision tests on the
243// hash differentials
244
245template < typename keytype, typename hashtype >
246bool DiffDistTest2 ( pfHash hash )
247{
248 Rand r(857374);
249
250 int keybits = sizeof(keytype) * 8;
251 const int keycount = 256*256*32;
252 keytype k;
253
254 std::vector<hashtype> hashes(keycount);
255 hashtype h1,h2;
256
257 bool result = true;
258
259 for(int keybit = 0; keybit < keybits; keybit++)
260 {
261 printf("Testing bit %d\n",keybit);
262
263 for(int i = 0; i < keycount; i++)
264 {
265 r.rand_p(&k,sizeof(keytype));
266
267 hash(&k,sizeof(keytype),0,&h1);
268 flipbit(&k,sizeof(keytype),keybit);
269 hash(&k,sizeof(keytype),0,&h2);
270
271 hashes[i] = h1 ^ h2;
272 }
273
274 result &= TestHashList<hashtype>(hashes,true,true,true);
275 printf("\n");
276 }
277
278 return result;
279}
280
tanjent@gmail.com7e5c3632010-11-02 00:50:04 +0000281//----------------------------------------------------------------------------