tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 1 | //-----------------------------------------------------------------------------
|
| 2 | // Differential collision & distribution tests - generate a bunch of random keys,
|
| 3 | // see what happens to the hash value when we flip a few bits of the key.
|
| 4 |
|
| 5 | #pragma once
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 6 |
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 7 | #include "Types.h"
|
tanjent@gmail.com | 2aa29c3 | 2011-03-19 08:53:53 +0000 | [diff] [blame] | 8 | #include "Stats.h" // for chooseUpToK
|
| 9 | #include "KeysetTest.h" // for SparseKeygenRecurse
|
tanjent@gmail.com | 3ee4561 | 2011-03-21 19:33:01 +0000 | [diff] [blame] | 10 | #include "Random.h"
|
tanjent@gmail.com | 2aa29c3 | 2011-03-19 08:53:53 +0000 | [diff] [blame] | 11 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 12 | #include <vector>
|
tanjent@gmail.com | 2aa29c3 | 2011-03-19 08:53:53 +0000 | [diff] [blame] | 13 | #include <algorithm>
|
| 14 | #include <stdio.h>
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 15 |
|
| 16 | //-----------------------------------------------------------------------------
|
| 17 | // Sort through the differentials, ignoring collisions that only occured once
|
| 18 | // (these could be false positives). If we find collisions of 3 or more, the
|
| 19 | // differential test fails.
|
| 20 |
|
| 21 | template < class keytype >
|
| 22 | bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
|
| 23 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 24 | std::sort(diffs.begin(), diffs.end());
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 25 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 26 | int count = 1;
|
| 27 | int ignore = 0;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 28 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 29 | bool result = true;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 30 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 31 | if(diffs.size())
|
| 32 | {
|
| 33 | keytype kp = diffs[0];
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 34 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 35 | for(int i = 1; i < (int)diffs.size(); i++)
|
| 36 | {
|
| 37 | if(diffs[i] == kp)
|
| 38 | {
|
| 39 | count++;
|
| 40 | continue;
|
| 41 | }
|
| 42 | else
|
| 43 | {
|
| 44 | if(count > 1)
|
| 45 | {
|
| 46 | result = false;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 47 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 48 | double pct = 100 * (double(count) / double(reps));
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 49 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 50 | if(dumpCollisions)
|
| 51 | {
|
| 52 | printbits((unsigned char*)&kp,sizeof(kp));
|
| 53 | printf(" - %4.2f%%\n", pct );
|
| 54 | }
|
| 55 | }
|
| 56 | else
|
| 57 | {
|
| 58 | ignore++;
|
| 59 | }
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 60 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 61 | kp = diffs[i];
|
| 62 | count = 1;
|
| 63 | }
|
| 64 | }
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 65 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 66 | if(count > 1)
|
| 67 | {
|
| 68 | double pct = 100 * (double(count) / double(reps));
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 69 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 70 | if(dumpCollisions)
|
| 71 | {
|
| 72 | printbits((unsigned char*)&kp,sizeof(kp));
|
| 73 | printf(" - %4.2f%%\n", pct );
|
| 74 | }
|
| 75 | }
|
| 76 | else
|
| 77 | {
|
| 78 | ignore++;
|
| 79 | }
|
| 80 | }
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 81 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 82 | printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 83 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 84 | if(result == false)
|
| 85 | {
|
| 86 | printf(" !!!!! ");
|
| 87 | }
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 88 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 89 | printf("\n");
|
| 90 | printf("\n");
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 91 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 92 | return result;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 93 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 94 |
|
| 95 | //-----------------------------------------------------------------------------
|
| 96 | // Check all possible keybits-choose-N differentials for collisions, report
|
| 97 | // ones that occur significantly more often than expected.
|
| 98 |
|
| 99 | // Random collisions can happen with probability 1 in 2^32 - if we do more than
|
| 100 | // 2^32 tests, we'll probably see some spurious random collisions, so don't report
|
| 101 | // them.
|
| 102 |
|
| 103 | template < typename keytype, typename hashtype >
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 104 | void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
|
| 105 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 106 | const int bits = sizeof(keytype)*8;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 107 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 108 | for(int i = start; i < bits; i++)
|
| 109 | {
|
| 110 | flipbit(&k2,sizeof(k2),i);
|
| 111 | bitsleft--;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 112 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 113 | hash(&k2,sizeof(k2),0,&h2);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 114 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 115 | if(h1 == h2)
|
| 116 | {
|
| 117 | diffs.push_back(k1 ^ k2);
|
| 118 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 119 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 120 | if(bitsleft)
|
| 121 | {
|
| 122 | DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
|
| 123 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 124 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 125 | flipbit(&k2,sizeof(k2),i);
|
| 126 | bitsleft++;
|
| 127 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 128 | }
|
| 129 |
|
| 130 | //----------
|
| 131 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 132 | template < typename keytype, typename hashtype >
|
| 133 | bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 134 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 135 | const int keybits = sizeof(keytype) * 8;
|
| 136 | const int hashbits = sizeof(hashtype) * 8;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 137 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 138 | double diffcount = chooseUpToK(keybits,diffbits);
|
| 139 | double testcount = (diffcount * double(reps));
|
| 140 | double expected = testcount / pow(2.0,double(hashbits));
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 141 |
|
tanjent@gmail.com | 3ee4561 | 2011-03-21 19:33:01 +0000 | [diff] [blame] | 142 | Rand r(100);
|
| 143 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 144 | std::vector<keytype> diffs;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 145 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 146 | keytype k1,k2;
|
| 147 | hashtype h1,h2;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 148 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 149 | printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
|
| 150 | printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 151 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 152 | for(int i = 0; i < reps; i++)
|
| 153 | {
|
| 154 | if(i % (reps/10) == 0) printf(".");
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 155 |
|
tanjent@gmail.com | 3ee4561 | 2011-03-21 19:33:01 +0000 | [diff] [blame] | 156 | r.rand_p(&k1,sizeof(keytype));
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 157 | k2 = k1;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 158 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 159 | hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 160 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 161 | DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
|
| 162 | }
|
| 163 | printf("\n");
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 164 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 165 | bool result = true;
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 166 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 167 | result &= ProcessDifferentials(diffs,reps,dumpCollisions);
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 168 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 169 | return result;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 170 | }
|
| 171 |
|
| 172 | //-----------------------------------------------------------------------------
|
| 173 | // Differential distribution test - for each N-bit input differential, generate
|
| 174 | // a large set of differential key pairs, hash them, and test the output
|
| 175 | // differentials using our distribution test code.
|
| 176 |
|
| 177 | // This is a very hard test to pass - even if the hash values are well-distributed,
|
| 178 | // the differences between hash values may not be. It's also not entirely relevant
|
| 179 | // for testing hash functions, but it's still interesting.
|
| 180 |
|
| 181 | // This test is a _lot_ of work, as it's essentially a full keyset test for
|
| 182 | // each of a potentially huge number of input differentials. To speed things
|
| 183 | // along, we do only a few distribution tests per keyset instead of the full
|
| 184 | // grid.
|
| 185 |
|
| 186 | // #TODO - put diagram drawing back on
|
| 187 |
|
| 188 | template < typename keytype, typename hashtype >
|
| 189 | void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
|
| 190 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 191 | std::vector<keytype> keys(trials);
|
| 192 | std::vector<hashtype> A(trials),B(trials);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 193 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 194 | for(int i = 0; i < trials; i++)
|
| 195 | {
|
| 196 | rand_p(&keys[i],sizeof(keytype));
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 197 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 198 | hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
|
| 199 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 200 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 201 | //----------
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 202 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 203 | std::vector<keytype> diffs;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 204 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 205 | keytype temp(0);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 206 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 207 | SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 208 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 209 | //----------
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 210 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 211 | worst = 0;
|
| 212 | avg = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 213 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 214 | hashtype h2;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 215 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 216 | for(size_t j = 0; j < diffs.size(); j++)
|
| 217 | {
|
| 218 | keytype & d = diffs[j];
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 219 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 220 | for(int i = 0; i < trials; i++)
|
| 221 | {
|
| 222 | keytype k2 = keys[i] ^ d;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 223 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 224 | hash(&k2,sizeof(k2),0,&h2);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 225 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 226 | B[i] = A[i] ^ h2;
|
| 227 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 228 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 229 | double dworst,davg;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 230 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 231 | TestDistributionFast(B,dworst,davg);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 232 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 233 | avg += davg;
|
| 234 | worst = (dworst > worst) ? dworst : worst;
|
| 235 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 236 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 237 | avg /= double(diffs.size());
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 238 | }
|
| 239 |
|
tanjent@gmail.com | 623590d | 2011-03-28 18:19:31 +0000 | [diff] [blame] | 240 | //-----------------------------------------------------------------------------
|
| 241 | // Simpler differential-distribution test - for all 1-bit differentials,
|
| 242 | // generate random key pairs and run full distribution/collision tests on the
|
| 243 | // hash differentials
|
| 244 |
|
| 245 | template < typename keytype, typename hashtype >
|
| 246 | bool DiffDistTest2 ( pfHash hash )
|
| 247 | {
|
| 248 | Rand r(857374);
|
| 249 |
|
| 250 | int keybits = sizeof(keytype) * 8;
|
| 251 | const int keycount = 256*256*32;
|
| 252 | keytype k;
|
| 253 |
|
| 254 | std::vector<hashtype> hashes(keycount);
|
| 255 | hashtype h1,h2;
|
| 256 |
|
| 257 | bool result = true;
|
| 258 |
|
| 259 | for(int keybit = 0; keybit < keybits; keybit++)
|
| 260 | {
|
| 261 | printf("Testing bit %d\n",keybit);
|
| 262 |
|
| 263 | for(int i = 0; i < keycount; i++)
|
| 264 | {
|
| 265 | r.rand_p(&k,sizeof(keytype));
|
| 266 |
|
| 267 | hash(&k,sizeof(keytype),0,&h1);
|
| 268 | flipbit(&k,sizeof(keytype),keybit);
|
| 269 | hash(&k,sizeof(keytype),0,&h2);
|
| 270 |
|
| 271 | hashes[i] = h1 ^ h2;
|
| 272 | }
|
| 273 |
|
| 274 | result &= TestHashList<hashtype>(hashes,true,true,true);
|
| 275 | printf("\n");
|
| 276 | }
|
| 277 |
|
| 278 | return result;
|
| 279 | }
|
| 280 |
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 281 | //----------------------------------------------------------------------------
|