tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame^] | 1 | //-----------------------------------------------------------------------------
|
| 2 | // Flipping a single bit of a key should cause an "avalanche" of changes in
|
| 3 | // the hash function's output. Ideally, each output bits should flip 50% of
|
| 4 | // the time - if the probability of an output bit flipping is not 50%, that bit
|
| 5 | // is "biased". Too much bias means that patterns applied to the input will
|
| 6 | // cause "echoes" of the patterns in the output, which in turn can cause the
|
| 7 | // hash function to fail to create an even, random distribution of hash values.
|
| 8 |
|
| 9 | #include "AvalancheTest.h"
|
| 10 |
|
| 11 | #include "Bitvec.h"
|
| 12 | #include "Random.h"
|
| 13 |
|
| 14 | #include <math.h>
|
| 15 |
|
| 16 | // Avalanche fails if a bit is biased by more than 1%
|
| 17 |
|
| 18 | double gc_avalancheFail = 0.01;
|
| 19 |
|
| 20 | //-----------------------------------------------------------------------------
|
| 21 |
|
| 22 | void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
|
| 23 | {
|
| 24 | const char * symbols = ".123456789X";
|
| 25 |
|
| 26 | for(int i = 0; i < y; i++)
|
| 27 | {
|
| 28 | printf("[");
|
| 29 | for(int j = 0; j < x; j++)
|
| 30 | {
|
| 31 | int k = (y - i) -1;
|
| 32 |
|
| 33 | int bin = bins[k + (j*y)];
|
| 34 |
|
| 35 | double b = double(bin) / double(reps);
|
| 36 | b = fabs(b*2 - 1);
|
| 37 |
|
| 38 | b *= scale;
|
| 39 |
|
| 40 | int s = (int)floor(b*10);
|
| 41 |
|
| 42 | if(s > 10) s = 10;
|
| 43 | if(s < 0) s = 0;
|
| 44 |
|
| 45 | printf("%c",symbols[s]);
|
| 46 | }
|
| 47 |
|
| 48 | printf("]\n");
|
| 49 | }
|
| 50 | }
|
| 51 |
|
| 52 | //----------------------------------------------------------------------------
|
| 53 |
|
| 54 | double maxBias ( std::vector<int> & counts, int reps )
|
| 55 | {
|
| 56 | double worst = 0;
|
| 57 |
|
| 58 | for(int i = 0; i < (int)counts.size(); i++)
|
| 59 | {
|
| 60 | double c = double(counts[i]) / double(reps);
|
| 61 |
|
| 62 | double d = fabs(c * 2 - 1);
|
| 63 |
|
| 64 | if(d > worst)
|
| 65 | {
|
| 66 | worst = d;
|
| 67 | }
|
| 68 | }
|
| 69 |
|
| 70 | return worst;
|
| 71 | }
|
| 72 |
|
| 73 | double rmsBias ( std::vector<int> & counts, int reps )
|
| 74 | {
|
| 75 | double rms = 0;
|
| 76 |
|
| 77 | for(int i = 0; i < (int)counts.size(); i++)
|
| 78 | {
|
| 79 | double d = double(counts[i]) / reps;
|
| 80 |
|
| 81 | d = fabs(d * 2 - 1);
|
| 82 |
|
| 83 | rms += d*d;
|
| 84 | }
|
| 85 |
|
| 86 | rms /= counts.size();
|
| 87 | rms = sqrt(rms);
|
| 88 |
|
| 89 | return rms;
|
| 90 | }
|
| 91 |
|
| 92 | //-----------------------------------------------------------------------------
|
| 93 |
|
| 94 | void calcBias ( pfHash hash, const int nbitsIn, const int nbitsOut, std::vector<int> & counts, int reps )
|
| 95 | {
|
| 96 | const int nbytesIn = nbitsIn / 8;
|
| 97 | const int nbytesOut = nbitsOut / 8;
|
| 98 |
|
| 99 | uint8_t * K = new uint8_t[nbytesIn];
|
| 100 | uint8_t * A = new uint8_t[nbytesIn];
|
| 101 | uint8_t * B = new uint8_t[nbytesIn];
|
| 102 |
|
| 103 | Rand r(378473);
|
| 104 |
|
| 105 | for(int irep = 0; irep < reps; irep++)
|
| 106 | {
|
| 107 | r.rand_p(K,nbytesIn);
|
| 108 |
|
| 109 | hash(K,nbytesIn,0,A);
|
| 110 |
|
| 111 | int * cursor = &counts[0];
|
| 112 |
|
| 113 | for(int iBit = 0; iBit < nbitsIn; iBit++)
|
| 114 | {
|
| 115 | flipbit(K,nbytesIn,iBit);
|
| 116 | hash(K,nbytesIn,0,B);
|
| 117 | flipbit(K,nbytesIn,iBit);
|
| 118 |
|
| 119 | for(int iOut = 0; iOut < nbitsOut; iOut++)
|
| 120 | {
|
| 121 | int bitA = getbit(A,nbytesOut,iOut);
|
| 122 | int bitB = getbit(B,nbytesOut,iOut);
|
| 123 |
|
| 124 | (*cursor++) += (bitA ^ bitB);
|
| 125 | }
|
| 126 | }
|
| 127 | }
|
| 128 |
|
| 129 | delete [] K;
|
| 130 | delete [] A;
|
| 131 | delete [] B;
|
| 132 | }
|
| 133 |
|
| 134 | //-----------------------------------------------------------------------------
|
| 135 |
|
| 136 | bool AvalancheTest ( pfHash hash, const int keybits, const int hashbits, const int reps )
|
| 137 | {
|
| 138 | printf("Avalanche for %3d-bit keys -> %3d-bit hashes, %8d reps - ",keybits,hashbits,reps);
|
| 139 |
|
| 140 | std::vector<int> bins(keybits*hashbits,0);
|
| 141 |
|
| 142 | calcBias(hash,keybits,hashbits,bins,reps);
|
| 143 |
|
| 144 | double b = maxBias(bins,reps);
|
| 145 |
|
| 146 | printf("Max avalanche bias is %f\n",b);
|
| 147 |
|
| 148 | if(b > gc_avalancheFail)
|
| 149 | {
|
| 150 | return false;
|
| 151 | }
|
| 152 | else
|
| 153 | {
|
| 154 | return true;
|
| 155 | }
|
| 156 | }
|
| 157 |
|
| 158 | //----------------------------------------------------------------------------
|
| 159 | // Computing whether a given mix function produces a low bias can take many
|
| 160 | // millions of tests when the bias is low. This code tries to speed up the
|
| 161 | // process by early-outing if the probability that the bias will fall outside
|
| 162 | // the given range is over 99%
|
| 163 |
|
| 164 | /*
|
| 165 | bool testMixAvalanche32_Fast ( pfMix32 mix, double cutmin, double cutmax, bool winlose )
|
| 166 | {
|
| 167 | int counts[32*32];
|
| 168 |
|
| 169 | memset(counts,0,sizeof(counts));
|
| 170 |
|
| 171 | double pmin = 0;
|
| 172 | double pmax = 0;
|
| 173 | double n = 0;
|
| 174 | double s = 4.75;
|
| 175 | int w = 0;
|
| 176 |
|
| 177 | int batchsize = 512;
|
| 178 |
|
| 179 | for(int iBatch = 0; iBatch < 1024 * 1024; iBatch++)
|
| 180 | {
|
| 181 | calcMixBias<uint32_t>(mix,counts,batchsize);
|
| 182 |
|
| 183 | n = (iBatch+1) * batchsize;
|
| 184 | w = maxIntBias(32,32,counts,(int)n);
|
| 185 |
|
| 186 | // compute p such that w is at the bottom of the confidence interval
|
| 187 |
|
| 188 | double a = s*s*n + n*n;
|
| 189 | double b = -2.0*double(w)*n - s*s*n;
|
| 190 | double c = double(w)*double(w);
|
| 191 |
|
| 192 | SolveQuadratic(a,b,c,pmin,pmax);
|
| 193 |
|
| 194 | double win = 0;
|
| 195 | double tie = 0;
|
| 196 | double lose = 0;
|
| 197 |
|
| 198 | if(winlose)
|
| 199 | {
|
| 200 | if(pmax < cutmax)
|
| 201 | {
|
| 202 | printf("\n+!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
|
| 203 | return true;
|
| 204 | }
|
| 205 |
|
| 206 | if(pmin > cutmax)
|
| 207 | {
|
| 208 | //printf("\n-!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
|
| 209 | return false;
|
| 210 | }
|
| 211 |
|
| 212 | // doesn't fail or win outright. does it have a chance of winning?
|
| 213 |
|
| 214 | if(pmin < cutmin)
|
| 215 | {
|
| 216 | // pmin:pmax contains cutmin:cutmax
|
| 217 |
|
| 218 | assert(cutmin > pmin);
|
| 219 | assert(cutmax < pmax);
|
| 220 |
|
| 221 | win = (cutmin-pmin) / (pmax-pmin);
|
| 222 | tie = (cutmax-cutmin) / (pmax-pmin);
|
| 223 | lose = (pmax-cutmax) / (pmax-pmin);
|
| 224 | }
|
| 225 | else
|
| 226 | {
|
| 227 | // pmin:pmax overlaps above cutmin:cutmax
|
| 228 |
|
| 229 | assert(cutmin < pmin);
|
| 230 |
|
| 231 | win = 0;
|
| 232 | tie = ((cutmax - pmin) / (pmax-pmin)) * ((cutmax-pmin) / (cutmax-cutmin));
|
| 233 | lose = (pmax-cutmax) / (pmax-pmin);
|
| 234 |
|
| 235 | return false;
|
| 236 | }
|
| 237 |
|
| 238 | double frac = win + tie*0.5;
|
| 239 |
|
| 240 | if((pmax-pmin)/(cutmax-cutmin) < 5)
|
| 241 | {
|
| 242 | if(frac < 0.20)
|
| 243 | {
|
| 244 | // 99% chance of loss
|
| 245 | //printf("\n--- %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
|
| 246 | return false;
|
| 247 | }
|
| 248 |
|
| 249 | if(frac > 0.80)
|
| 250 | {
|
| 251 | // 99% chance of win
|
| 252 | printf("\n+++ %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
|
| 253 | return true;
|
| 254 | }
|
| 255 | }
|
| 256 | }
|
| 257 |
|
| 258 | if(!winlose && (n > 0) && ((int)n % (128 * 1024) == 0))
|
| 259 | {
|
| 260 | printf("%f - %f : %f - %d - %f : %f : %f\n",double(w)/n,pmin,pmax,int(n),win,tie,lose);
|
| 261 | }
|
| 262 |
|
| 263 | }
|
| 264 |
|
| 265 | // We failed to determine whether this mix function passes or fails
|
| 266 |
|
| 267 | printf("\n??? %f - %f : %f",double(w)/n,pmin,pmax);
|
| 268 |
|
| 269 | return true;
|
| 270 | }
|
| 271 | */
|
| 272 |
|
| 273 | //-----------------------------------------------------------------------------
|