tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 1 | #include "Stats.h"
|
| 2 |
|
| 3 | //-----------------------------------------------------------------------------
|
| 4 |
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 5 | double chooseK ( int n, int k )
|
| 6 | {
|
| 7 | if(k > (n - k)) k = n - k;
|
| 8 |
|
| 9 | double c = 1;
|
| 10 |
|
| 11 | for(int i = 0; i < k; i++)
|
| 12 | {
|
| 13 | double t = double(n-i) / double(i+1);
|
| 14 |
|
| 15 | c *= t;
|
| 16 | }
|
| 17 |
|
| 18 | return c;
|
| 19 | }
|
| 20 |
|
| 21 | double chooseUpToK ( int n, int k )
|
| 22 | {
|
| 23 | double c = 0;
|
| 24 |
|
| 25 | for(int i = 1; i <= k; i++)
|
| 26 | {
|
| 27 | c += chooseK(n,i);
|
| 28 | }
|
| 29 |
|
| 30 | return c;
|
| 31 | }
|
| 32 |
|
| 33 | //-----------------------------------------------------------------------------
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 34 | // Distribution "score"
|
| 35 | // TODO - big writeup of what this score means
|
| 36 |
|
| 37 | // Basically, we're computing a constant that says "The test distribution is as
|
| 38 | // uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of
|
| 39 | // the bins. This makes for a nice uniform way to rate a distribution that isn't
|
| 40 | // dependent on the number of bins or the number of keys
|
| 41 |
|
| 42 | // (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
|
| 43 | // as distribution weaknesses)
|
| 44 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame^] | 45 | double calcScore ( const int * bins, const int bincount, const int keycount )
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 46 | {
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame^] | 47 | double n = bincount;
|
| 48 | double k = keycount;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 49 |
|
| 50 | // compute rms value
|
| 51 |
|
| 52 | double r = 0;
|
| 53 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame^] | 54 | for(int i = 0; i < bincount; i++)
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 55 | {
|
| 56 | double b = bins[i];
|
| 57 |
|
| 58 | r += b*b;
|
| 59 | }
|
| 60 |
|
| 61 | r = sqrt(r / n);
|
| 62 |
|
| 63 | // compute fill factor
|
| 64 |
|
| 65 | double f = (k*k - 1) / (n*r*r - k);
|
| 66 |
|
| 67 | // rescale to (0,1) with 0 = good, 1 = bad
|
| 68 |
|
| 69 | return 1 - (f / n);
|
| 70 | }
|
| 71 |
|
| 72 |
|
| 73 | //----------------------------------------------------------------------------
|
| 74 |
|
| 75 | void plot ( double n )
|
| 76 | {
|
| 77 | double n2 = n * 1;
|
| 78 |
|
| 79 | if(n2 < 0) n2 = 0;
|
| 80 |
|
| 81 | n2 *= 100;
|
| 82 |
|
| 83 | if(n2 > 64) n2 = 64;
|
| 84 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame^] | 85 | int n3 = (int)n2;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 86 |
|
| 87 | if(n3 == 0)
|
| 88 | printf(".");
|
| 89 | else
|
| 90 | {
|
| 91 | char x = '0' + char(n3);
|
| 92 |
|
| 93 | if(x > '9') x = 'X';
|
| 94 |
|
| 95 | printf("%c",x);
|
| 96 | }
|
| 97 | }
|
| 98 |
|
| 99 | //-----------------------------------------------------------------------------
|