tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 1 | #include "Stats.h"
|
| 2 |
|
| 3 | //-----------------------------------------------------------------------------
|
| 4 |
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 5 | double chooseK ( int n, int k )
|
| 6 | {
|
tanjent@gmail.com | 96601f2 | 2011-03-31 02:41:29 +0000 | [diff] [blame] | 7 | if(k > (n - k)) k = n - k;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 8 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 9 | double c = 1;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 10 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 11 | for(int i = 0; i < k; i++)
|
| 12 | {
|
| 13 | double t = double(n-i) / double(i+1);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 14 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 15 | c *= t;
|
| 16 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 17 |
|
| 18 | return c;
|
| 19 | }
|
| 20 |
|
| 21 | double chooseUpToK ( int n, int k )
|
| 22 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 23 | double c = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 24 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 25 | for(int i = 1; i <= k; i++)
|
| 26 | {
|
| 27 | c += chooseK(n,i);
|
| 28 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 29 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 30 | return c;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 31 | }
|
| 32 |
|
| 33 | //-----------------------------------------------------------------------------
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 34 | // Distribution "score"
|
| 35 | // TODO - big writeup of what this score means
|
| 36 |
|
| 37 | // Basically, we're computing a constant that says "The test distribution is as
|
| 38 | // uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of
|
| 39 | // the bins. This makes for a nice uniform way to rate a distribution that isn't
|
| 40 | // dependent on the number of bins or the number of keys
|
| 41 |
|
| 42 | // (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
|
| 43 | // as distribution weaknesses)
|
| 44 |
|
tanjent@gmail.com | ad4b363 | 2010-11-05 01:20:58 +0000 | [diff] [blame] | 45 | double calcScore ( const int * bins, const int bincount, const int keycount )
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 46 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 47 | double n = bincount;
|
| 48 | double k = keycount;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 49 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 50 | // compute rms value
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 51 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 52 | double r = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 53 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 54 | for(int i = 0; i < bincount; i++)
|
| 55 | {
|
| 56 | double b = bins[i];
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 57 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 58 | r += b*b;
|
| 59 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 60 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 61 | r = sqrt(r / n);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 62 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 63 | // compute fill factor
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 64 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 65 | double f = (k*k - 1) / (n*r*r - k);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 66 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 67 | // rescale to (0,1) with 0 = good, 1 = bad
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 68 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 69 | return 1 - (f / n);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 70 | }
|
| 71 |
|
| 72 |
|
| 73 | //----------------------------------------------------------------------------
|
| 74 |
|
| 75 | void plot ( double n )
|
| 76 | {
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 77 | double n2 = n * 1;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 78 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 79 | if(n2 < 0) n2 = 0;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 80 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 81 | n2 *= 100;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 82 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 83 | if(n2 > 64) n2 = 64;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 84 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 85 | int n3 = (int)n2;
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 86 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 87 | if(n3 == 0)
|
| 88 | printf(".");
|
| 89 | else
|
| 90 | {
|
| 91 | char x = '0' + char(n3);
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 92 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 93 | if(x > '9') x = 'X';
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 94 |
|
tanjent@gmail.com | 6ffe010 | 2011-03-19 21:28:26 +0000 | [diff] [blame] | 95 | printf("%c",x);
|
| 96 | }
|
tanjent@gmail.com | 7e5c363 | 2010-11-02 00:50:04 +0000 | [diff] [blame] | 97 | }
|
| 98 |
|
| 99 | //-----------------------------------------------------------------------------
|