blob: 5f218f89328b39290604b30efc639abad07e47ef [file] [log] [blame]
tanjent@gmail.comad4b3632010-11-05 01:20:58 +00001#include "SpeedTest.h"
2
3#include "Random.h"
4
aappleby@google.com7f20a312011-03-21 20:55:06 +00005#include <stdio.h> // for printf
6#include <memory.h> // for memset
aappleby@google.com6bde2782011-04-04 22:42:08 +00007#include <math.h> // for sqrt
8#include <algorithm> // for sort
9
10//-----------------------------------------------------------------------------
11// We view our timing values as a series of random variables V that has been
12// contaminated with occasional outliers due to cache misses, thread
13// preemption, etcetera. To filter out the outliers, we search for the largest
14// subset of V such that all its values are within three standard deviations
15// of the mean.
16
17double CalcMean ( std::vector<double> & v )
18{
19 double mean = 0;
20
21 for(int i = 0; i < (int)v.size(); i++)
22 {
23 mean += v[i];
24 }
25
26 mean /= double(v.size());
27
28 return mean;
29}
30
31double CalcMean ( std::vector<double> & v, int a, int b )
32{
33 double mean = 0;
34
35 for(int i = a; i <= b; i++)
36 {
37 mean += v[i];
38 }
39
40 mean /= (b-a+1);
41
42 return mean;
43}
44
45double CalcStdv ( std::vector<double> & v, int a, int b )
46{
47 double mean = CalcMean(v,a,b);
48
49 double stdv = 0;
50
51 for(int i = a; i <= b; i++)
52 {
53 double x = v[i] - mean;
54
55 stdv += x*x;
56 }
57
58 stdv = sqrt(stdv / (b-a+1));
59
60 return stdv;
61}
62
63// Return true if the largest value in v[0,len) is more than three
64// standard deviations from the mean
65
66bool ContainsOutlier ( std::vector<double> & v, int len )
67{
68 double mean = 0;
69
70 for(int i = 0; i < len; i++)
71 {
72 mean += v[i];
73 }
74
75 mean /= double(len);
76
77 double stdv = 0;
78
79 for(int i = 0; i < len; i++)
80 {
81 double x = v[i] - mean;
82 stdv += x*x;
83 }
84
85 stdv = sqrt(stdv / double(len));
86
87 double cutoff = mean + stdv*3;
88
89 return v[len-1] > cutoff;
90}
91
92// Do a binary search to find the largest subset of v that does not contain
93// outliers.
94
95void FilterOutliers ( std::vector<double> & v )
96{
97 std::sort(v.begin(),v.end());
98
99 int len = 0;
100
101 for(int x = 0x40000000; x; x = x >> 1 )
102 {
103 if((len | x) >= v.size()) continue;
104
105 if(!ContainsOutlier(v,len | x))
106 {
107 len |= x;
108 }
109 }
110
111 v.resize(len);
112}
113
114// Iteratively tighten the set to find a subset that does not contain
115// outliers. I'm not positive this works correctly in all cases.
116
117void FilterOutliers2 ( std::vector<double> & v )
118{
119 std::sort(v.begin(),v.end());
120
121 int a = 0;
122 int b = (int)(v.size() - 1);
123
124 for(int i = 0; i < 10; i++)
125 {
126 //printf("%d %d\n",a,b);
127
128 double mean = CalcMean(v,a,b);
129 double stdv = CalcStdv(v,a,b);
130
131 double cutA = mean - stdv*3;
132 double cutB = mean + stdv*3;
133
134 while((a < b) && (v[a] < cutA)) a++;
135 while((b > a) && (v[b] > cutB)) b--;
136 }
137
138 std::vector<double> v2;
139
140 v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);
141
142 v.swap(v2);
143}
144
145//-----------------------------------------------------------------------------
146// We really want the rdtsc() calls to bracket the function call as tightly
147// as possible, but that's hard to do portably. We'll try and get as close as
148// possible by marking the function as NEVER_INLINE (to keep the optimizer from
149// moving it) and marking the timing variables as "volatile register".
150
151NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
152{
153 volatile register int64_t begin,end;
154
155 uint32_t temp[16];
156
157 begin = rdtsc();
158
159 hash(key,len,seed,temp);
160
161 end = rdtsc();
162
163 return end-begin;
164}
165
166//-----------------------------------------------------------------------------
167
168double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
169{
170 Rand r(seed);
171
tanjent@gmail.com954fc282011-04-05 00:18:44 +0000172 uint8_t * buf = new uint8_t[blocksize + 512];
aappleby@google.com6bde2782011-04-04 22:42:08 +0000173
174 uint64_t t1 = reinterpret_cast<uint64_t>(buf);
175
tanjent@gmail.com510b8522011-04-12 15:36:18 +0000176 t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
aappleby@google.com6bde2782011-04-04 22:42:08 +0000177 t1 += align;
178
179 uint8_t * block = reinterpret_cast<uint8_t*>(t1);
180
181 r.rand_p(block,blocksize);
182
183 //----------
184
185 std::vector<double> times;
186 times.reserve(trials);
187
188 for(int itrial = 0; itrial < trials; itrial++)
189 {
190 r.rand_p(block,blocksize);
191
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000192 double t = (double)timehash(hash,block,blocksize,itrial);
aappleby@google.com6bde2782011-04-04 22:42:08 +0000193
194 if(t > 0) times.push_back(t);
195 }
196
197 //----------
198
199 std::sort(times.begin(),times.end());
200
201 FilterOutliers(times);
202
203 delete [] buf;
204
205 return CalcMean(times);
206}
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000207
208//-----------------------------------------------------------------------------
209// 256k blocks seem to give the best results.
210
aappleby@google.com7f20a312011-03-21 20:55:06 +0000211void BulkSpeedTest ( pfHash hash, uint32_t seed )
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000212{
tanjent@gmail.com0f37bbd2011-04-04 23:05:26 +0000213 const int trials = 2999;
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000214 const int blocksize = 256 * 1024;
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000215
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000216 printf("Bulk speed test - %d-byte keys\n",blocksize);
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000217
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000218 for(int align = 0; align < 8; align++)
219 {
aappleby@google.com6bde2782011-04-04 22:42:08 +0000220 double cycles = SpeedTest(hash,seed,trials,blocksize,align);
221
222 double bestbpc = double(blocksize)/cycles;
223
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000224 double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
225 printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
226 }
tanjent@gmail.comad4b3632010-11-05 01:20:58 +0000227}
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000228
229//-----------------------------------------------------------------------------
230
aappleby@google.com6bde2782011-04-04 22:42:08 +0000231void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /*outCycles*/ )
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000232{
aappleby@google.com6bde2782011-04-04 22:42:08 +0000233 const int trials = 999999;
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000234
tanjent@gmail.com6ffe0102011-03-19 21:28:26 +0000235 if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
aappleby@google.com7f20a312011-03-21 20:55:06 +0000236
aappleby@google.com6bde2782011-04-04 22:42:08 +0000237 double cycles = SpeedTest(hash,seed,trials,keysize,0);
aappleby@google.com7f20a312011-03-21 20:55:06 +0000238
aappleby@google.com6bde2782011-04-04 22:42:08 +0000239 printf("%8.2f cycles/hash\n",cycles);
tanjent@gmail.combabb5532011-02-28 06:03:12 +0000240}
241
242//-----------------------------------------------------------------------------