blob: cc09dd503436183daa508dee16567ded5793d763 [file] [log] [blame]
Marshall Clowbaa6fb32017-10-04 22:23:03 +00001// -*- C++ -*-
2//===------------------------- fuzzing.cpp -------------------------------===//
3//
4// The LLVM Compiler Infrastructure
5//
6// This file is dual licensed under the MIT and the University of Illinois Open
7// Source Licenses. See LICENSE.TXT for details.
8//
9//===----------------------------------------------------------------------===//
10
11// A set of routines to use when fuzzing the algorithms in libc++
12// Each one tests a single algorithm.
13//
14// They all have the form of:
15// int `algorithm`(const uint8_t *data, size_t size);
16//
17// They perform the operation, and then check to see if the results are correct.
18// If so, they return zero, and non-zero otherwise.
19//
20// For example, sort calls std::sort, then checks two things:
21// (1) The resulting vector is sorted
22// (2) The resulting vector contains the same elements as the original data.
23
24
25
26#include "fuzzing.h"
27#include <vector>
28#include <algorithm>
Marshall Clowa97ab842017-10-23 23:19:30 +000029#include <functional>
Marshall Clowd84736b2017-10-12 14:48:09 +000030#include <regex>
Marshall Clowbaa6fb32017-10-04 22:23:03 +000031
Marshall Clowa97ab842017-10-23 23:19:30 +000032#include <iostream>
33
34// If we had C++14, we could use the four iterator version of is_permutation and equal
Marshall Clowbaa6fb32017-10-04 22:23:03 +000035
36namespace fuzzing {
37
38// This is a struct we can use to test the stable_XXX algorithms.
39// perform the operation on the key, then check the order of the payload.
40
41struct stable_test {
42 uint8_t key;
Marshall Clow716c16d2017-10-18 20:40:57 +000043 size_t payload;
Marshall Clowbaa6fb32017-10-04 22:23:03 +000044
45 stable_test(uint8_t k) : key(k), payload(0) {}
Marshall Clow716c16d2017-10-18 20:40:57 +000046 stable_test(uint8_t k, size_t p) : key(k), payload(p) {}
Marshall Clowbaa6fb32017-10-04 22:23:03 +000047 };
48
49void swap(stable_test &lhs, stable_test &rhs)
50{
51 using std::swap;
52 swap(lhs.key, rhs.key);
53 swap(lhs.payload, rhs.payload);
54}
55
56struct key_less
57{
58 bool operator () (const stable_test &lhs, const stable_test &rhs) const
59 {
60 return lhs.key < rhs.key;
61 }
62};
63
64struct payload_less
65{
66 bool operator () (const stable_test &lhs, const stable_test &rhs) const
67 {
68 return lhs.payload < rhs.payload;
69 }
70};
71
72struct total_less
73{
74 bool operator () (const stable_test &lhs, const stable_test &rhs) const
75 {
76 return lhs.key == rhs.key ? lhs.payload < rhs.payload : lhs.key < rhs.key;
77 }
78};
79
80bool operator==(const stable_test &lhs, const stable_test &rhs)
81{
82 return lhs.key == rhs.key && lhs.payload == rhs.payload;
83}
84
85
86template<typename T>
87struct is_even
88{
89 bool operator () (const T &t) const
90 {
91 return t % 2 == 0;
92 }
93};
94
95
96template<>
97struct is_even<stable_test>
98{
99 bool operator () (const stable_test &t) const
100 {
101 return t.key % 2 == 0;
102 }
103};
104
Marshall Clow974677c2017-10-30 19:51:58 +0000105typedef std::vector<uint8_t> Vec;
106typedef std::vector<stable_test> StableVec;
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000107
Marshall Clow974677c2017-10-30 19:51:58 +0000108// == sort ==
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000109int sort(const uint8_t *data, size_t size)
110{
Marshall Clow974677c2017-10-30 19:51:58 +0000111 Vec working(data, data + size);
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000112 std::sort(working.begin(), working.end());
113
114 if (!std::is_sorted(working.begin(), working.end())) return 1;
115 if (!std::is_permutation(data, data + size, working.begin())) return 99;
116 return 0;
117}
118
119
120// == stable_sort ==
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000121int stable_sort(const uint8_t *data, size_t size)
122{
Marshall Clow974677c2017-10-30 19:51:58 +0000123 StableVec input;
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000124 for (size_t i = 0; i < size; ++i)
125 input.push_back(stable_test(data[i], i));
Marshall Clow974677c2017-10-30 19:51:58 +0000126 StableVec working = input;
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000127 std::stable_sort(working.begin(), working.end(), key_less());
128
129 if (!std::is_sorted(working.begin(), working.end(), key_less())) return 1;
130 auto iter = working.begin();
131 while (iter != working.end())
132 {
133 auto range = std::equal_range(iter, working.end(), *iter, key_less());
134 if (!std::is_sorted(range.first, range.second, total_less())) return 2;
135 iter = range.second;
136 }
137 if (!std::is_permutation(input.begin(), input.end(), working.begin())) return 99;
138 return 0;
139}
140
141// == partition ==
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000142int partition(const uint8_t *data, size_t size)
143{
Marshall Clow974677c2017-10-30 19:51:58 +0000144 Vec working(data, data + size);
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000145 auto iter = std::partition(working.begin(), working.end(), is_even<uint8_t>());
146
147 if (!std::all_of (working.begin(), iter, is_even<uint8_t>())) return 1;
148 if (!std::none_of(iter, working.end(), is_even<uint8_t>())) return 2;
149 if (!std::is_permutation(data, data + size, working.begin())) return 99;
150 return 0;
151}
152
153
Marshall Clow974677c2017-10-30 19:51:58 +0000154// == partition_copy ==
155int partition_copy(const uint8_t *data, size_t size)
156{
157 Vec v1, v2;
158 auto iter = std::partition_copy(data, data + size,
159 std::back_inserter<Vec>(v1), std::back_inserter<Vec>(v2),
160 is_even<uint8_t>());
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000161
Marshall Clow974677c2017-10-30 19:51:58 +0000162// The two vectors should add up to the original size
163 if (v1.size() + v2.size() != size) return 1;
164
165// All of the even values should be in the first vector, and none in the second
166 if (!std::all_of (v1.begin(), v1.end(), is_even<uint8_t>())) return 2;
167 if (!std::none_of(v2.begin(), v2.end(), is_even<uint8_t>())) return 3;
168
169// Every value in both vectors has to be in the original
170 for (auto v: v1)
171 if (std::find(data, data + size, v) == data + size) return 4;
172
173 for (auto v: v2)
174 if (std::find(data, data + size, v) == data + size) return 5;
175
176 return 0;
177}
178
179// == stable_partition ==
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000180int stable_partition (const uint8_t *data, size_t size)
181{
Marshall Clow974677c2017-10-30 19:51:58 +0000182 StableVec input;
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000183 for (size_t i = 0; i < size; ++i)
184 input.push_back(stable_test(data[i], i));
Marshall Clow974677c2017-10-30 19:51:58 +0000185 StableVec working = input;
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000186 auto iter = std::stable_partition(working.begin(), working.end(), is_even<stable_test>());
187
188 if (!std::all_of (working.begin(), iter, is_even<stable_test>())) return 1;
189 if (!std::none_of(iter, working.end(), is_even<stable_test>())) return 2;
190 if (!std::is_sorted(working.begin(), iter, payload_less())) return 3;
191 if (!std::is_sorted(iter, working.end(), payload_less())) return 4;
192 if (!std::is_permutation(input.begin(), input.end(), working.begin())) return 99;
193 return 0;
194}
195
196// == nth_element ==
197// use the first element as a position into the data
198int nth_element (const uint8_t *data, size_t size)
199{
200 if (size <= 1) return 0;
201 const size_t partition_point = data[0] % size;
Marshall Clow974677c2017-10-30 19:51:58 +0000202 Vec working(data + 1, data + size);
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000203 const auto partition_iter = working.begin() + partition_point;
204 std::nth_element(working.begin(), partition_iter, working.end());
205
206// nth may be the end iterator, in this case nth_element has no effect.
207 if (partition_iter == working.end())
208 {
209 if (!std::equal(data + 1, data + size, working.begin())) return 98;
210 }
211 else
212 {
213 const uint8_t nth = *partition_iter;
214 if (!std::all_of(working.begin(), partition_iter, [=](uint8_t v) { return v <= nth; }))
215 return 1;
216 if (!std::all_of(partition_iter, working.end(), [=](uint8_t v) { return v >= nth; }))
217 return 2;
218 if (!std::is_permutation(data + 1, data + size, working.begin())) return 99;
219 }
220
221 return 0;
222}
223
224// == partial_sort ==
225// use the first element as a position into the data
226int partial_sort (const uint8_t *data, size_t size)
227{
228 if (size <= 1) return 0;
229 const size_t sort_point = data[0] % size;
Marshall Clow974677c2017-10-30 19:51:58 +0000230 Vec working(data + 1, data + size);
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000231 const auto sort_iter = working.begin() + sort_point;
232 std::partial_sort(working.begin(), sort_iter, working.end());
233
234 if (sort_iter != working.end())
235 {
236 const uint8_t nth = *std::min_element(sort_iter, working.end());
237 if (!std::all_of(working.begin(), sort_iter, [=](uint8_t v) { return v <= nth; }))
238 return 1;
239 if (!std::all_of(sort_iter, working.end(), [=](uint8_t v) { return v >= nth; }))
240 return 2;
241 }
242 if (!std::is_sorted(working.begin(), sort_iter)) return 3;
243 if (!std::is_permutation(data + 1, data + size, working.begin())) return 99;
244
245 return 0;
246}
247
Marshall Clowd84736b2017-10-12 14:48:09 +0000248
Marshall Clow974677c2017-10-30 19:51:58 +0000249// == partial_sort_copy ==
250// use the first element as a count
251int partial_sort_copy (const uint8_t *data, size_t size)
252{
253 if (size <= 1) return 0;
254 const size_t num_results = data[0] % size;
255 Vec results(num_results);
256 (void) std::partial_sort_copy(data + 1, data + size, results.begin(), results.end());
Marshall Clowd84736b2017-10-12 14:48:09 +0000257
Marshall Clow974677c2017-10-30 19:51:58 +0000258// The results have to be sorted
259 if (!std::is_sorted(results.begin(), results.end())) return 1;
260// All the values in results have to be in the original data
261 for (auto v: results)
262 if (std::find(data + 1, data + size, v) == data + size) return 2;
263
264// The things in results have to be the smallest N in the original data
265 Vec sorted(data + 1, data + size);
266 std::sort(sorted.begin(), sorted.end());
267 if (!std::equal(results.begin(), results.end(), sorted.begin())) return 3;
268 return 0;
269}
270
271// The second sequence has been "uniqued"
272template <typename Iter1, typename Iter2>
273static bool compare_unique(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2)
274{
275 assert(first1 != last1 && first2 != last2);
276 if (*first1 != *first2) return false;
277
278 uint8_t last_value = *first1;
279 ++first1; ++first2;
280 while(first1 != last1 && first2 != last2)
281 {
282 // Skip over dups in the first sequence
283 while (*first1 == last_value)
284 if (++first1 == last1) return false;
285 if (*first1 != *first2) return false;
286 last_value = *first1;
287 ++first1; ++first2;
288 }
289
290// Still stuff left in the 'uniqued' sequence - oops
291 if (first1 == last1 && first2 != last2) return false;
292
293// Still stuff left in the original sequence - better be all the same
294 while (first1 != last1)
295 {
296 if (*first1 != last_value) return false;
297 ++first1;
298 }
299 return true;
300}
301
302// == unique ==
303int unique (const uint8_t *data, size_t size)
304{
305 Vec working(data, data + size);
306 std::sort(working.begin(), working.end());
307 Vec results = working;
308 Vec::iterator new_end = std::unique(results.begin(), results.end());
309 Vec::iterator it; // scratch iterator
310
311// Check the size of the unique'd sequence.
312// it should only be zero if the input sequence was empty.
313 if (results.begin() == new_end)
314 return working.size() == 0 ? 0 : 1;
315
316// 'results' is sorted
317 if (!std::is_sorted(results.begin(), new_end)) return 2;
318
319// All the elements in 'results' must be different
320 it = results.begin();
321 uint8_t prev_value = *it++;
322 for (; it != new_end; ++it)
323 {
324 if (*it == prev_value) return 3;
325 prev_value = *it;
326 }
327
328// Every element in 'results' must be in 'working'
329 for (it = results.begin(); it != new_end; ++it)
330 if (std::find(working.begin(), working.end(), *it) == working.end())
331 return 4;
332
333// Every element in 'working' must be in 'results'
334 for (auto v : working)
335 if (std::find(results.begin(), new_end, v) == new_end)
336 return 5;
337
338 return 0;
339}
340
341// == unique_copy ==
342int unique_copy (const uint8_t *data, size_t size)
343{
344 Vec working(data, data + size);
345 std::sort(working.begin(), working.end());
346 Vec results;
347 (void) std::unique_copy(working.begin(), working.end(),
348 std::back_inserter<Vec>(results));
349 Vec::iterator it; // scratch iterator
350
351// Check the size of the unique'd sequence.
352// it should only be zero if the input sequence was empty.
353 if (results.size() == 0)
354 return working.size() == 0 ? 0 : 1;
355
356// 'results' is sorted
357 if (!std::is_sorted(results.begin(), results.end())) return 2;
358
359// All the elements in 'results' must be different
360 it = results.begin();
361 uint8_t prev_value = *it++;
362 for (; it != results.end(); ++it)
363 {
364 if (*it == prev_value) return 3;
365 prev_value = *it;
366 }
367
368// Every element in 'results' must be in 'working'
369 for (auto v : results)
370 if (std::find(working.begin(), working.end(), v) == working.end())
371 return 4;
372
373// Every element in 'working' must be in 'results'
374 for (auto v : working)
375 if (std::find(results.begin(), results.end(), v) == results.end())
376 return 5;
377
378 return 0;
379}
380
381
382// -- regex fuzzers
Marshall Clowd84736b2017-10-12 14:48:09 +0000383static int regex_helper(const uint8_t *data, size_t size, std::regex::flag_type flag)
384{
385 if (size > 0)
386 {
387 try
388 {
389 std::string s((const char *)data, size);
390 std::regex re(s, flag);
391 return std::regex_match(s, re) ? 1 : 0;
392 }
393 catch (std::regex_error &ex) {}
394 }
395 return 0;
396}
397
398
399int regex_ECMAScript (const uint8_t *data, size_t size)
400{
401 (void) regex_helper(data, size, std::regex_constants::ECMAScript);
402 return 0;
403}
404
405int regex_POSIX (const uint8_t *data, size_t size)
406{
407 (void) regex_helper(data, size, std::regex_constants::basic);
408 return 0;
409}
410
411int regex_extended (const uint8_t *data, size_t size)
412{
413 (void) regex_helper(data, size, std::regex_constants::extended);
414 return 0;
415}
416
417int regex_awk (const uint8_t *data, size_t size)
418{
419 (void) regex_helper(data, size, std::regex_constants::awk);
420 return 0;
421}
422
423int regex_grep (const uint8_t *data, size_t size)
424{
425 (void) regex_helper(data, size, std::regex_constants::grep);
426 return 0;
427}
428
429int regex_egrep (const uint8_t *data, size_t size)
430{
431 (void) regex_helper(data, size, std::regex_constants::egrep);
432 return 0;
433}
434
Marshall Clowa97ab842017-10-23 23:19:30 +0000435// -- heap fuzzers
436int make_heap (const uint8_t *data, size_t size)
437{
Marshall Clow974677c2017-10-30 19:51:58 +0000438 Vec working(data, data + size);
Marshall Clowa97ab842017-10-23 23:19:30 +0000439 std::make_heap(working.begin(), working.end());
440
441 if (!std::is_heap(working.begin(), working.end())) return 1;
442 if (!std::is_permutation(data, data + size, working.begin())) return 99;
443 return 0;
444}
445
446int push_heap (const uint8_t *data, size_t size)
447{
448 if (size < 2) return 0;
449
450// Make a heap from the first half of the data
Marshall Clow974677c2017-10-30 19:51:58 +0000451 Vec working(data, data + size);
Marshall Clowa97ab842017-10-23 23:19:30 +0000452 auto iter = working.begin() + (size / 2);
453 std::make_heap(working.begin(), iter);
454 if (!std::is_heap(working.begin(), iter)) return 1;
455
456// Now push the rest onto the heap, one at a time
457 ++iter;
458 for (; iter != working.end(); ++iter) {
459 std::push_heap(working.begin(), iter);
460 if (!std::is_heap(working.begin(), iter)) return 2;
461 }
462
463 if (!std::is_permutation(data, data + size, working.begin())) return 99;
464 return 0;
465}
466
467int pop_heap (const uint8_t *data, size_t size)
468{
469 if (size < 2) return 0;
Marshall Clow974677c2017-10-30 19:51:58 +0000470 Vec working(data, data + size);
Marshall Clowa97ab842017-10-23 23:19:30 +0000471 std::make_heap(working.begin(), working.end());
472
473// Pop things off, one at a time
474 auto iter = --working.end();
475 while (iter != working.begin()) {
476 std::pop_heap(working.begin(), iter);
477 if (!std::is_heap(working.begin(), --iter)) return 2;
478 }
479
480 return 0;
481}
482
483
484// -- search fuzzers
485int search (const uint8_t *data, size_t size)
486{
487 if (size < 2) return 0;
488
489 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
490 assert(pat_size <= size - 1);
491 const uint8_t *pat_begin = data + 1;
492 const uint8_t *pat_end = pat_begin + pat_size;
493 const uint8_t *data_end = data + size;
494 assert(pat_end <= data_end);
495// std::cerr << "data[0] = " << size_t(data[0]) << " ";
496// std::cerr << "Pattern size = " << pat_size << "; corpus is " << size - 1 << std::endl;
497 auto it = std::search(pat_end, data_end, pat_begin, pat_end);
498 if (it != data_end) // not found
499 if (!std::equal(pat_begin, pat_end, it))
500 return 1;
501 return 0;
502}
503
504template <typename S>
505static int search_helper (const uint8_t *data, size_t size)
506{
507 if (size < 2) return 0;
508
509 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
510 const uint8_t *pat_begin = data + 1;
511 const uint8_t *pat_end = pat_begin + pat_size;
512 const uint8_t *data_end = data + size;
513
514 auto it = std::search(pat_end, data_end, S(pat_begin, pat_end));
515 if (it != data_end) // not found
516 if (!std::equal(pat_begin, pat_end, it))
517 return 1;
518 return 0;
519}
520
521// These are still in std::experimental
522// int search_boyer_moore (const uint8_t *data, size_t size)
523// {
524// return search_helper<std::boyer_moore_searcher<const uint8_t *>>(data, size);
525// }
526//
527// int search_boyer_moore_horspool (const uint8_t *data, size_t size)
528// {
529// return search_helper<std::boyer_moore_horspool_searcher<const uint8_t *>>(data, size);
530// }
531
Marshall Clow974677c2017-10-30 19:51:58 +0000532
533// -- set operation fuzzers
534template <typename S>
535static void set_helper (const uint8_t *data, size_t size, Vec &v1, Vec &v2)
536{
537 assert(size > 1);
538
539 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
540 const uint8_t *pat_begin = data + 1;
541 const uint8_t *pat_end = pat_begin + pat_size;
542 const uint8_t *data_end = data + size;
543 v1.assign(pat_begin, pat_end);
544 v2.assign(pat_end, data_end);
545
546 std::sort(v1.begin(), v1.end());
547 std::sort(v2.begin(), v2.end());
548}
549
Marshall Clowbaa6fb32017-10-04 22:23:03 +0000550} // namespace fuzzing