blob: dd4c37b6e39c76771c36e7e5fba1cacdd7152c60 [file] [log] [blame]
george.karpenkov29efa6d2017-08-21 23:25:50 +00001//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// Merging Corpora.
10//
11// The task:
12// Take the existing corpus (possibly empty) and merge new inputs into
13// it so that only inputs with new coverage ('features') are added.
14// The process should tolerate the crashes, OOMs, leaks, etc.
15//
16// Algorithm:
17// The outer process collects the set of files and writes their names
18// into a temporary "control" file, then repeatedly launches the inner
19// process until all inputs are processed.
20// The outer process does not actually execute the target code.
21//
22// The inner process reads the control file and sees a) the list of all inputs
23// and b) the last processed input. Then it starts processing the inputs one
24// by one. Before processing every input it writes one line to the control file:
25// STARTED INPUT_ID INPUT_SIZE
26// After processing an input it writes another line:
27// DONE INPUT_ID Feature1 Feature2 Feature3 ...
28// If a crash happens while processing an input the last line in the control
29// file will be "STARTED INPUT_ID" and so the next process will know
30// where to resume.
31//
32// Once all inputs are processed by the inner process(es) the outer process
33// reads the control file and does the merge based entirely on the contents
34// of the control file.
35// It uses a single-pass greedy algorithm, choosing the smallest inputs first
36// and, among inputs of the same size, those that have more new features.
37//
38//===----------------------------------------------------------------------===//
39
40#ifndef LLVM_FUZZER_MERGE_H
41#define LLVM_FUZZER_MERGE_H
42
43#include "FuzzerDefs.h"
44
45#include <istream>
46#include <ostream>
47#include <set>
48#include <vector>
49
50namespace fuzzer {
51
// Per-input record kept by the merge machinery.
// A default-constructed record has an empty name, zero size and no features.
struct MergeFileInfo {
  // Name of the input file.
  std::string Name;
  // Size of the input. NOTE(review): presumably the INPUT_SIZE value from the
  // "STARTED" control-file line, in bytes -- confirm against the parser.
  size_t Size{0};
  // Features recorded for this input. NOTE(review): presumably the list that
  // follows "DONE INPUT_ID" in the control file -- confirm against the parser.
  std::vector<uint32_t> Features;
};
57
58struct Merger {
59 std::vector<MergeFileInfo> Files;
60 size_t NumFilesInFirstCorpus = 0;
61 size_t FirstNotProcessedFile = 0;
62 std::string LastFailure;
63
64 bool Parse(std::istream &IS, bool ParseCoverage);
65 bool Parse(const std::string &Str, bool ParseCoverage);
66 void ParseOrExit(std::istream &IS, bool ParseCoverage);
67 void PrintSummary(std::ostream &OS);
68 std::set<uint32_t> ParseSummary(std::istream &IS);
69 size_t Merge(const std::set<uint32_t> &InitialFeatures,
70 std::vector<std::string> *NewFiles);
71 size_t Merge(std::vector<std::string> *NewFiles) {
72 return Merge(std::set<uint32_t>{}, NewFiles);
73 }
74 size_t ApproximateMemoryConsumption() const;
75 std::set<uint32_t> AllFeatures() const;
76};
77
78} // namespace fuzzer
79
80#endif // LLVM_FUZZER_MERGE_H