blob: e54885a1ebaef7ba792ebd992856d99df697b0ef [file] [log] [blame]
//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// Merging Corpora.
10//
// The task:
// Take the existing corpus (possibly empty) and merge new inputs into
// it so that only inputs with new coverage ('features') are added.
// The process should tolerate crashes, OOMs, leaks, etc.
15//
// Algorithm:
// The outer process collects the set of files and writes their names
// into a temporary "control" file, then repeatedly launches the inner
// process until all inputs are processed.
// The outer process does not actually execute the target code.
21//
// The inner process reads the control file and sees a) the list of all the
// inputs and b) the last processed input. Then it starts processing the inputs
// one by one. Before processing every input it writes one line to the control
// file:
// STARTED INPUT_ID INPUT_SIZE
// After processing an input it writes another line:
// DONE INPUT_ID Feature1 Feature2 Feature3 ...
// If a crash happens while processing an input, the last line in the control
// file will be "STARTED INPUT_ID INPUT_SIZE" and so the next process will know
// where to resume.
31//
// Once all inputs are processed by the inner process(es) the outer process
// reads the control file and does the merge based entirely on the contents
// of the control file.
// It uses a single-pass greedy algorithm, choosing the smallest inputs first
// and, among inputs of the same size, those that have more new features.
37//
38//===----------------------------------------------------------------------===//
39
40#ifndef LLVM_FUZZER_MERGE_H
41#define LLVM_FUZZER_MERGE_H
42
43#include "FuzzerDefs.h"
44
45#include <istream>
46#include <ostream>
47#include <set>
48#include <vector>
49
50namespace fuzzer {
51
52struct MergeFileInfo {
53 std::string Name;
54 size_t Size = 0;
george.karpenkovfbfa45c2017-08-27 23:20:09 +000055 Vector<uint32_t> Features;
george.karpenkov29efa6d2017-08-21 23:25:50 +000056};
57
58struct Merger {
george.karpenkovfbfa45c2017-08-27 23:20:09 +000059 Vector<MergeFileInfo> Files;
george.karpenkov29efa6d2017-08-21 23:25:50 +000060 size_t NumFilesInFirstCorpus = 0;
61 size_t FirstNotProcessedFile = 0;
62 std::string LastFailure;
63
64 bool Parse(std::istream &IS, bool ParseCoverage);
65 bool Parse(const std::string &Str, bool ParseCoverage);
66 void ParseOrExit(std::istream &IS, bool ParseCoverage);
67 void PrintSummary(std::ostream &OS);
george.karpenkovfbfa45c2017-08-27 23:20:09 +000068 Set<uint32_t> ParseSummary(std::istream &IS);
69 size_t Merge(const Set<uint32_t> &InitialFeatures,
70 Vector<std::string> *NewFiles);
71 size_t Merge(Vector<std::string> *NewFiles) {
72 return Merge(Set<uint32_t>{}, NewFiles);
george.karpenkov29efa6d2017-08-21 23:25:50 +000073 }
74 size_t ApproximateMemoryConsumption() const;
george.karpenkovfbfa45c2017-08-27 23:20:09 +000075 Set<uint32_t> AllFeatures() const;
george.karpenkov29efa6d2017-08-21 23:25:50 +000076};
77
78} // namespace fuzzer
79
80#endif // LLVM_FUZZER_MERGE_H