blob: 49031d3b408209f8bb6a9b564c04fd9b4a6ba031 [file] [log] [blame]
//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
2//
chandlerc40284492019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
george.karpenkov29efa6d2017-08-21 23:25:50 +00006//
7//===----------------------------------------------------------------------===//
8// Merging Corpora.
9//
10// The task:
11// Take the existing corpus (possibly empty) and merge new inputs into
12// it so that only inputs with new coverage ('features') are added.
13// The process should tolerate the crashes, OOMs, leaks, etc.
14//
15// Algorithm:
// The outer process collects the set of files and writes their names
17// into a temporary "control" file, then repeatedly launches the inner
18// process until all inputs are processed.
19// The outer process does not actually execute the target code.
20//
21// The inner process reads the control file and sees a) list of all the inputs
22// and b) the last processed input. Then it starts processing the inputs one
23// by one. Before processing every input it writes one line to control file:
24// STARTED INPUT_ID INPUT_SIZE
// After processing an input it writes another line:
26// DONE INPUT_ID Feature1 Feature2 Feature3 ...
27// If a crash happens while processing an input the last line in the control
28// file will be "STARTED INPUT_ID" and so the next process will know
29// where to resume.
30//
// Once all inputs are processed by the inner process(es) the outer process
// reads the control file and does the merge based entirely on the contents
// of the control file.
// It uses a single-pass greedy algorithm, choosing the smallest inputs first
// and, among inputs of the same size, those that have more new features.
36//
37//===----------------------------------------------------------------------===//
38
39#ifndef LLVM_FUZZER_MERGE_H
40#define LLVM_FUZZER_MERGE_H
41
42#include "FuzzerDefs.h"
43
44#include <istream>
45#include <ostream>
46#include <set>
47#include <vector>
48
49namespace fuzzer {
50
51struct MergeFileInfo {
52 std::string Name;
53 size_t Size = 0;
george.karpenkovfbfa45c2017-08-27 23:20:09 +000054 Vector<uint32_t> Features;
george.karpenkov29efa6d2017-08-21 23:25:50 +000055};
56
57struct Merger {
george.karpenkovfbfa45c2017-08-27 23:20:09 +000058 Vector<MergeFileInfo> Files;
george.karpenkov29efa6d2017-08-21 23:25:50 +000059 size_t NumFilesInFirstCorpus = 0;
60 size_t FirstNotProcessedFile = 0;
61 std::string LastFailure;
62
63 bool Parse(std::istream &IS, bool ParseCoverage);
64 bool Parse(const std::string &Str, bool ParseCoverage);
65 void ParseOrExit(std::istream &IS, bool ParseCoverage);
66 void PrintSummary(std::ostream &OS);
george.karpenkovfbfa45c2017-08-27 23:20:09 +000067 Set<uint32_t> ParseSummary(std::istream &IS);
68 size_t Merge(const Set<uint32_t> &InitialFeatures,
69 Vector<std::string> *NewFiles);
70 size_t Merge(Vector<std::string> *NewFiles) {
71 return Merge(Set<uint32_t>{}, NewFiles);
george.karpenkov29efa6d2017-08-21 23:25:50 +000072 }
73 size_t ApproximateMemoryConsumption() const;
george.karpenkovfbfa45c2017-08-27 23:20:09 +000074 Set<uint32_t> AllFeatures() const;
george.karpenkov29efa6d2017-08-21 23:25:50 +000075};
76
77} // namespace fuzzer
78
79#endif // LLVM_FUZZER_MERGE_H