blob: c14dd589e62dd214cea7605664ae9ace63767764 [file] [log] [blame]
//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -*-===//
2//
chandlerc40284492019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
george.karpenkov29efa6d2017-08-21 23:25:50 +00006//
7//===----------------------------------------------------------------------===//
8// Merging Corpora.
9//
10// The task:
11// Take the existing corpus (possibly empty) and merge new inputs into
12// it so that only inputs with new coverage ('features') are added.
13// The process should tolerate the crashes, OOMs, leaks, etc.
14//
15// Algorithm:
// The outer process collects the set of files and writes their names
// into a temporary "control" file, then repeatedly launches the inner
// process until all inputs are processed.
// The outer process does not actually execute the target code.
20//
// The inner process reads the control file and sees a) the list of all the
// inputs and b) the last processed input. Then it starts processing the inputs
// one by one. Before processing every input it writes one line to the control
// file:
// STARTED INPUT_ID INPUT_SIZE
// After processing an input it writes another line:
// DONE INPUT_ID Feature1 Feature2 Feature3 ...
// If a crash happens while processing an input, the last line in the control
// file will be "STARTED INPUT_ID" and so the next process will know
// where to resume.
30//
// Once all inputs are processed by the inner process(es), the outer process
// reads the control file and does the merge based entirely on the contents
// of the control file.
// It uses a single-pass greedy algorithm, choosing the smallest inputs first
// and, among inputs of the same size, the inputs that have more new features.
36//
37//===----------------------------------------------------------------------===//
38
39#ifndef LLVM_FUZZER_MERGE_H
40#define LLVM_FUZZER_MERGE_H
41
42#include "FuzzerDefs.h"
43
44#include <istream>
45#include <ostream>
46#include <set>
47#include <vector>
48
49namespace fuzzer {
50
51struct MergeFileInfo {
52 std::string Name;
53 size_t Size = 0;
kcc98a86242019-02-15 00:08:16 +000054 Vector<uint32_t> Features, Cov;
george.karpenkov29efa6d2017-08-21 23:25:50 +000055};
56
57struct Merger {
george.karpenkovfbfa45c2017-08-27 23:20:09 +000058 Vector<MergeFileInfo> Files;
george.karpenkov29efa6d2017-08-21 23:25:50 +000059 size_t NumFilesInFirstCorpus = 0;
60 size_t FirstNotProcessedFile = 0;
61 std::string LastFailure;
62
63 bool Parse(std::istream &IS, bool ParseCoverage);
64 bool Parse(const std::string &Str, bool ParseCoverage);
65 void ParseOrExit(std::istream &IS, bool ParseCoverage);
kcc95a78ad2019-02-15 00:15:13 +000066 size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,
67 const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
68 Vector<std::string> *NewFiles);
george.karpenkov29efa6d2017-08-21 23:25:50 +000069 size_t ApproximateMemoryConsumption() const;
george.karpenkovfbfa45c2017-08-27 23:20:09 +000070 Set<uint32_t> AllFeatures() const;
george.karpenkov29efa6d2017-08-21 23:25:50 +000071};
72
kcc4b5aa122019-02-09 00:16:21 +000073void CrashResistantMerge(const Vector<std::string> &Args,
74 const Vector<SizedFile> &OldCorpus,
75 const Vector<SizedFile> &NewCorpus,
76 Vector<std::string> *NewFiles,
77 const Set<uint32_t> &InitialFeatures,
78 Set<uint32_t> *NewFeatures,
kcc98a86242019-02-15 00:08:16 +000079 const Set<uint32_t> &InitialCov,
80 Set<uint32_t> *NewCov,
kccbfb59752019-02-12 03:12:40 +000081 const std::string &CFPath,
82 bool Verbose);
kcca3815862019-02-08 21:27:23 +000083
george.karpenkov29efa6d2017-08-21 23:25:50 +000084} // namespace fuzzer
85
86#endif // LLVM_FUZZER_MERGE_H