blob: 989675e81d95f029082eaec11cc99a6add678a7f [file] [log] [blame]
/*===- DataFlow.cpp - a standalone DataFlow tracer -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// An experimental data-flow tracer for fuzz targets.
// It is based on DFSan and SanitizerCoverage.
// https://clang.llvm.org/docs/DataFlowSanitizer.html
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
//
// It executes the fuzz target on the given input while monitoring the
// data flow for every instrumented comparison instruction.
//
// The output shows which functions depend on which bytes of the input,
// and also provides basic-block coverage for every input.
//
// Build:
//   1. Compile this file with -fsanitize=dataflow
//   2. Build the fuzz target with -g -fsanitize=dataflow
//       -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp
//   3. Link those together with -fsanitize=dataflow
//
// -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
// instruction; DFSan modifies the calls to pass the data flow labels.
// The callbacks update the data flow label for the current function.
// See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
//
// -fsanitize-coverage=trace-pc-guard,pc-table,bb instruments function
// entries so that the comparison callback knows the current function.
// -fsanitize-coverage=...,bb also allows collecting basic block coverage.
//
//
// Run:
//   # Collect data flow and coverage for INPUT_FILE
//   # write to OUTPUT_FILE (default: stdout)
//   ./a.out FIRST_LABEL LAST_LABEL INPUT_FILE [OUTPUT_FILE]
//
//   # Print all instrumented functions. llvm-symbolizer must be present in PATH
//   ./a.out
//
// Example output:
// ===============
//  F0 11111111111111
//  F1 10000000000000
//  C0 1 2 3 4 5
//  C1 8
// ===============
// "FN xxxxxxxxxx": tells which bytes of the input the function N depends on.
//    The byte string is LEN+1 bytes. The last byte is set if the function
//    depends on the input length.
// "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered
//    in addition to the function's entry block, out of T total instrumented
//    blocks.
//
//===----------------------------------------------------------------------===*/
58
59#include <assert.h>
60#include <stdio.h>
61#include <stdlib.h>
62#include <stdint.h>
63#include <string.h>
64
65#include <execinfo.h> // backtrace_symbols_fd
66
67#include <sanitizer/dfsan_interface.h>
68
// Fuzz target entry point; main() runs it once on the input file's bytes.
extern "C" int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);

// Optional initializer. It is declared weak so that main() can test the
// symbol's address and only call it when the target actually defines it.
extern "C" __attribute__((weak)) int LLVMFuzzerInitialize(int *argc,
                                                          char ***argv);
73
74static size_t InputLen;
hans39ed0342019-06-14 07:32:22 +000075static size_t InputLabelBeg;
76static size_t InputLabelEnd;
77static size_t InputSizeLabel;
kccce6392a2019-05-08 00:51:15 +000078static size_t NumFuncs, NumGuards;
79static uint32_t *GuardsBeg, *GuardsEnd;
80static const uintptr_t *PCsBeg, *PCsEnd;
hans39ed0342019-06-14 07:32:22 +000081static __thread size_t CurrentFunc;
82static dfsan_label *FuncLabels; // Array of NumFuncs elements.
kccce6392a2019-05-08 00:51:15 +000083static bool *BBExecuted; // Array of NumGuards elements.
hans39ed0342019-06-14 07:32:22 +000084static char *PrintableStringForLabel; // InputLen + 2 bytes.
85static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];
kccdbf20a02018-05-10 19:59:01 +000086
// Flag bits tested against the second word of a PC-table entry
// (see BlockIsEntry).
enum {
  PCFLAG_FUNC_ENTRY = 1 << 0,  // The block is a function's entry block.
};
90
kcc45fa3552019-05-08 17:20:09 +000091static inline bool BlockIsEntry(size_t BlockIdx) {
92 return PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY;
93}
94
kccdbf20a02018-05-10 19:59:01 +000095// Prints all instrumented functions.
kcc54dce2c2018-05-23 20:57:11 +000096static int PrintFunctions() {
kccdbf20a02018-05-10 19:59:01 +000097 // We don't have the symbolizer integrated with dfsan yet.
98 // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
99 // TODO(kcc): this is pretty ugly and may break in lots of ways.
100 // We'll need to make a proper in-process symbolizer work with DFSan.
101 FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' "
102 "| llvm-symbolizer "
103 "| grep 'dfs\\$' "
104 "| sed 's/dfs\\$//g'", "w");
kccce6392a2019-05-08 00:51:15 +0000105 for (size_t I = 0; I < NumGuards; I++) {
106 uintptr_t PC = PCsBeg[I * 2];
kcc45fa3552019-05-08 17:20:09 +0000107 if (!BlockIsEntry(I)) continue;
kccdbf20a02018-05-10 19:59:01 +0000108 void *const Buf[1] = {(void*)PC};
109 backtrace_symbols_fd(Buf, 1, fileno(Pipe));
110 }
111 pclose(Pipe);
112 return 0;
113}
114
hans39ed0342019-06-14 07:32:22 +0000115extern "C"
116void SetBytesForLabel(dfsan_label L, char *Bytes) {
117 if (LabelSeen[L])
118 return;
119 LabelSeen[L] = true;
120 assert(L);
121 if (L < InputSizeLabel) {
122 Bytes[L + InputLabelBeg - 1] = '1';
123 } else if (L == InputSizeLabel) {
124 Bytes[InputLen] = '1';
125 } else {
126 auto *DLI = dfsan_get_label_info(L);
127 SetBytesForLabel(DLI->l1, Bytes);
128 SetBytesForLabel(DLI->l2, Bytes);
129 }
130}
131
132static char *GetPrintableStringForLabel(dfsan_label L) {
133 memset(PrintableStringForLabel, '0', InputLen + 1);
134 PrintableStringForLabel[InputLen + 1] = 0;
135 memset(LabelSeen, 0, sizeof(LabelSeen));
136 SetBytesForLabel(L, PrintableStringForLabel);
137 return PrintableStringForLabel;
kcc54dce2c2018-05-23 20:57:11 +0000138}
139
140static void PrintDataFlow(FILE *Out) {
hans39ed0342019-06-14 07:32:22 +0000141 for (size_t I = 0; I < NumFuncs; I++)
142 if (FuncLabels[I])
143 fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I]));
kccdbf20a02018-05-10 19:59:01 +0000144}
145
kccce6392a2019-05-08 00:51:15 +0000146static void PrintCoverage(FILE *Out) {
147 ssize_t CurrentFuncGuard = -1;
148 ssize_t CurrentFuncNum = -1;
kcc45fa3552019-05-08 17:20:09 +0000149 ssize_t NumBlocksInCurrentFunc = -1;
150 for (size_t FuncBeg = 0; FuncBeg < NumGuards;) {
151 CurrentFuncNum++;
152 assert(BlockIsEntry(FuncBeg));
153 size_t FuncEnd = FuncBeg + 1;
154 for (; FuncEnd < NumGuards && !BlockIsEntry(FuncEnd); FuncEnd++)
155 ;
156 if (BBExecuted[FuncBeg]) {
157 fprintf(Out, "C%zd", CurrentFuncNum);
158 for (size_t I = FuncBeg + 1; I < FuncEnd; I++)
159 if (BBExecuted[I])
160 fprintf(Out, " %zd", I - FuncBeg);
161 fprintf(Out, " %zd\n", FuncEnd - FuncBeg);
kccce6392a2019-05-08 00:51:15 +0000162 }
kcc45fa3552019-05-08 17:20:09 +0000163 FuncBeg = FuncEnd;
kccce6392a2019-05-08 00:51:15 +0000164 }
kccce6392a2019-05-08 00:51:15 +0000165}
166
kccdbf20a02018-05-10 19:59:01 +0000167int main(int argc, char **argv) {
168 if (LLVMFuzzerInitialize)
169 LLVMFuzzerInitialize(&argc, &argv);
170 if (argc == 1)
171 return PrintFunctions();
hans39ed0342019-06-14 07:32:22 +0000172 assert(argc == 4 || argc == 5);
173 InputLabelBeg = atoi(argv[1]);
174 InputLabelEnd = atoi(argv[2]);
175 assert(InputLabelBeg < InputLabelEnd);
kccdbf20a02018-05-10 19:59:01 +0000176
hans39ed0342019-06-14 07:32:22 +0000177 const char *Input = argv[3];
kccdbf20a02018-05-10 19:59:01 +0000178 fprintf(stderr, "INFO: reading '%s'\n", Input);
179 FILE *In = fopen(Input, "r");
180 assert(In);
181 fseek(In, 0, SEEK_END);
182 InputLen = ftell(In);
183 fseek(In, 0, SEEK_SET);
184 unsigned char *Buf = (unsigned char*)malloc(InputLen);
185 size_t NumBytesRead = fread(Buf, 1, InputLen, In);
186 assert(NumBytesRead == InputLen);
hans39ed0342019-06-14 07:32:22 +0000187 PrintableStringForLabel = (char*)malloc(InputLen + 2);
kccdbf20a02018-05-10 19:59:01 +0000188 fclose(In);
189
hans39ed0342019-06-14 07:32:22 +0000190 fprintf(stderr, "INFO: running '%s'\n", Input);
191 for (size_t I = 1; I <= InputLen; I++) {
192 size_t Idx = I - 1;
193 if (Idx >= InputLabelBeg && Idx < InputLabelEnd) {
194 dfsan_label L = dfsan_create_label("", nullptr);
195 assert(L == I - InputLabelBeg);
196 dfsan_set_label(L, Buf + Idx, 1);
197 }
kcce39dc5c2019-06-13 21:17:49 +0000198 }
hans39ed0342019-06-14 07:32:22 +0000199 dfsan_label SizeL = dfsan_create_label("", nullptr);
200 InputSizeLabel = SizeL;
201 assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1);
202 dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
203
204 LLVMFuzzerTestOneInput(Buf, InputLen);
kccdbf20a02018-05-10 19:59:01 +0000205 free(Buf);
206
hans39ed0342019-06-14 07:32:22 +0000207 bool OutIsStdout = argc == 4;
kccdbf20a02018-05-10 19:59:01 +0000208 fprintf(stderr, "INFO: writing dataflow to %s\n",
hans39ed0342019-06-14 07:32:22 +0000209 OutIsStdout ? "<stdout>" : argv[4]);
210 FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w");
kccdbf20a02018-05-10 19:59:01 +0000211 PrintDataFlow(Out);
kccce6392a2019-05-08 00:51:15 +0000212 PrintCoverage(Out);
kccdbf20a02018-05-10 19:59:01 +0000213 if (!OutIsStdout) fclose(Out);
214}
215
216extern "C" {
217
218void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
219 uint32_t *stop) {
220 assert(NumFuncs == 0 && "This tool does not support DSOs");
221 assert(start < stop && "The code is not instrumented for coverage");
222 if (start == stop || *start) return; // Initialize only once.
kccce6392a2019-05-08 00:51:15 +0000223 GuardsBeg = start;
224 GuardsEnd = stop;
kccdbf20a02018-05-10 19:59:01 +0000225}
226
227void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
228 const uintptr_t *pcs_end) {
kccce6392a2019-05-08 00:51:15 +0000229 if (NumGuards) return; // Initialize only once.
230 NumGuards = GuardsEnd - GuardsBeg;
231 PCsBeg = pcs_beg;
232 PCsEnd = pcs_end;
233 assert(NumGuards == (PCsEnd - PCsBeg) / 2);
234 for (size_t i = 0; i < NumGuards; i++) {
kcc45fa3552019-05-08 17:20:09 +0000235 if (BlockIsEntry(i)) {
kccce6392a2019-05-08 00:51:15 +0000236 NumFuncs++;
237 GuardsBeg[i] = NumFuncs;
238 }
239 }
hans39ed0342019-06-14 07:32:22 +0000240 FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
kccce6392a2019-05-08 00:51:15 +0000241 BBExecuted = (bool*)calloc(NumGuards, sizeof(bool));
242 fprintf(stderr, "INFO: %zd instrumented function(s) observed "
243 "and %zd basic blocks\n", NumFuncs, NumGuards);
kccdbf20a02018-05-10 19:59:01 +0000244}
245
// Indirect-call callback; this tool does not use it, so it is a no-op.
void __sanitizer_cov_trace_pc_indir(uint64_t x) {
  (void)x;  // unused.
}
247
kccce6392a2019-05-08 00:51:15 +0000248void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
249 size_t GuardIdx = guard - GuardsBeg;
250 assert(GuardIdx < NumGuards);
251 BBExecuted[GuardIdx] = true;
252 if (!*guard) return; // not a function entry.
kccdbf20a02018-05-10 19:59:01 +0000253 uint32_t FuncNum = *guard - 1; // Guards start from 1.
254 assert(FuncNum < NumFuncs);
255 CurrentFunc = FuncNum;
256}
257
258void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
259 dfsan_label L1, dfsan_label UnusedL) {
260 assert(CurrentFunc < NumFuncs);
hans39ed0342019-06-14 07:32:22 +0000261 FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1);
kccdbf20a02018-05-10 19:59:01 +0000262}
263
264#define HOOK(Name, Type) \
265 void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \
266 assert(CurrentFunc < NumFuncs); \
hans39ed0342019-06-14 07:32:22 +0000267 FuncLabels[CurrentFunc] = \
268 dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \
kccdbf20a02018-05-10 19:59:01 +0000269 }
270
271HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)
272HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t)
273HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t)
274HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t)
275HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t)
276HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t)
277HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t)
278HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t)
279
280} // extern "C"