blob: 187a8e52cd59ecd65070f7ea96f7add9ed3e7dff [file] [log] [blame]
kccdbf20a02018-05-10 19:59:01 +00001/*===- DataFlow.cpp - a standalone DataFlow tracer -------===//
2//
chandlerc40284492019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
kccdbf20a02018-05-10 19:59:01 +00006//
7//===----------------------------------------------------------------------===//
8// An experimental data-flow tracer for fuzz targets.
9// It is based on DFSan and SanitizerCoverage.
10// https://clang.llvm.org/docs/DataFlowSanitizer.html
11// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
12//
13// It executes the fuzz target on the given input while monitoring the
14// data flow for every instrumented comparison instruction.
15//
16// The output shows which functions depend on which bytes of the input.
17//
18// Build:
19// 1. Compile this file with -fsanitize=dataflow
20// 2. Build the fuzz target with -g -fsanitize=dataflow
21// -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp
22// 3. Link those together with -fsanitize=dataflow
23//
24// -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
25// instruction, DFSan modifies the calls to pass the data flow labels.
26// The callbacks update the data flow label for the current function.
27// See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
28//
// -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function
// entries so that the comparison callback knows the current function.
31//
32//
33// Run:
// # Collect data flow for INPUT_FILE, write to OUTPUT_FILE (default: stdout).
// # Only input bytes at offsets [FIRST_LABEL, LAST_LABEL) are labeled.
// ./a.out FIRST_LABEL LAST_LABEL INPUT_FILE [OUTPUT_FILE]
36//
37// # Print all instrumented functions. llvm-symbolizer must be present in PATH
38// ./a.out
39//
40// Example output:
41// ===============
kcc54dce2c2018-05-23 20:57:11 +000042// F0 11111111111111
43// F1 10000000000000
kccdbf20a02018-05-10 19:59:01 +000044// ===============
// "FN xxxxxxxxxx": tells which bytes of the input function N depends on.
// The byte string is LEN+1 bytes. The last byte is set if the function
// depends on the input length.
kccdbf20a02018-05-10 19:59:01 +000048//===----------------------------------------------------------------------===*/
49
50#include <assert.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <stdint.h>
54#include <string.h>
55
56#include <execinfo.h> // backtrace_symbols_fd
57
58#include <sanitizer/dfsan_interface.h>
59
// Entry points supplied by the fuzz target, declared with C linkage.
extern "C" {
// Called by main() once with the full contents of the input file.
int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
// Declared weak: main() tests the symbol for null before calling it, so the
// fuzz target is free not to define it.
__attribute__((weak)) int LLVMFuzzerInitialize(int *argc, char ***argv);
} // extern "C"
64
65static size_t InputLen;
dor1sd7a96a22019-04-12 21:00:12 +000066static size_t InputLabelBeg;
67static size_t InputLabelEnd;
68static size_t InputSizeLabel;
kccdbf20a02018-05-10 19:59:01 +000069static size_t NumFuncs;
70static const uintptr_t *FuncsBeg;
71static __thread size_t CurrentFunc;
72static dfsan_label *FuncLabels; // Array of NumFuncs elements.
kcc54dce2c2018-05-23 20:57:11 +000073static char *PrintableStringForLabel; // InputLen + 2 bytes.
kcc86e43882018-06-06 01:23:29 +000074static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];
kccdbf20a02018-05-10 19:59:01 +000075
76// Prints all instrumented functions.
kcc54dce2c2018-05-23 20:57:11 +000077static int PrintFunctions() {
kccdbf20a02018-05-10 19:59:01 +000078 // We don't have the symbolizer integrated with dfsan yet.
79 // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
80 // TODO(kcc): this is pretty ugly and may break in lots of ways.
81 // We'll need to make a proper in-process symbolizer work with DFSan.
82 FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' "
83 "| llvm-symbolizer "
84 "| grep 'dfs\\$' "
85 "| sed 's/dfs\\$//g'", "w");
86 for (size_t I = 0; I < NumFuncs; I++) {
87 uintptr_t PC = FuncsBeg[I * 2];
88 void *const Buf[1] = {(void*)PC};
89 backtrace_symbols_fd(Buf, 1, fileno(Pipe));
90 }
91 pclose(Pipe);
92 return 0;
93}
94
kcc86e43882018-06-06 01:23:29 +000095extern "C"
96void SetBytesForLabel(dfsan_label L, char *Bytes) {
97 if (LabelSeen[L])
98 return;
99 LabelSeen[L] = true;
kccb8a127f2018-05-23 23:55:54 +0000100 assert(L);
dor1sd7a96a22019-04-12 21:00:12 +0000101 if (L < InputSizeLabel) {
102 Bytes[L + InputLabelBeg - 1] = '1';
103 } else if (L == InputSizeLabel) {
104 Bytes[InputLen] = '1';
kcc54dce2c2018-05-23 20:57:11 +0000105 } else {
kccdbf20a02018-05-10 19:59:01 +0000106 auto *DLI = dfsan_get_label_info(L);
kcc54dce2c2018-05-23 20:57:11 +0000107 SetBytesForLabel(DLI->l1, Bytes);
108 SetBytesForLabel(DLI->l2, Bytes);
kccdbf20a02018-05-10 19:59:01 +0000109 }
kcc54dce2c2018-05-23 20:57:11 +0000110}
111
112static char *GetPrintableStringForLabel(dfsan_label L) {
113 memset(PrintableStringForLabel, '0', InputLen + 1);
114 PrintableStringForLabel[InputLen + 1] = 0;
kcc86e43882018-06-06 01:23:29 +0000115 memset(LabelSeen, 0, sizeof(LabelSeen));
kcc54dce2c2018-05-23 20:57:11 +0000116 SetBytesForLabel(L, PrintableStringForLabel);
117 return PrintableStringForLabel;
118}
119
120static void PrintDataFlow(FILE *Out) {
kccdbf20a02018-05-10 19:59:01 +0000121 for (size_t I = 0; I < NumFuncs; I++)
122 if (FuncLabels[I])
kcc54dce2c2018-05-23 20:57:11 +0000123 fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I]));
kccdbf20a02018-05-10 19:59:01 +0000124}
125
126int main(int argc, char **argv) {
127 if (LLVMFuzzerInitialize)
128 LLVMFuzzerInitialize(&argc, &argv);
129 if (argc == 1)
130 return PrintFunctions();
kcc0a250e22018-05-24 01:43:48 +0000131 assert(argc == 4 || argc == 5);
dor1sd7a96a22019-04-12 21:00:12 +0000132 InputLabelBeg = atoi(argv[1]);
133 InputLabelEnd = atoi(argv[2]);
134 assert(InputLabelBeg < InputLabelEnd);
kccdbf20a02018-05-10 19:59:01 +0000135
kcc0a250e22018-05-24 01:43:48 +0000136 const char *Input = argv[3];
kccdbf20a02018-05-10 19:59:01 +0000137 fprintf(stderr, "INFO: reading '%s'\n", Input);
138 FILE *In = fopen(Input, "r");
139 assert(In);
140 fseek(In, 0, SEEK_END);
141 InputLen = ftell(In);
142 fseek(In, 0, SEEK_SET);
143 unsigned char *Buf = (unsigned char*)malloc(InputLen);
144 size_t NumBytesRead = fread(Buf, 1, InputLen, In);
145 assert(NumBytesRead == InputLen);
kcc54dce2c2018-05-23 20:57:11 +0000146 PrintableStringForLabel = (char*)malloc(InputLen + 2);
kccdbf20a02018-05-10 19:59:01 +0000147 fclose(In);
148
149 fprintf(stderr, "INFO: running '%s'\n", Input);
150 for (size_t I = 1; I <= InputLen; I++) {
kcc0a250e22018-05-24 01:43:48 +0000151 size_t Idx = I - 1;
dor1sd7a96a22019-04-12 21:00:12 +0000152 if (Idx >= InputLabelBeg && Idx < InputLabelEnd) {
153 dfsan_label L = dfsan_create_label("", nullptr);
154 assert(L == I - InputLabelBeg);
kcc0a250e22018-05-24 01:43:48 +0000155 dfsan_set_label(L, Buf + Idx, 1);
dor1sd7a96a22019-04-12 21:00:12 +0000156 }
kccdbf20a02018-05-10 19:59:01 +0000157 }
158 dfsan_label SizeL = dfsan_create_label("", nullptr);
dor1sd7a96a22019-04-12 21:00:12 +0000159 InputSizeLabel = SizeL;
160 assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1);
kccdbf20a02018-05-10 19:59:01 +0000161 dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
162
163 LLVMFuzzerTestOneInput(Buf, InputLen);
164 free(Buf);
165
kcc0a250e22018-05-24 01:43:48 +0000166 bool OutIsStdout = argc == 4;
kccdbf20a02018-05-10 19:59:01 +0000167 fprintf(stderr, "INFO: writing dataflow to %s\n",
kcc0a250e22018-05-24 01:43:48 +0000168 OutIsStdout ? "<stdout>" : argv[4]);
169 FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w");
kccdbf20a02018-05-10 19:59:01 +0000170 PrintDataFlow(Out);
171 if (!OutIsStdout) fclose(Out);
172}
173
174extern "C" {
175
176void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
177 uint32_t *stop) {
178 assert(NumFuncs == 0 && "This tool does not support DSOs");
179 assert(start < stop && "The code is not instrumented for coverage");
180 if (start == stop || *start) return; // Initialize only once.
181 for (uint32_t *x = start; x < stop; x++)
182 *x = ++NumFuncs; // The first index is 1.
183 FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
184 fprintf(stderr, "INFO: %zd instrumented function(s) observed\n", NumFuncs);
185}
186
187void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
188 const uintptr_t *pcs_end) {
189 assert(NumFuncs == (pcs_end - pcs_beg) / 2);
190 FuncsBeg = pcs_beg;
191}
192
// Unused by this tool: the callback ignores its argument and does nothing.
void __sanitizer_cov_trace_pc_indir(uint64_t x) {
  (void)x;
}
194
195void __sanitizer_cov_trace_pc_guard(uint32_t *guard){
196 uint32_t FuncNum = *guard - 1; // Guards start from 1.
197 assert(FuncNum < NumFuncs);
198 CurrentFunc = FuncNum;
199}
200
201void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
202 dfsan_label L1, dfsan_label UnusedL) {
203 assert(CurrentFunc < NumFuncs);
204 FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1);
205}
206
207#define HOOK(Name, Type) \
208 void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \
209 assert(CurrentFunc < NumFuncs); \
210 FuncLabels[CurrentFunc] = \
211 dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \
212 }
213
214HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)
215HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t)
216HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t)
217HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t)
218HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t)
219HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t)
220HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t)
221HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t)
222
223} // extern "C"