blob: 2b9171af9a15f77731f9b2e88c0cfd8b6247c34c [file] [log] [blame]
//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// A single header library providing a utility class to break up an array of
// bytes. Whenever run on the same input, provides the same output, as long as
// its methods are called in the same order, with the same arguments.
//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
14#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
15
#include <limits.h>
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
27
// Splits a caller-supplied byte array into typed values: raw byte vectors,
// strings, integers, booleans, enum values and array elements. The class only
// stores a pointer into the caller's buffer plus a count of unread bytes; it
// never copies or frees the buffer itself. Bulk reads (bytes, strings) take
// data from the front of the buffer, integral reads take it from the back.
class FuzzedDataProvider {
public:
  // |data| is an array of length |size| that the FuzzedDataProvider wraps to
  // provide more granular access. |data| must outlive the FuzzedDataProvider.
  FuzzedDataProvider(const uint8_t *data, size_t size)
      : data_ptr_(data), remaining_bytes_(size) {}
  ~FuzzedDataProvider() = default;

  // Returns a std::vector containing |num_bytes| of input data. If fewer than
  // |num_bytes| of data remain, returns a shorter std::vector containing all
  // of the data that's left. Can be used with any byte sized type, such as
  // char, unsigned char, uint8_t, etc.
  template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) {
    num_bytes = std::min(num_bytes, remaining_bytes_);
    return ConsumeBytes<T>(num_bytes, num_bytes);
  }

  // Similar to |ConsumeBytes|, but also appends the terminator value at the end
  // of the resulting vector, so the result always holds at least one element.
  // Useful when a mutable null-terminated C-string is needed, for example. But
  // that is a rare case. Better avoid it, if possible, and prefer using
  // |ConsumeBytes| or |ConsumeBytesAsString| methods.
  template <typename T>
  std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes,
                                            T terminator = 0) {
    num_bytes = std::min(num_bytes, remaining_bytes_);
    // One extra slot is allocated past the consumed data for the terminator.
    std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
    result.back() = terminator;
    return result;
  }

  // Returns a std::string containing |num_bytes| of input data. Using this and
  // |.c_str()| on the resulting string is the best way to get an immutable
  // null-terminated C string. If fewer than |num_bytes| of data remain, returns
  // a shorter std::string containing all of the data that's left.
  std::string ConsumeBytesAsString(size_t num_bytes) {
    static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
                  "ConsumeBytesAsString cannot convert the data to a string.");

    num_bytes = std::min(num_bytes, remaining_bytes_);
    std::string result(
        reinterpret_cast<const std::string::value_type *>(data_ptr_),
        num_bytes);
    Advance(num_bytes);
    return result;
  }

  // Returns a number in the range [min, max] by consuming bytes from the
  // input data. The value might not be uniformly distributed in the given
  // range. If there's no input data left, always returns |min|. |min| must
  // be less than or equal to |max|; the process is aborted otherwise.
  template <typename T> T ConsumeIntegralInRange(T min, T max) {
    static_assert(std::is_integral<T>::value, "An integral type is required.");
    static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");

    if (min > max)
      std::abort();

    // Use the biggest type possible to hold the range and the result. The
    // subtraction is done on unsigned values, so it wraps around to the right
    // width of the interval even when |min| is negative.
    uint64_t range = static_cast<uint64_t>(max) - static_cast<uint64_t>(min);
    uint64_t result = 0;
    size_t offset = 0;

    // Read at most sizeof(T) bytes, and stop early once enough bits have been
    // gathered to cover |range| or the input is exhausted.
    while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
           remaining_bytes_ != 0) {
      // Pull bytes off the end of the seed data. Experimentally, this seems to
      // allow the fuzzer to more easily explore the input space. This makes
      // sense, since it works by modifying inputs that caused new code to run,
      // and this data is often used to encode length of data read by
      // |ConsumeBytes|. Separating out read lengths makes it easier to modify
      // the contents of the data that is actually read.
      --remaining_bytes_;
      result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
      offset += CHAR_BIT;
    }

    // Avoid division by 0, in case |range + 1| results in overflow.
    if (range != std::numeric_limits<decltype(range)>::max())
      result = result % (range + 1);

    // Add in the unsigned domain (well-defined wrap-around), then narrow.
    return static_cast<T>(static_cast<uint64_t>(min) + result);
  }

  // Returns a std::string of length from 0 to |max_length|. When it runs out of
  // input data, returns what remains of the input. Designed to be more stable
  // with respect to a fuzzer inserting characters than just picking a random
  // length and then consuming that many bytes with |ConsumeBytes|.
  std::string ConsumeRandomLengthString(size_t max_length) {
    // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
    // followed by anything else to the end of the string (both bytes are
    // consumed). A "\" that is the very last input byte is kept literally. As a
    // result of this logic, a fuzzer can insert characters into the string, and
    // the string will be lengthened to include those new characters, resulting
    // in a more stable fuzzer than picking the length of a string independently
    // from picking its contents.
    std::string result;

    // Reserve the anticipated capacity to prevent several reallocations.
    result.reserve(std::min(max_length, remaining_bytes_));
    for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
      char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
      Advance(1);
      if (next == '\\' && remaining_bytes_ != 0) {
        next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
        Advance(1);
        if (next != '\\')
          break;
      }
      result += next;
    }

    result.shrink_to_fit();
    return result;
  }

  // Returns a std::vector containing all remaining bytes of the input data.
  template <typename T> std::vector<T> ConsumeRemainingBytes() {
    return ConsumeBytes<T>(remaining_bytes_);
  }

  // Returns a std::string containing all remaining bytes of the input data.
  // Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
  // object.
  std::string ConsumeRemainingBytesAsString() {
    return ConsumeBytesAsString(remaining_bytes_);
  }

  // Returns a number in the range [Type's min, Type's max]. The value might
  // not be uniformly distributed in the given range. If there's no input data
  // left, always returns the type's minimum value.
  template <typename T> T ConsumeIntegral() {
    return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
                                  std::numeric_limits<T>::max());
  }

  // Reads one byte and returns a bool, or false when no data remains.
  bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); }

  // Returns a copy of a value selected from a fixed-size |array|.
  template <typename T, size_t size>
  T PickValueInArray(const T (&array)[size]) {
    static_assert(size > 0, "The array must be non empty.");
    return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
  }

  // Returns a copy of a value selected from |list|. The list must be non
  // empty; its size is only known at run time, so an empty list aborts the
  // process instead of failing to compile.
  template <typename T>
  T PickValueInArray(std::initializer_list<const T> list) {
    // Without this check |list.size() - 1| would wrap around and the index
    // computed below could point far outside of the list.
    if (list.size() == 0)
      std::abort();

    return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
  }

  // Return an enum value. The enum must start at 0 and be contiguous. It must
  // also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
  // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
  template <typename T> T ConsumeEnum() {
    static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
    return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
        0, static_cast<uint32_t>(T::kMaxValue)));
  }

  // Reports the remaining bytes available for fuzzed input.
  size_t remaining_bytes() const { return remaining_bytes_; }

private:
  // Not copyable: two copies would hand out the same input bytes twice.
  FuzzedDataProvider(const FuzzedDataProvider &) = delete;
  FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;

  // Moves the read cursor forward by |num_bytes|. Aborts if that would run
  // past the end of the input.
  void Advance(size_t num_bytes) {
    if (num_bytes > remaining_bytes_)
      std::abort();

    data_ptr_ += num_bytes;
    remaining_bytes_ -= num_bytes;
  }

  // Returns a vector of |size| elements whose first |num_bytes_to_consume|
  // elements are copied from the front of the input; any further elements are
  // value-initialized. Aborts if asked to copy more than the vector can hold
  // or more than the input still contains.
  template <typename T>
  std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) {
    static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");

    // Copying more than |size| bytes would overflow the vector's buffer.
    if (num_bytes_to_consume > size)
      std::abort();

    // The point of using the size-based constructor below is to increase the
    // odds of having a vector object with capacity being equal to the length.
    // That part is always implementation specific, but at least both libc++ and
    // libstdc++ allocate the requested number of bytes in that constructor,
    // which seems to be a natural choice for other implementations as well.
    // To increase the odds even more, we also call |shrink_to_fit| below.
    std::vector<T> result(size);

    // Skip the copy when nothing is consumed: |data_ptr_| and |result.data()|
    // may both be null in that case, and passing a null pointer to memcpy is
    // undefined behavior even for a zero length.
    if (num_bytes_to_consume != 0) {
      // Check the bound before reading from the input, not after.
      if (num_bytes_to_consume > remaining_bytes_)
        std::abort();
      std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
      Advance(num_bytes_to_consume);
    }

    // Even though |shrink_to_fit| is also implementation specific, we expect it
    // to provide an additional assurance in case vector's constructor allocated
    // a buffer which is larger than the actual amount of data we put inside it.
    result.shrink_to_fit();
    return result;
  }

  // Reinterprets the unsigned |value| as the same-sized type |TS| following
  // two's complement rules, without relying on implementation-defined
  // conversions. |TS| is normally signed; it may also be an unsigned type (for
  // example plain |char| on targets where it is unsigned), in which case the
  // value is converted as is.
  template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) {
    static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
    static_assert(!std::numeric_limits<TU>::is_signed,
                  "Source type must be unsigned.");

    // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
    if (std::numeric_limits<TS>::is_modulo)
      return static_cast<TS>(value);

    // Avoid using implementation-defined unsigned to signed conversions.
    // To learn more, see https://stackoverflow.com/questions/13150449.
    if (value <= std::numeric_limits<TS>::max())
      return static_cast<TS>(value);

    // |value| is above TS's maximum. Reducing |value - TS_min| modulo 2^N via
    // the cast to TU yields the distance from TS_min, which always fits in TS,
    // so neither cast below is out of range.
    constexpr auto TS_min = std::numeric_limits<TS>::min();
    return static_cast<TS>(
        TS_min + static_cast<TS>(static_cast<TU>(value - TS_min)));
  }

  // Start of the not yet consumed input.
  const uint8_t *data_ptr_;
  // Number of input bytes that have not been consumed yet.
  size_t remaining_bytes_;
};
246
dor1sf1a25802019-06-18 20:29:11 +0000247#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_