Blame - utils/FuzzedDataProvider.h - chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer

blob: 5692060c7f7eb65403022df2ffcf7054590519d3 [file] [log] [blame]

dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	1	//===- FuzzedDataProvider.h - Utility header for fuzz targets ---- C++ - ===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
dor1s	bbb32d0	2019-08-08 19:49:37 +0000	[diff] [blame^]	8	// This is a temporary copy of compiler-rt/include/fuzzer/FuzzedDataProvider.h.
dor1s	058d020	2019-08-06 16:02:39 +0000	[diff] [blame]	9	// TODO(mmoroz@chromium.org): delete this copy.
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	10	// A single header library providing a utility class to break up an array of
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	11	// bytes. Whenever run on the same input, provides the same output, as long as
				12	// its methods are called in the same order, with the same arguments.
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	13	//===----------------------------------------------------------------------===//
				14
				15	#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
				16	#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
				17
#include <limits.h>
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
				29
				30	class FuzzedDataProvider {
dor1s	78e9a67	2019-08-05 19:55:52 +0000	[diff] [blame]	31	public:
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	32	// \|data\| is an array of length \|size\| that the FuzzedDataProvider wraps to
				33	// provide more granular access. \|data\| must outlive the FuzzedDataProvider.
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	34	FuzzedDataProvider(const uint8_t *data, size_t size)
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	35	: data_ptr_(data), remaining_bytes_(size) {}
				36	~FuzzedDataProvider() = default;
				37
				38	// Returns a std::vector containing \|num_bytes\| of input data. If fewer than
				39	// \|num_bytes\| of data remain, returns a shorter std::vector containing all
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	40	// of the data that's left. Can be used with any byte sized type, such as
				41	// char, unsigned char, uint8_t, etc.
				42	template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) {
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	43	num_bytes = std::min(num_bytes, remaining_bytes_);
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	44	return ConsumeBytes<T>(num_bytes, num_bytes);
				45	}
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	46
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	47	// Similar to \|ConsumeBytes\|, but also appends the terminator value at the end
				48	// of the resulting vector. Useful, when a mutable null-terminated C-string is
				49	// needed, for example. But that is a rare case. Better avoid it, if possible,
				50	// and prefer using \|ConsumeBytes\| or \|ConsumeBytesAsString\| methods.
				51	template <typename T>
				52	std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes,
				53	T terminator = 0) {
				54	num_bytes = std::min(num_bytes, remaining_bytes_);
				55	std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
				56	result.back() = terminator;
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	57	return result;
				58	}
				59
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	60	// Returns a std::string containing \|num_bytes\| of input data. Using this and
				61	// \|.c_str()\| on the resulting string is the best way to get an immutable
				62	// null-terminated C string. If fewer than \|num_bytes\| of data remain, returns
				63	// a shorter std::string containing all of the data that's left.
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	64	std::string ConsumeBytesAsString(size_t num_bytes) {
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	65	static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	66	"ConsumeBytesAsString cannot convert the data to a string.");
				67
				68	num_bytes = std::min(num_bytes, remaining_bytes_);
				69	std::string result(
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	70	reinterpret_cast<const std::string::value_type *>(data_ptr_),
				71	num_bytes);
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	72	Advance(num_bytes);
				73	return result;
				74	}
				75
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	76	// Returns a number in the range [min, max] by consuming bytes from the
				77	// input data. The value might not be uniformly distributed in the given
				78	// range. If there's no input data left, always returns \|min\|. \|min\| must
				79	// be less than or equal to \|max\|.
				80	template <typename T> T ConsumeIntegralInRange(T min, T max) {
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	81	static_assert(std::is_integral<T>::value, "An integral type is required.");
				82	static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
				83
				84	if (min > max)
				85	abort();
				86
				87	// Use the biggest type possible to hold the range and the result.
				88	uint64_t range = static_cast<uint64_t>(max) - min;
				89	uint64_t result = 0;
				90	size_t offset = 0;
				91
				92	while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
				93	remaining_bytes_ != 0) {
				94	// Pull bytes off the end of the seed data. Experimentally, this seems to
				95	// allow the fuzzer to more easily explore the input space. This makes
				96	// sense, since it works by modifying inputs that caused new code to run,
				97	// and this data is often used to encode length of data read by
				98	// \|ConsumeBytes\|. Separating out read lengths makes it easier modify the
				99	// contents of the data that is actually read.
				100	--remaining_bytes_;
				101	result = (result << CHAR_BIT) \| data_ptr_[remaining_bytes_];
				102	offset += CHAR_BIT;
				103	}
				104
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	105	// Avoid division by 0, in case \|range + 1\| results in overflow.
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	106	if (range != std::numeric_limits<decltype(range)>::max())
				107	result = result % (range + 1);
				108
				109	return static_cast<T>(min + result);
				110	}
				111
				112	// Returns a std::string of length from 0 to \|max_length\|. When it runs out of
				113	// input data, returns what remains of the input. Designed to be more stable
				114	// with respect to a fuzzer inserting characters than just picking a random
				115	// length and then consuming that many bytes with \|ConsumeBytes\|.
				116	std::string ConsumeRandomLengthString(size_t max_length) {
				117	// Reads bytes from the start of \|data_ptr_\|. Maps "\\" to "\", and maps "\"
				118	// followed by anything else to the end of the string. As a result of this
				119	// logic, a fuzzer can insert characters into the string, and the string
				120	// will be lengthened to include those new characters, resulting in a more
				121	// stable fuzzer than picking the length of a string independently from
				122	// picking its contents.
				123	std::string result;
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	124
				125	// Reserve the anticipated capaticity to prevent several reallocations.
				126	result.reserve(std::min(max_length, remaining_bytes_));
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	127	for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	128	char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	129	Advance(1);
				130	if (next == '\\' && remaining_bytes_ != 0) {
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	131	next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	132	Advance(1);
				133	if (next != '\\')
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	134	break;
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	135	}
				136	result += next;
				137	}
				138
				139	result.shrink_to_fit();
				140	return result;
				141	}
				142
				143	// Returns a std::vector containing all remaining bytes of the input data.
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	144	template <typename T> std::vector<T> ConsumeRemainingBytes() {
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	145	return ConsumeBytes<T>(remaining_bytes_);
				146	}
				147
				148	// Prefer using \|ConsumeRemainingBytes\| unless you actually need a std::string
				149	// object.
				150	// Returns a std::vector containing all remaining bytes of the input data.
				151	std::string ConsumeRemainingBytesAsString() {
				152	return ConsumeBytesAsString(remaining_bytes_);
				153	}
				154
				155	// Returns a number in the range [Type's min, Type's max]. The value might
				156	// not be uniformly distributed in the given range. If there's no input data
				157	// left, always returns \|min\|.
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	158	template <typename T> T ConsumeIntegral() {
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	159	return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
				160	std::numeric_limits<T>::max());
				161	}
				162
				163	// Reads one byte and returns a bool, or false when no data remains.
				164	bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); }
				165
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	166	// Returns a copy of a value selected from a fixed-size \|array\|.
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	167	template <typename T, size_t size>
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	168	T PickValueInArray(const T (&array)[size]) {
				169	static_assert(size > 0, "The array must be non empty.");
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	170	return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
				171	}
				172
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	173	template <typename T>
				174	T PickValueInArray(std::initializer_list<const T> list) {
				175	// static_assert(list.size() > 0, "The array must be non empty.");
				176	return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
				177	}
				178
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	179	// Return an enum value. The enum must start at 0 and be contiguous. It must
				180	// also contain \|kMaxValue\| aliased to its largest (inclusive) value. Such as:
				181	// enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	182	template <typename T> T ConsumeEnum() {
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	183	static_assert(std::is_enum<T>::value, "\|T\| must be an enum type.");
				184	return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
				185	0, static_cast<uint32_t>(T::kMaxValue)));
				186	}
				187
				188	// Reports the remaining bytes available for fuzzed input.
				189	size_t remaining_bytes() { return remaining_bytes_; }
				190
dor1s	78e9a67	2019-08-05 19:55:52 +0000	[diff] [blame]	191	private:
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	192	FuzzedDataProvider(const FuzzedDataProvider &) = delete;
				193	FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	194
				195	void Advance(size_t num_bytes) {
				196	if (num_bytes > remaining_bytes_)
				197	abort();
				198
				199	data_ptr_ += num_bytes;
				200	remaining_bytes_ -= num_bytes;
				201	}
				202
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	203	template <typename T>
				204	std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) {
				205	static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
				206
				207	// The point of using the size-based constructor below is to increase the
				208	// odds of having a vector object with capacity being equal to the length.
				209	// That part is always implementation specific, but at least both libc++ and
				210	// libstdc++ allocate the requested number of bytes in that constructor,
				211	// which seems to be a natural choice for other implementations as well.
				212	// To increase the odds even more, we also call \|shrink_to_fit\| below.
				213	std::vector<T> result(size);
				214	std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
				215	Advance(num_bytes_to_consume);
				216
				217	// Even though \|shrink_to_fit\| is also implementation specific, we expect it
				218	// to provide an additional assurance in case vector's constructor allocated
				219	// a buffer which is larger than the actual amount of data we put inside it.
				220	result.shrink_to_fit();
				221	return result;
				222	}
				223
				224	template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) {
				225	static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
				226	static_assert(!std::numeric_limits<TU>::is_signed,
				227	"Source type must be unsigned.");
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	228
				229	// TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
				230	if (std::numeric_limits<TS>::is_modulo)
				231	return static_cast<TS>(value);
				232
				233	// Avoid using implementation-defined unsigned to signer conversions.
				234	// To learn more, see https://stackoverflow.com/questions/13150449.
				235	if (value <= std::numeric_limits<TS>::max())
				236	return static_cast<TS>(value);
				237	else {
				238	constexpr auto TS_min = std::numeric_limits<TS>::min();
				239	return TS_min + static_cast<char>(value - TS_min);
				240	}
				241	}
				242
				243	const uint8_t *data_ptr_;
dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	244	size_t remaining_bytes_;
				245	};
				246
dor1s	f1a2580	2019-06-18 20:29:11 +0000	[diff] [blame]	247	#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_