Blame - utils/FuzzedDataProvider.h - chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer

blob: 252f1f6692403666a114dac799d8b0208989c73a [file] [log] [blame]

dor1s	6fb3086	2019-06-11 14:30:18 +0000	[diff] [blame]	1	//===- FuzzedDataProvider.h - Utility header for fuzz targets ---- C++ - ===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8	// A single header library providing a utility class to break up an array of
				9	// bytes (supposedly provided by a fuzzing engine) for multiple consumers.
				10	// Whenever run on the same input, provides the same output, as long as its
				11	// methods are called in the same order, with the same arguments.
				12	//===----------------------------------------------------------------------===//
				13
				14	#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
				15	#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
				16
				#include <limits.h>
				#include <stddef.h>
				#include <stdint.h>

				#include <algorithm>
				#include <cstdlib>
				#include <cstring>
				#include <limits>
				#include <string>
				#include <type_traits>
				#include <utility>
				#include <vector>
				27
				// Splits a fixed byte buffer (typically supplied by a fuzzing engine) into
				// typed values for multiple consumers. Byte-oriented methods read from the
				// front of the buffer, while integral-oriented methods read from the back.
				// The object does not own the buffer and is neither copyable nor assignable.
				class FuzzedDataProvider {
				 public:
				  using data_type = uint8_t;

				  // |data| is an array of length |size| that the FuzzedDataProvider wraps to
				  // provide more granular access. |data| must outlive the FuzzedDataProvider.
				  // |data| may be null only when |size| is 0.
				  FuzzedDataProvider(const uint8_t* data, size_t size)
				      : data_ptr_(data), remaining_bytes_(size) {}
				  ~FuzzedDataProvider() = default;

				  // Returns a std::vector containing |num_bytes| of input data. If fewer than
				  // |num_bytes| of data remain, returns a shorter std::vector containing all
				  // of the data that's left. |T| must be a one-byte type.
				  template <typename T = data_type>
				  std::vector<T> ConsumeBytes(size_t num_bytes) {
				    static_assert(sizeof(T) == sizeof(data_type), "Incompatible data type.");

				    num_bytes = std::min(num_bytes, remaining_bytes_);

				    // The point of using the size-based constructor below is to increase the
				    // odds of having a vector object with capacity being equal to the length.
				    // That part is always implementation specific, but at least both libc++ and
				    // libstdc++ allocate the requested number of bytes in that constructor,
				    // which seems to be a natural choice for other implementations as well.
				    // To increase the odds even more, we also call |shrink_to_fit| below.
				    std::vector<T> result(num_bytes);

				    // Nothing to copy. Bail out before touching |std::memcpy|: an empty
				    // vector's data() and |data_ptr_| may both be null, and passing a null
				    // pointer to memcpy is undefined behavior even when the count is zero.
				    if (num_bytes == 0)
				      return result;

				    std::memcpy(result.data(), data_ptr_, num_bytes);
				    Advance(num_bytes);

				    // Even though |shrink_to_fit| is also implementation specific, we expect it
				    // to provide an additional assurance in case vector's constructor allocated
				    // a buffer which is larger than the actual amount of data we put inside it.
				    result.shrink_to_fit();
				    return result;
				  }

				  // Prefer using |ConsumeBytes| unless you actually need a std::string object.
				  // Returns a std::string containing |num_bytes| of input data. If fewer than
				  // |num_bytes| of data remain, returns a shorter std::string containing all
				  // of the data that's left.
				  std::string ConsumeBytesAsString(size_t num_bytes) {
				    static_assert(sizeof(std::string::value_type) == sizeof(data_type),
				                  "ConsumeBytesAsString cannot convert the data to a string.");

				    num_bytes = std::min(num_bytes, remaining_bytes_);

				    // Do not hand a possibly-null |data_ptr_| to the pointer-based
				    // std::string constructor when there is nothing to copy.
				    if (num_bytes == 0)
				      return std::string();

				    std::string result(
				        reinterpret_cast<const std::string::value_type*>(data_ptr_), num_bytes);
				    Advance(num_bytes);
				    return result;
				  }

				  // Returns a number in the range [min, max] by consuming bytes from the input
				  // data. The value might not be uniformly distributed in the given range. If
				  // there's no input data left, always returns |min|. |min| must be less than
				  // or equal to |max|; otherwise the process is aborted.
				  template <typename T>
				  T ConsumeIntegralInRange(T min, T max) {
				    static_assert(std::is_integral<T>::value, "An integral type is required.");
				    static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");

				    if (min > max)
				      std::abort();

				    // Use the biggest type possible to hold the range and the result. The
				    // subtraction is done on unsigned values, so it wraps modulo 2^64 and
				    // yields the correct distance for signed |T| as well.
				    const uint64_t range =
				        static_cast<uint64_t>(max) - static_cast<uint64_t>(min);
				    uint64_t result = 0;
				    size_t offset = 0;

				    // Consume only as many bytes as are needed to cover |range|, and never
				    // more than sizeof(T).
				    while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
				           remaining_bytes_ != 0) {
				      // Pull bytes off the end of the seed data. Experimentally, this seems to
				      // allow the fuzzer to more easily explore the input space. This makes
				      // sense, since it works by modifying inputs that caused new code to run,
				      // and this data is often used to encode length of data read by
				      // |ConsumeBytes|. Separating out read lengths makes it easier to modify
				      // the contents of the data that is actually read.
				      --remaining_bytes_;
				      result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
				      offset += CHAR_BIT;
				    }

				    // Avoid division by 0, in the case |range + 1| results in overflow.
				    if (range != std::numeric_limits<decltype(range)>::max())
				      result = result % (range + 1);

				    return static_cast<T>(static_cast<uint64_t>(min) + result);
				  }

				  // Returns a std::string of length from 0 to |max_length|. When it runs out of
				  // input data, returns what remains of the input. Designed to be more stable
				  // with respect to a fuzzer inserting characters than just picking a random
				  // length and then consuming that many bytes with |ConsumeBytes|.
				  std::string ConsumeRandomLengthString(size_t max_length) {
				    // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
				    // followed by anything else to the end of the string (both bytes are
				    // consumed). A "\" that is the very last input byte is kept as a literal
				    // character. As a result of this logic, a fuzzer can insert characters
				    // into the string, and the string will be lengthened to include those new
				    // characters, resulting in a more stable fuzzer than picking the length of
				    // a string independently from picking its contents.
				    std::string result;

				    // The result can never be longer than either bound, so reserve once
				    // instead of growing one character at a time.
				    result.reserve(std::min(max_length, remaining_bytes_));

				    for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
				      char next = static_cast<char>(data_ptr_[0]);
				      Advance(1);
				      if (next == '\\' && remaining_bytes_ != 0) {
				        next = static_cast<char>(data_ptr_[0]);
				        Advance(1);
				        // Leave the loop rather than returning, so that the terminated string
				        // goes through the same |shrink_to_fit| as every other result.
				        if (next != '\\')
				          break;
				      }
				      result += next;
				    }

				    result.shrink_to_fit();
				    return result;
				  }

				  // Returns a std::vector containing all remaining bytes of the input data.
				  template <typename T = data_type>
				  std::vector<T> ConsumeRemainingBytes() {
				    return ConsumeBytes<T>(remaining_bytes_);
				  }

				  // Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
				  // object.
				  // Returns a std::string containing all remaining bytes of the input data.
				  std::string ConsumeRemainingBytesAsString() {
				    return ConsumeBytesAsString(remaining_bytes_);
				  }

				  // Returns a number in the range [Type's min, Type's max]. The value might
				  // not be uniformly distributed in the given range. If there's no input data
				  // left, always returns Type's min.
				  template <typename T>
				  T ConsumeIntegral() {
				    return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
				                                  std::numeric_limits<T>::max());
				  }

				  // Reads one byte and returns a bool, or false when no data remains.
				  bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); }

				  // Returns a value from |array|, consuming as many bytes as needed to do so.
				  // |array| must be a non-empty fixed-size array.
				  template <typename T, size_t size>
				  T PickValueInArray(T (&array)[size]) {
				    // Guards the |size - 1| below against unsigned underflow.
				    static_assert(size > 0, "The array must be non empty.");
				    return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
				  }

				  // Return an enum value. The enum must start at 0 and be contiguous. It must
				  // also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
				  // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
				  template <typename T>
				  T ConsumeEnum() {
				    static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
				    return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
				        0, static_cast<uint32_t>(T::kMaxValue)));
				  }

				  // Reports the remaining bytes available for fuzzed input.
				  size_t remaining_bytes() const { return remaining_bytes_; }

				 private:
				  FuzzedDataProvider(const FuzzedDataProvider&) = delete;
				  FuzzedDataProvider& operator=(const FuzzedDataProvider&) = delete;

				  // Drops |num_bytes| from the front of the unread data. Aborts if asked to
				  // skip more than what is left, since that would indicate a logic error in
				  // one of the methods above.
				  void Advance(size_t num_bytes) {
				    if (num_bytes > remaining_bytes_)
				      std::abort();

				    data_ptr_ += num_bytes;
				    remaining_bytes_ -= num_bytes;
				  }

				  // Start of the unread data; moves forward as bytes are taken from the front.
				  const data_type* data_ptr_;
				  // Number of unread bytes; shrinks for reads from either end of the buffer.
				  size_t remaining_bytes_;
				};
				204
				205	#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_