[compiler-rt] r363735 - [libFuzzer] Improve FuzzedDataProvider helper.
Max Moroz via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 18 13:29:11 PDT 2019
Author: dor1s
Date: Tue Jun 18 13:29:11 2019
New Revision: 363735
URL: http://llvm.org/viewvc/llvm-project?rev=363735&view=rev
Log:
[libFuzzer] Improve FuzzedDataProvider helper.
Summary:
The following changes are made based on the feedback from Tim King:
- Removed default template parameters, to have less assumptions.
- Implemented `ConsumeBytesWithTerminator` method.
- Made `PickValueInArray` method work with `initializer_list` argument.
- Got rid of `data_type` type alias, that was redundant.
- Refactored `ConsumeBytes` logic into a private method for better code reuse.
- Replaced implementation defined unsigned to signed conversion.
- Fixed `ConsumeRandomLengthString` to always call `shrink_to_fit`.
- Clarified and fixed some commments.
- Applied clang-format to both the library and the unittest source.
Tested on Linux, Mac, Windows.
Reviewers: morehouse, metzman
Reviewed By: morehouse
Subscribers: delcypher, #sanitizers, llvm-commits, kcc
Tags: #llvm, #sanitizers
Differential Revision: https://reviews.llvm.org/D63348
Modified:
compiler-rt/trunk/lib/fuzzer/tests/FuzzedDataProviderUnittest.cpp
compiler-rt/trunk/lib/fuzzer/utils/FuzzedDataProvider.h
Modified: compiler-rt/trunk/lib/fuzzer/tests/FuzzedDataProviderUnittest.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/tests/FuzzedDataProviderUnittest.cpp?rev=363735&r1=363734&r2=363735&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/tests/FuzzedDataProviderUnittest.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/tests/FuzzedDataProviderUnittest.cpp Tue Jun 18 13:29:11 2019
@@ -107,12 +107,13 @@ const uint8_t Data[] = {
TEST(FuzzedDataProvider, ConsumeBytes) {
FuzzedDataProvider DataProv(Data, sizeof(Data));
- EXPECT_EQ(std::vector<char>(1, 0x8A), DataProv.ConsumeBytes<char>(1));
+ EXPECT_EQ(std::vector<unsigned char>(1, 0x8A),
+ DataProv.ConsumeBytes<unsigned char>(1));
EXPECT_EQ(std::vector<uint8_t>(
{0x19, 0x0D, 0x44, 0x37, 0x0D, 0x38, 0x5E, 0x9B, 0xAA, 0xF3}),
DataProv.ConsumeBytes<uint8_t>(10));
- std::vector<unsigned char> UChars = DataProv.ConsumeBytes(24);
+ std::vector<unsigned char> UChars = DataProv.ConsumeBytes<unsigned char>(24);
EXPECT_EQ(std::vector<unsigned char>({0xDA, 0xAA, 0x88, 0xF2, 0x9B, 0x6C,
0xBA, 0xBE, 0xB1, 0xF2, 0xCF, 0x13,
0xB8, 0xAC, 0x1A, 0x7F, 0x1C, 0xC9,
@@ -123,6 +124,28 @@ TEST(FuzzedDataProvider, ConsumeBytes) {
DataProv.ConsumeBytes<signed char>(31337));
}
+TEST(FuzzedDataProvider, ConsumeBytesWithTerminator) {
+ FuzzedDataProvider DataProv(Data, sizeof(Data));
+ EXPECT_EQ(std::vector<unsigned char>({0x8A, 0x00}),
+ DataProv.ConsumeBytesWithTerminator<unsigned char>(1));
+ EXPECT_EQ(std::vector<uint8_t>({0x19, 0x0D, 0x44, 0x37, 0x0D, 0x38, 0x5E,
+ 0x9B, 0xAA, 0xF3, 111}),
+ DataProv.ConsumeBytesWithTerminator<uint8_t>(10, 111));
+
+ std::vector<unsigned char> UChars =
+ DataProv.ConsumeBytesWithTerminator<unsigned char>(24);
+ EXPECT_EQ(std::vector<unsigned char>(
+ {0xDA, 0xAA, 0x88, 0xF2, 0x9B, 0x6C, 0xBA, 0xBE, 0xB1,
+ 0xF2, 0xCF, 0x13, 0xB8, 0xAC, 0x1A, 0x7F, 0x1C, 0xC9,
+ 0x90, 0xD0, 0xD9, 0x5C, 0x42, 0xB3, 0x00}),
+ UChars);
+
+ std::vector<signed char> Expected(Data + 1 + 10 + 24, Data + sizeof(Data));
+ Expected.push_back(65);
+ EXPECT_EQ(Expected,
+ DataProv.ConsumeBytesWithTerminator<signed char>(31337, 65));
+}
+
TEST(FuzzedDataProvider, ConsumeBytesAsString) {
FuzzedDataProvider DataProv(Data, sizeof(Data));
EXPECT_EQ(std::string("\x8A\x19\x0D\x44\x37\x0D\x38\x5E\x9B\xAA\xF3\xDA"),
@@ -182,14 +205,15 @@ TEST(FuzzedDataProvider, ConsumeRemainin
{
FuzzedDataProvider DataProv(Data, sizeof(Data));
EXPECT_EQ(std::vector<uint8_t>(Data, Data + sizeof(Data)),
- DataProv.ConsumeRemainingBytes());
- EXPECT_EQ(std::vector<uint8_t>(), DataProv.ConsumeRemainingBytes());
+ DataProv.ConsumeRemainingBytes<uint8_t>());
+ EXPECT_EQ(std::vector<uint8_t>(),
+ DataProv.ConsumeRemainingBytes<uint8_t>());
}
{
FuzzedDataProvider DataProv(Data, sizeof(Data));
EXPECT_EQ(std::vector<uint8_t>(Data, Data + 123),
- DataProv.ConsumeBytes(123));
+ DataProv.ConsumeBytes<uint8_t>(123));
EXPECT_EQ(std::vector<char>(Data + 123, Data + sizeof(Data)),
DataProv.ConsumeRemainingBytes<char>());
}
@@ -206,7 +230,7 @@ TEST(FuzzedDataProvider, ConsumeRemainin
{
FuzzedDataProvider DataProv(Data, sizeof(Data));
EXPECT_EQ(std::vector<uint8_t>(Data, Data + 123),
- DataProv.ConsumeBytes(123));
+ DataProv.ConsumeBytes<uint8_t>(123));
EXPECT_EQ(std::string(Data + 123, Data + sizeof(Data)),
DataProv.ConsumeRemainingBytesAsString());
}
@@ -265,9 +289,17 @@ TEST(FuzzedDataProvider, PickValueInArra
EXPECT_EQ(uint8_t(0x69), DataProv.PickValueInArray(Data));
EXPECT_EQ(uint8_t(0xD6), DataProv.PickValueInArray(Data));
+ EXPECT_EQ(uint32_t(777), DataProv.PickValueInArray<uint32_t>({1337, 777}));
+ EXPECT_EQ(uint32_t(777), DataProv.PickValueInArray<uint32_t>({1337, 777}));
+ EXPECT_EQ(uint64_t(1337), DataProv.PickValueInArray<uint64_t>({1337, 777}));
+ EXPECT_EQ(size_t(777), DataProv.PickValueInArray<size_t>({1337, 777}));
+ EXPECT_EQ(int16_t(1337), DataProv.PickValueInArray<int16_t>({1337, 777}));
+ EXPECT_EQ(int32_t(777), DataProv.PickValueInArray<int32_t>({1337, 777}));
+ EXPECT_EQ(int64_t(777), DataProv.PickValueInArray<int64_t>({1337, 777}));
+
// Exhaust the buffer.
auto String = DataProv.ConsumeBytesAsString(31337);
- EXPECT_EQ(size_t(1007), String.length());
+ EXPECT_EQ(size_t(1000), String.length());
EXPECT_EQ(uint8_t(0x8A), DataProv.PickValueInArray(Data));
}
@@ -306,12 +338,13 @@ TEST(FuzzedDataProvider, remaining_bytes
EXPECT_EQ(size_t(1024), DataProv.remaining_bytes());
EXPECT_EQ(false, DataProv.ConsumeBool());
EXPECT_EQ(size_t(1024 - 1), DataProv.remaining_bytes());
- EXPECT_EQ(std::vector<uint8_t>(Data, Data + 8), DataProv.ConsumeBytes(8));
+ EXPECT_EQ(std::vector<uint8_t>(Data, Data + 8),
+ DataProv.ConsumeBytes<uint8_t>(8));
EXPECT_EQ(size_t(1024 - 1 - 8), DataProv.remaining_bytes());
// Exhaust the buffer.
EXPECT_EQ(std::vector<uint8_t>(Data + 8, Data + sizeof(Data) - 1),
- DataProv.ConsumeRemainingBytes());
+ DataProv.ConsumeRemainingBytes<uint8_t>());
EXPECT_EQ(size_t(0), DataProv.remaining_bytes());
}
Modified: compiler-rt/trunk/lib/fuzzer/utils/FuzzedDataProvider.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/utils/FuzzedDataProvider.h?rev=363735&r1=363734&r2=363735&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/utils/FuzzedDataProvider.h (original)
+++ compiler-rt/trunk/lib/fuzzer/utils/FuzzedDataProvider.h Tue Jun 18 13:29:11 2019
@@ -6,9 +6,8 @@
//
//===----------------------------------------------------------------------===//
// A single header library providing an utility class to break up an array of
-// bytes (supposedly provided by a fuzzing engine) for multiple consumers.
-// Whenever run on the same input, provides the same output, as long as its
-// methods are called in the same order, with the same arguments.
+// bytes. Whenever run on the same input, provides the same output, as long as
+// its methods are called in the same order, with the same arguments.
//===----------------------------------------------------------------------===//
#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
@@ -20,68 +19,63 @@
#include <algorithm>
#include <cstring>
+#include <initializer_list>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
class FuzzedDataProvider {
- public:
- typedef uint8_t data_type;
-
+public:
// |data| is an array of length |size| that the FuzzedDataProvider wraps to
// provide more granular access. |data| must outlive the FuzzedDataProvider.
- FuzzedDataProvider(const uint8_t* data, size_t size)
+ FuzzedDataProvider(const uint8_t *data, size_t size)
: data_ptr_(data), remaining_bytes_(size) {}
~FuzzedDataProvider() = default;
// Returns a std::vector containing |num_bytes| of input data. If fewer than
// |num_bytes| of data remain, returns a shorter std::vector containing all
- // of the data that's left.
- template <typename T = data_type>
- std::vector<T> ConsumeBytes(size_t num_bytes) {
- static_assert(sizeof(T) == sizeof(data_type), "Incompatible data type.");
-
+ // of the data that's left. Can be used with any byte sized type, such as
+ // char, unsigned char, uint8_t, etc.
+ template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) {
num_bytes = std::min(num_bytes, remaining_bytes_);
+ return ConsumeBytes<T>(num_bytes, num_bytes);
+ }
- // The point of using the size-based constructor below is to increase the
- // odds of having a vector object with capacity being equal to the length.
- // That part is always implementation specific, but at least both libc++ and
- // libstdc++ allocate the requested number of bytes in that constructor,
- // which seems to be a natual choice for other implementations as well.
- // To increase the odds even more, we also call |shrink_to_fit| below.
- std::vector<T> result(num_bytes);
- std::memcpy(result.data(), data_ptr_, num_bytes);
- Advance(num_bytes);
-
- // Even though |shrink_to_fit| is also implementation specific, we expect it
- // to provide an additional assurance in case vector's constructor allocated
- // a buffer which is larger than the actual amount of data we put inside it.
- result.shrink_to_fit();
+ // Similar to |ConsumeBytes|, but also appends the terminator value at the end
+ // of the resulting vector. Useful, when a mutable null-terminated C-string is
+ // needed, for example. But that is a rare case. Better avoid it, if possible,
+ // and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods.
+ template <typename T>
+ std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes,
+ T terminator = 0) {
+ num_bytes = std::min(num_bytes, remaining_bytes_);
+ std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
+ result.back() = terminator;
return result;
}
- // Prefer using |ConsumeBytes| unless you actually need a std::string object.
- // Returns a std::string containing |num_bytes| of input data. If fewer than
- // |num_bytes| of data remain, returns a shorter std::string containing all
- // of the data that's left.
+ // Returns a std::string containing |num_bytes| of input data. Using this and
+ // |.c_str()| on the resulting string is the best way to get an immutable
+ // null-terminated C string. If fewer than |num_bytes| of data remain, returns
+ // a shorter std::string containing all of the data that's left.
std::string ConsumeBytesAsString(size_t num_bytes) {
- static_assert(sizeof(std::string::value_type) == sizeof(data_type),
+ static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
"ConsumeBytesAsString cannot convert the data to a string.");
num_bytes = std::min(num_bytes, remaining_bytes_);
std::string result(
- reinterpret_cast<const std::string::value_type*>(data_ptr_), num_bytes);
+ reinterpret_cast<const std::string::value_type *>(data_ptr_),
+ num_bytes);
Advance(num_bytes);
return result;
}
- // Returns a number in the range [min, max] by consuming bytes from the input
- // data. The value might not be uniformly distributed in the given range. If
- // there's no input data left, always returns |min|. |min| must be less than
- // or equal to |max|.
- template <typename T>
- T ConsumeIntegralInRange(T min, T max) {
+ // Returns a number in the range [min, max] by consuming bytes from the
+ // input data. The value might not be uniformly distributed in the given
+ // range. If there's no input data left, always returns |min|. |min| must
+ // be less than or equal to |max|.
+ template <typename T> T ConsumeIntegralInRange(T min, T max) {
static_assert(std::is_integral<T>::value, "An integral type is required.");
static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
@@ -106,7 +100,7 @@ class FuzzedDataProvider {
offset += CHAR_BIT;
}
- // Avoid division by 0, in the case |range + 1| results in overflow.
+ // Avoid division by 0, in case |range + 1| results in overflow.
if (range != std::numeric_limits<decltype(range)>::max())
result = result % (range + 1);
@@ -125,14 +119,17 @@ class FuzzedDataProvider {
// stable fuzzer than picking the length of a string independently from
// picking its contents.
std::string result;
+
+ // Reserve the anticipated capaticity to prevent several reallocations.
+ result.reserve(std::min(max_length, remaining_bytes_));
for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
- char next = static_cast<char>(data_ptr_[0]);
+ char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
Advance(1);
if (next == '\\' && remaining_bytes_ != 0) {
- next = static_cast<char>(data_ptr_[0]);
+ next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
Advance(1);
if (next != '\\')
- return result;
+ break;
}
result += next;
}
@@ -142,8 +139,7 @@ class FuzzedDataProvider {
}
// Returns a std::vector containing all remaining bytes of the input data.
- template <typename T = data_type>
- std::vector<T> ConsumeRemainingBytes() {
+ template <typename T> std::vector<T> ConsumeRemainingBytes() {
return ConsumeBytes<T>(remaining_bytes_);
}
@@ -157,8 +153,7 @@ class FuzzedDataProvider {
// Returns a number in the range [Type's min, Type's max]. The value might
// not be uniformly distributed in the given range. If there's no input data
// left, always returns |min|.
- template <typename T>
- T ConsumeIntegral() {
+ template <typename T> T ConsumeIntegral() {
return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}
@@ -166,18 +161,23 @@ class FuzzedDataProvider {
// Reads one byte and returns a bool, or false when no data remains.
bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); }
- // Returns a value from |array|, consuming as many bytes as needed to do so.
- // |array| must be a fixed-size array.
+ // Returns a copy of a value selected from a fixed-size |array|.
template <typename T, size_t size>
- T PickValueInArray(T (&array)[size]) {
+ T PickValueInArray(const T (&array)[size]) {
+ static_assert(size > 0, "The array must be non empty.");
return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
}
+ template <typename T>
+ T PickValueInArray(std::initializer_list<const T> list) {
+ // static_assert(list.size() > 0, "The array must be non empty.");
+ return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
+ }
+
// Return an enum value. The enum must start at 0 and be contiguous. It must
// also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
// enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
- template <typename T>
- T ConsumeEnum() {
+ template <typename T> T ConsumeEnum() {
static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
0, static_cast<uint32_t>(T::kMaxValue)));
@@ -186,9 +186,9 @@ class FuzzedDataProvider {
// Reports the remaining bytes available for fuzzed input.
size_t remaining_bytes() { return remaining_bytes_; }
- private:
- FuzzedDataProvider(const FuzzedDataProvider&) = delete;
- FuzzedDataProvider& operator=(const FuzzedDataProvider&) = delete;
+private:
+ FuzzedDataProvider(const FuzzedDataProvider &) = delete;
+ FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
void Advance(size_t num_bytes) {
if (num_bytes > remaining_bytes_)
@@ -198,8 +198,50 @@ class FuzzedDataProvider {
remaining_bytes_ -= num_bytes;
}
- const data_type* data_ptr_;
+ template <typename T>
+ std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) {
+ static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
+
+ // The point of using the size-based constructor below is to increase the
+ // odds of having a vector object with capacity being equal to the length.
+ // That part is always implementation specific, but at least both libc++ and
+ // libstdc++ allocate the requested number of bytes in that constructor,
+ // which seems to be a natural choice for other implementations as well.
+ // To increase the odds even more, we also call |shrink_to_fit| below.
+ std::vector<T> result(size);
+ std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
+ Advance(num_bytes_to_consume);
+
+ // Even though |shrink_to_fit| is also implementation specific, we expect it
+ // to provide an additional assurance in case vector's constructor allocated
+ // a buffer which is larger than the actual amount of data we put inside it.
+ result.shrink_to_fit();
+ return result;
+ }
+
+ template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) {
+ static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
+ static_assert(!std::numeric_limits<TU>::is_signed,
+ "Source type must be unsigned.");
+ static_assert(std::numeric_limits<TS>::is_signed,
+ "Destination type must be signed.");
+
+ // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
+ if (std::numeric_limits<TS>::is_modulo)
+ return static_cast<TS>(value);
+
+ // Avoid using implementation-defined unsigned to signer conversions.
+ // To learn more, see https://stackoverflow.com/questions/13150449.
+ if (value <= std::numeric_limits<TS>::max())
+ return static_cast<TS>(value);
+ else {
+ constexpr auto TS_min = std::numeric_limits<TS>::min();
+ return TS_min + static_cast<char>(value - TS_min);
+ }
+ }
+
+ const uint8_t *data_ptr_;
size_t remaining_bytes_;
};
-#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
+#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
More information about the llvm-commits
mailing list