[llvm] r257985 - [libFuzzer] replace vector with a simpler data structure in the Dictionaries to avoid memory allocations on hot path
Kostya Serebryany via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 15 21:54:03 PST 2016
On Fri, Jan 15, 2016 at 8:10 PM, David Blaikie <dblaikie at gmail.com> wrote:
>
>
> On Fri, Jan 15, 2016 at 7:53 PM, Kostya Serebryany via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: kcc
>> Date: Fri Jan 15 21:53:32 2016
>> New Revision: 257985
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=257985&view=rev
>> Log:
>> [libFuzzer] replace vector with a simpler data structure in the
>> Dictionaries to avoid memory allocations on hot path
>>
>> Modified:
>> llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
>> llvm/trunk/lib/Fuzzer/FuzzerInternal.h
>> llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp
>> llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp
>> llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp
>> llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp
>>
>> Modified: llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp (original)
>> +++ llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp Fri Jan 15 21:53:32 2016
>> @@ -301,7 +301,8 @@ int FuzzerDriver(const std::vector<std::
>> Fuzzer F(USF, Options);
>>
>> for (auto &U: Dictionary)
>> - USF.GetMD().AddWordToManualDictionary(U);
>> + if (U.size() <= Word::GetMaxSize())
>> + USF.GetMD().AddWordToManualDictionary(Word(U.data(), U.size()));
>>
>> // Timer
>> if (Flags.timeout > 0)
>>
>> Modified: llvm/trunk/lib/Fuzzer/FuzzerInternal.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerInternal.h?rev=257985&r1=257984&r2=257985&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Fuzzer/FuzzerInternal.h (original)
>> +++ llvm/trunk/lib/Fuzzer/FuzzerInternal.h Fri Jan 15 21:53:32 2016
>> @@ -18,6 +18,7 @@
>> #include <cstddef>
>> #include <cstdlib>
>> #include <string>
>> +#include <string.h>
>> #include <vector>
>> #include <unordered_set>
>>
>> @@ -27,6 +28,40 @@ namespace fuzzer {
>> using namespace std::chrono;
>> typedef std::vector<uint8_t> Unit;
>>
>> +// A simple POD sized array of bytes.
>> +template<size_t kMaxSize>
>> +class FixedWord {
>> + public:
>> +
>> + FixedWord() : Size(0) {}
>> + FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); }
>> +
>> + void Set(const uint8_t *B, uint8_t S) {
>> + assert(S <= kMaxSize);
>> + memcpy(Data, B, S);
>> + Size = S;
>> + }
>> +
>> + bool operator == (const FixedWord<kMaxSize> &w) const {
>> + return Size == w.Size && 0 == memcmp(Data, w.Data, Size);
>> + }
>> +
>> + bool operator < (const FixedWord<kMaxSize> &w) const {
>> + if (Size != w.Size) return Size < w.Size;
>> + return memcmp(Data, w.Data, Size) < 0;
>> + }
>> +
>> + static size_t GetMaxSize() { return kMaxSize; }
>> + const uint8_t *data() const { return Data; }
>> + uint8_t size() const { return Size; }
>> +
>> + private:
>> + uint8_t Size;
>> + uint8_t Data[kMaxSize];
>> +};
>> +
>> +typedef FixedWord<27> Word; // 28 bytes.
>> +
>> std::string FileToString(const std::string &Path);
>> Unit FileToVector(const std::string &Path);
>> void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V,
>> @@ -43,6 +78,7 @@ void PrintHexArray(const uint8_t *Data,
>> const char *PrintAfter = "");
>> void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter
>> = "");
>> void PrintASCII(const Unit &U, const char *PrintAfter = "");
>> +void PrintASCII(const Word &W, const char *PrintAfter = "");
>> std::string Hash(const Unit &U);
>> void SetTimer(int Seconds);
>> std::string Base64(const Unit &U);
>> @@ -118,9 +154,9 @@ class MutationDispatcher {
>> size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t
>> *Data2,
>> size_t Size2, uint8_t *Out, size_t MaxOutSize);
>>
>> - void AddWordToManualDictionary(const Unit &Word);
>> + void AddWordToManualDictionary(const Word &W);
>>
>> - void AddWordToAutoDictionary(const Unit &Word, size_t PositionHint);
>> + void AddWordToAutoDictionary(const Word &W, size_t PositionHint);
>> void ClearAutoDictionary();
>> void PrintRecommendedDictionary();
>>
>>
>> Modified: llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp (original)
>> +++ llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp Fri Jan 15 21:53:32 2016
>> @@ -23,16 +23,15 @@ struct Mutator {
>> };
>>
>> struct DictionaryEntry {
>> - Unit Word;
>> + Word W;
>> size_t PositionHint;
>> };
>>
>> struct Dictionary : public std::vector<DictionaryEntry>{
>> - bool ContainsWord(const Unit &W) const {
>> + bool ContainsWord(const Word &W) const {
>> return end() !=
>> - std::find_if(begin(), end(), [&](const DictionaryEntry &DE) {
>> - return DE.Word == W;
>> - });
>> + std::find_if(begin(), end(),
>> + [&](const DictionaryEntry &DE) { return DE.W ==
>> W; });
>>
>
> This ^ looks like std::any_of, perhaps? (maybe even llvm::any_of, for the
> ultimate convenience)
>
heh, yes indeed! Will change next week.
(My last careful reading of <algorithm> predates C++11. Need to refresh!)
Can't use llvm::, so it'll be std::any_of.
>
>
>> }
>> };
>>
>> @@ -161,20 +160,20 @@ size_t MutationDispatcher::Impl::AddWord
>> size_t MaxSize) {
>> if (D.empty()) return 0;
>> const DictionaryEntry &DE = D[Rand(D.size())];
>> - const Unit &Word = DE.Word;
>> + const Word &W = DE.W;
>> size_t PositionHint = DE.PositionHint;
>> bool UsePositionHint = PositionHint !=
>> std::numeric_limits<size_t>::max() &&
>> - PositionHint + Word.size() < Size &&
>> Rand.RandBool();
>> - if (Rand.RandBool()) { // Insert Word.
>> - if (Size + Word.size() > MaxSize) return 0;
>> + PositionHint + W.size() < Size &&
>> Rand.RandBool();
>> + if (Rand.RandBool()) { // Insert W.
>> + if (Size + W.size() > MaxSize) return 0;
>> size_t Idx = UsePositionHint ? PositionHint : Rand(Size + 1);
>> - memmove(Data + Idx + Word.size(), Data + Idx, Size - Idx);
>> - memcpy(Data + Idx, Word.data(), Word.size());
>> - Size += Word.size();
>> - } else { // Overwrite some bytes with Word.
>> - if (Word.size() > Size) return 0;
>> - size_t Idx = UsePositionHint ? PositionHint : Rand(Size -
>> Word.size());
>> - memcpy(Data + Idx, Word.data(), Word.size());
>> + memmove(Data + Idx + W.size(), Data + Idx, Size - Idx);
>> + memcpy(Data + Idx, W.data(), W.size());
>> + Size += W.size();
>> + } else { // Overwrite some bytes with W.
>> + if (W.size() > Size) return 0;
>> + size_t Idx = UsePositionHint ? PositionHint : Rand(Size - W.size());
>> + memcpy(Data + Idx, W.data(), W.size());
>> }
>> CurrentDictionaryEntrySequence.push_back(DE);
>> return Size;
>> @@ -238,16 +237,16 @@ void MutationDispatcher::StartMutationSe
>> void MutationDispatcher::RecordSuccessfulMutationSequence() {
>> for (auto &DE : MDImpl->CurrentDictionaryEntrySequence)
>> // Linear search is fine here as this happens seldom.
>> - if (!MDImpl->PersistentAutoDictionary.ContainsWord(DE.Word))
>> + if (!MDImpl->PersistentAutoDictionary.ContainsWord(DE.W))
>> MDImpl->PersistentAutoDictionary.push_back(
>> - {DE.Word, std::numeric_limits<size_t>::max()});
>> + {DE.W, std::numeric_limits<size_t>::max()});
>> }
>>
>> void MutationDispatcher::PrintRecommendedDictionary() {
>> - std::vector<Unit> V;
>> + std::vector<Word> V;
>> for (auto &DE : MDImpl->PersistentAutoDictionary)
>> - if (!MDImpl->ManualDictionary.ContainsWord(DE.Word))
>> - V.push_back(DE.Word);
>> + if (!MDImpl->ManualDictionary.ContainsWord(DE.W))
>> + V.push_back(DE.W);
>> if (V.empty()) return;
>> Printf("###### Recommended dictionary. ######\n");
>> for (auto &U: V) {
>> @@ -265,7 +264,7 @@ void MutationDispatcher::PrintMutationSe
>> Printf(" DE: ");
>> for (auto &DE : MDImpl->CurrentDictionaryEntrySequence) {
>> Printf("\"");
>> - PrintASCII(DE.Word, "\"-");
>> + PrintASCII(DE.W, "\"-");
>> }
>> }
>> }
>> @@ -299,16 +298,16 @@ void MutationDispatcher::SetCorpus(const
>> MDImpl->SetCorpus(Corpus);
>> }
>>
>> -void MutationDispatcher::AddWordToManualDictionary(const Unit &Word) {
>> +void MutationDispatcher::AddWordToManualDictionary(const Word &W) {
>> MDImpl->ManualDictionary.push_back(
>> - {Word, std::numeric_limits<size_t>::max()});
>> + {W, std::numeric_limits<size_t>::max()});
>> }
>>
>> -void MutationDispatcher::AddWordToAutoDictionary(const Unit &Word,
>> +void MutationDispatcher::AddWordToAutoDictionary(const Word &W,
>> size_t PositionHint) {
>> static const size_t kMaxAutoDictSize = 1 << 14;
>> if (MDImpl->TempAutoDictionary.size() >= kMaxAutoDictSize) return;
>> - MDImpl->TempAutoDictionary.push_back({Word, PositionHint});
>> + MDImpl->TempAutoDictionary.push_back({W, PositionHint});
>> }
>>
>> void MutationDispatcher::ClearAutoDictionary() {
>>
>> Modified: llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp (original)
>> +++ llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp Fri Jan 15 21:53:32 2016
>> @@ -164,14 +164,10 @@ struct LabelRange {
>>
>> // For now, very simple: put Size bytes of Data at position Pos.
>> struct TraceBasedMutation {
>> - static const size_t kMaxSize = 28;
>> - uint32_t Pos : 24;
>> - uint32_t Size : 8;
>> - uint8_t Data[kMaxSize];
>> + uint32_t Pos;
>> + Word W;
>> };
>>
>> -const size_t TraceBasedMutation::kMaxSize;
>> -
>> // Declared as static globals for faster checks inside the hooks.
>> static bool RecordingTraces = false;
>> static bool RecordingMemcmp = false;
>> @@ -223,12 +219,11 @@ class TraceState {
>> RecordingMemcmp = false;
>> for (size_t i = 0; i < NumMutations; i++) {
>> auto &M = Mutations[i];
>> - Unit U(M.Data, M.Data + M.Size);
>> if (Options.Verbosity >= 2) {
>> - AutoDictUnitCounts[U]++;
>> + AutoDictUnitCounts[M.W]++;
>> AutoDictAdds++;
>> if ((AutoDictAdds & (AutoDictAdds - 1)) == 0) {
>> - typedef std::pair<size_t, Unit> CU;
>> + typedef std::pair<size_t, Word> CU;
>> std::vector<CU> CountedUnits;
>> for (auto &I : AutoDictUnitCounts)
>> CountedUnits.push_back(std::make_pair(I.second, I.first));
>> @@ -242,17 +237,15 @@ class TraceState {
>> }
>> }
>> }
>> - USF.GetMD().AddWordToAutoDictionary(U, M.Pos);
>> + USF.GetMD().AddWordToAutoDictionary(M.W, M.Pos);
>> }
>> }
>>
>> void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {
>> if (NumMutations >= kMaxMutations) return;
>> - assert(Size <= TraceBasedMutation::kMaxSize);
>> auto &M = Mutations[NumMutations++];
>> M.Pos = Pos;
>> - M.Size = Size;
>> - memcpy(M.Data, Data, Size);
>> + M.W.Set(Data, Size);
>> }
>>
>> void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {
>> @@ -274,7 +267,7 @@ class TraceState {
>> const Fuzzer::FuzzingOptions &Options;
>> uint8_t **CurrentUnitData;
>> size_t *CurrentUnitSize;
>> - std::map<Unit, size_t> AutoDictUnitCounts;
>> + std::map<Word, size_t> AutoDictUnitCounts;
>> size_t AutoDictAdds = 0;
>> static thread_local bool IsMyThread;
>> };
>> @@ -423,7 +416,7 @@ void TraceState::TraceCmpCallback(uintpt
>> void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t
>> *Data1,
>> const uint8_t *Data2) {
>> if (!RecordingMemcmp || !IsMyThread) return;
>> - CmpSize = std::min(CmpSize, TraceBasedMutation::kMaxSize);
>> + CmpSize = std::min(CmpSize, Word::GetMaxSize());
>> int Added2 = TryToAddDesiredData(Data1, Data2, CmpSize);
>> int Added1 = TryToAddDesiredData(Data2, Data1, CmpSize);
>> if ((Added1 || Added2) && Options.Verbosity >= 3) {
>>
>> Modified: llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp (original)
>> +++ llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp Fri Jan 15 21:53:32 2016
>> @@ -49,10 +49,12 @@ void PrintASCII(const uint8_t *Data, siz
>> Printf("%s", PrintAfter);
>> }
>>
>> +void PrintASCII(const Word &W, const char *PrintAfter) {
>> + PrintASCII(W.data(), W.size(), PrintAfter);
>> +}
>> +
>> void PrintASCII(const Unit &U, const char *PrintAfter) {
>> - for (auto X : U)
>> - PrintASCIIByte(X);
>> - Printf("%s", PrintAfter);
>> + PrintASCII(U.data(), U.size(), PrintAfter);
>> }
>>
>> std::string Hash(const Unit &U) {
>>
>> Modified: llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp (original)
>> +++ llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp Fri Jan 15 21:53:32 2016
>> @@ -247,8 +247,8 @@ void TestAddWordFromDictionary(Mutator M
>> MutationDispatcher MD(Rand);
>> uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD};
>> uint8_t Word2[3] = {0xFF, 0xEE, 0xEF};
>> - MD.AddWordToManualDictionary(Unit(Word1, Word1 + sizeof(Word1)));
>> - MD.AddWordToManualDictionary(Unit(Word2, Word2 + sizeof(Word2)));
>> + MD.AddWordToManualDictionary(Word(Word1, sizeof(Word1)));
>> + MD.AddWordToManualDictionary(Word(Word2, sizeof(Word2)));
>> int FoundMask = 0;
>> uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD};
>> uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22};
>> @@ -285,16 +285,16 @@ TEST(FuzzerMutate, AddWordFromDictionary
>> void TestAddWordFromDictionaryWithHint(Mutator M, int NumIter) {
>> FuzzerRandomLibc Rand(0);
>> MutationDispatcher MD(Rand);
>> - uint8_t Word[] = {0xAA, 0xBB, 0xCC, 0xDD, 0xFF, 0xEE, 0xEF};
>> + uint8_t W[] = {0xAA, 0xBB, 0xCC, 0xDD, 0xFF, 0xEE, 0xEF};
>> size_t PosHint = 7777;
>> - MD.AddWordToAutoDictionary(Unit(Word, Word + sizeof(Word)), PosHint);
>> + MD.AddWordToAutoDictionary(Word(W, sizeof(W)), PosHint);
>> int FoundMask = 0;
>> for (int i = 0; i < NumIter; i++) {
>> uint8_t T[10000];
>> memset(T, 0, sizeof(T));
>> size_t NewSize = (MD.*M)(T, 9000, 10000);
>> - if (NewSize >= PosHint + sizeof(Word) &&
>> - !memcmp(Word, T + PosHint, sizeof(Word)))
>> + if (NewSize >= PosHint + sizeof(W) &&
>> + !memcmp(W, T + PosHint, sizeof(W)))
>> FoundMask = 1;
>> }
>> EXPECT_EQ(FoundMask, 1);
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160115/cec4913a/attachment-0001.html>
More information about the llvm-commits
mailing list