[llvm] r257985 - [libFuzzer] replace vector with a simpler data structure in the Dictionaries to avoid memory allocations on hot path

Kostya Serebryany via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 19 12:40:03 PST 2016


Done in r258178, thanks again for the hint.

On Fri, Jan 15, 2016 at 9:54 PM, Kostya Serebryany <kcc at google.com> wrote:

>
>
>
> On Fri, Jan 15, 2016 at 8:10 PM, David Blaikie <dblaikie at gmail.com> wrote:
>
>>
>>
>> On Fri, Jan 15, 2016 at 7:53 PM, Kostya Serebryany via llvm-commits <
>> llvm-commits at lists.llvm.org> wrote:
>>
>>> Author: kcc
>>> Date: Fri Jan 15 21:53:32 2016
>>> New Revision: 257985
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=257985&view=rev
>>> Log:
>>> [libFuzzer] replace vector with a simpler data structure in the
>>> Dictionaries to avoid memory allocations on hot path
>>>
>>> Modified:
>>>     llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
>>>     llvm/trunk/lib/Fuzzer/FuzzerInternal.h
>>>     llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp
>>>     llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp
>>>     llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp
>>>     llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp
>>>
>>> Modified: llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp (original)
>>> +++ llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp Fri Jan 15 21:53:32 2016
>>> @@ -301,7 +301,8 @@ int FuzzerDriver(const std::vector<std::
>>>    Fuzzer F(USF, Options);
>>>
>>>    for (auto &U: Dictionary)
>>> -    USF.GetMD().AddWordToManualDictionary(U);
>>> +    if (U.size() <= Word::GetMaxSize())
>>> +      USF.GetMD().AddWordToManualDictionary(Word(U.data(), U.size()));
>>>
>>>    // Timer
>>>    if (Flags.timeout > 0)
>>>
>>> Modified: llvm/trunk/lib/Fuzzer/FuzzerInternal.h
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerInternal.h?rev=257985&r1=257984&r2=257985&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Fuzzer/FuzzerInternal.h (original)
>>> +++ llvm/trunk/lib/Fuzzer/FuzzerInternal.h Fri Jan 15 21:53:32 2016
>>> @@ -18,6 +18,7 @@
>>>  #include <cstddef>
>>>  #include <cstdlib>
>>>  #include <string>
>>> +#include <string.h>
>>>  #include <vector>
>>>  #include <unordered_set>
>>>
>>> @@ -27,6 +28,40 @@ namespace fuzzer {
>>>  using namespace std::chrono;
>>>  typedef std::vector<uint8_t> Unit;
>>>
>>> +// A simple POD sized array of bytes.
>>> +template<size_t kMaxSize>
>>> +class FixedWord {
>>> + public:
>>> +
>>> +  FixedWord() : Size(0) {}
>>> +  FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); }
>>> +
>>> +  void Set(const uint8_t *B, uint8_t S) {
>>> +    assert(S <= kMaxSize);
>>> +    memcpy(Data, B, S);
>>> +    Size = S;
>>> +  }
>>> +
>>> +  bool operator == (const FixedWord<kMaxSize> &w) const {
>>> +    return Size == w.Size && 0 == memcmp(Data, w.Data, Size);
>>> +  }
>>> +
>>> +  bool operator < (const FixedWord<kMaxSize> &w) const {
>>> +    if (Size != w.Size) return Size < w.Size;
>>> +    return memcmp(Data, w.Data, Size) < 0;
>>> +  }
>>> +
>>> +  static size_t GetMaxSize() { return kMaxSize; }
>>> +  const uint8_t *data() const { return Data; }
>>> +  uint8_t size() const { return Size; }
>>> +
>>> + private:
>>> +  uint8_t Size;
>>> +  uint8_t Data[kMaxSize];
>>> +};
>>> +
>>> +typedef FixedWord<27> Word;  // 28 bytes.
>>> +
>>>  std::string FileToString(const std::string &Path);
>>>  Unit FileToVector(const std::string &Path);
>>>  void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V,
>>> @@ -43,6 +78,7 @@ void PrintHexArray(const uint8_t *Data,
>>>                     const char *PrintAfter = "");
>>>  void PrintASCII(const uint8_t *Data, size_t Size, const char
>>> *PrintAfter = "");
>>>  void PrintASCII(const Unit &U, const char *PrintAfter = "");
>>> +void PrintASCII(const Word &W, const char *PrintAfter = "");
>>>  std::string Hash(const Unit &U);
>>>  void SetTimer(int Seconds);
>>>  std::string Base64(const Unit &U);
>>> @@ -118,9 +154,9 @@ class MutationDispatcher {
>>>    size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t
>>> *Data2,
>>>                     size_t Size2, uint8_t *Out, size_t MaxOutSize);
>>>
>>> -  void AddWordToManualDictionary(const Unit &Word);
>>> +  void AddWordToManualDictionary(const Word &W);
>>>
>>> -  void AddWordToAutoDictionary(const Unit &Word, size_t PositionHint);
>>> +  void AddWordToAutoDictionary(const Word &W, size_t PositionHint);
>>>    void ClearAutoDictionary();
>>>    void PrintRecommendedDictionary();
>>>
>>>
>>> Modified: llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp (original)
>>> +++ llvm/trunk/lib/Fuzzer/FuzzerMutate.cpp Fri Jan 15 21:53:32 2016
>>> @@ -23,16 +23,15 @@ struct Mutator {
>>>  };
>>>
>>>  struct DictionaryEntry {
>>> -  Unit Word;
>>> +  Word W;
>>>    size_t PositionHint;
>>>  };
>>>
>>>  struct Dictionary : public std::vector<DictionaryEntry>{
>>> -  bool ContainsWord(const Unit &W) const {
>>> +  bool ContainsWord(const Word &W) const {
>>>      return end() !=
>>> -           std::find_if(begin(), end(), [&](const DictionaryEntry &DE) {
>>> -             return DE.Word == W;
>>> -           });
>>> +           std::find_if(begin(), end(),
>>> +                        [&](const DictionaryEntry &DE) { return DE.W ==
>>> W; });
>>>
>>
>> This ^ looks like std::any_of, perhaps? (maybe even llvm::any_of, for the
>> ultimate convenience)
>>
>
> heh, yes indeed! Will change next week.
> (My last careful reading of <algorithm> predates C++11. Need to refresh!)
> Can't use llvm::, so it'll be std::any_of.
>
>
>>
>>
>>>    }
>>>  };
>>>
>>> @@ -161,20 +160,20 @@ size_t MutationDispatcher::Impl::AddWord
>>>      size_t MaxSize) {
>>>    if (D.empty()) return 0;
>>>    const DictionaryEntry &DE = D[Rand(D.size())];
>>> -  const Unit &Word = DE.Word;
>>> +  const Word &W = DE.W;
>>>    size_t PositionHint = DE.PositionHint;
>>>    bool UsePositionHint = PositionHint !=
>>> std::numeric_limits<size_t>::max() &&
>>> -                         PositionHint + Word.size() < Size &&
>>> Rand.RandBool();
>>> -  if (Rand.RandBool()) {  // Insert Word.
>>> -    if (Size + Word.size() > MaxSize) return 0;
>>> +                         PositionHint + W.size() < Size &&
>>> Rand.RandBool();
>>> +  if (Rand.RandBool()) {  // Insert W.
>>> +    if (Size + W.size() > MaxSize) return 0;
>>>      size_t Idx = UsePositionHint ? PositionHint : Rand(Size + 1);
>>> -    memmove(Data + Idx + Word.size(), Data + Idx, Size - Idx);
>>> -    memcpy(Data + Idx, Word.data(), Word.size());
>>> -    Size += Word.size();
>>> -  } else {  // Overwrite some bytes with Word.
>>> -    if (Word.size() > Size) return 0;
>>> -    size_t Idx = UsePositionHint ? PositionHint : Rand(Size -
>>> Word.size());
>>> -    memcpy(Data + Idx, Word.data(), Word.size());
>>> +    memmove(Data + Idx + W.size(), Data + Idx, Size - Idx);
>>> +    memcpy(Data + Idx, W.data(), W.size());
>>> +    Size += W.size();
>>> +  } else {  // Overwrite some bytes with W.
>>> +    if (W.size() > Size) return 0;
>>> +    size_t Idx = UsePositionHint ? PositionHint : Rand(Size - W.size());
>>> +    memcpy(Data + Idx, W.data(), W.size());
>>>    }
>>>    CurrentDictionaryEntrySequence.push_back(DE);
>>>    return Size;
>>> @@ -238,16 +237,16 @@ void MutationDispatcher::StartMutationSe
>>>  void MutationDispatcher::RecordSuccessfulMutationSequence() {
>>>    for (auto &DE : MDImpl->CurrentDictionaryEntrySequence)
>>>      // Linear search is fine here as this happens seldom.
>>> -    if (!MDImpl->PersistentAutoDictionary.ContainsWord(DE.Word))
>>> +    if (!MDImpl->PersistentAutoDictionary.ContainsWord(DE.W))
>>>        MDImpl->PersistentAutoDictionary.push_back(
>>> -          {DE.Word, std::numeric_limits<size_t>::max()});
>>> +          {DE.W, std::numeric_limits<size_t>::max()});
>>>  }
>>>
>>>  void MutationDispatcher::PrintRecommendedDictionary() {
>>> -  std::vector<Unit> V;
>>> +  std::vector<Word> V;
>>>    for (auto &DE : MDImpl->PersistentAutoDictionary)
>>> -    if (!MDImpl->ManualDictionary.ContainsWord(DE.Word))
>>> -      V.push_back(DE.Word);
>>> +    if (!MDImpl->ManualDictionary.ContainsWord(DE.W))
>>> +      V.push_back(DE.W);
>>>    if (V.empty()) return;
>>>    Printf("###### Recommended dictionary. ######\n");
>>>    for (auto &U: V) {
>>> @@ -265,7 +264,7 @@ void MutationDispatcher::PrintMutationSe
>>>      Printf(" DE: ");
>>>      for (auto &DE : MDImpl->CurrentDictionaryEntrySequence) {
>>>        Printf("\"");
>>> -      PrintASCII(DE.Word, "\"-");
>>> +      PrintASCII(DE.W, "\"-");
>>>      }
>>>    }
>>>  }
>>> @@ -299,16 +298,16 @@ void MutationDispatcher::SetCorpus(const
>>>    MDImpl->SetCorpus(Corpus);
>>>  }
>>>
>>> -void MutationDispatcher::AddWordToManualDictionary(const Unit &Word) {
>>> +void MutationDispatcher::AddWordToManualDictionary(const Word &W) {
>>>    MDImpl->ManualDictionary.push_back(
>>> -      {Word, std::numeric_limits<size_t>::max()});
>>> +      {W, std::numeric_limits<size_t>::max()});
>>>  }
>>>
>>> -void MutationDispatcher::AddWordToAutoDictionary(const Unit &Word,
>>> +void MutationDispatcher::AddWordToAutoDictionary(const Word &W,
>>>                                                   size_t PositionHint) {
>>>    static const size_t kMaxAutoDictSize = 1 << 14;
>>>    if (MDImpl->TempAutoDictionary.size() >= kMaxAutoDictSize) return;
>>> -  MDImpl->TempAutoDictionary.push_back({Word, PositionHint});
>>> +  MDImpl->TempAutoDictionary.push_back({W, PositionHint});
>>>  }
>>>
>>>  void MutationDispatcher::ClearAutoDictionary() {
>>>
>>> Modified: llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp (original)
>>> +++ llvm/trunk/lib/Fuzzer/FuzzerTraceState.cpp Fri Jan 15 21:53:32 2016
>>> @@ -164,14 +164,10 @@ struct LabelRange {
>>>
>>>  // For now, very simple: put Size bytes of Data at position Pos.
>>>  struct TraceBasedMutation {
>>> -  static const size_t kMaxSize = 28;
>>> -  uint32_t Pos : 24;
>>> -  uint32_t Size : 8;
>>> -  uint8_t  Data[kMaxSize];
>>> +  uint32_t Pos;
>>> +  Word W;
>>>  };
>>>
>>> -const size_t TraceBasedMutation::kMaxSize;
>>> -
>>>  // Declared as static globals for faster checks inside the hooks.
>>>  static bool RecordingTraces = false;
>>>  static bool RecordingMemcmp = false;
>>> @@ -223,12 +219,11 @@ class TraceState {
>>>      RecordingMemcmp = false;
>>>      for (size_t i = 0; i < NumMutations; i++) {
>>>        auto &M = Mutations[i];
>>> -      Unit U(M.Data, M.Data + M.Size);
>>>        if (Options.Verbosity >= 2) {
>>> -        AutoDictUnitCounts[U]++;
>>> +        AutoDictUnitCounts[M.W]++;
>>>          AutoDictAdds++;
>>>          if ((AutoDictAdds & (AutoDictAdds - 1)) == 0) {
>>> -          typedef std::pair<size_t, Unit> CU;
>>> +          typedef std::pair<size_t, Word> CU;
>>>            std::vector<CU> CountedUnits;
>>>            for (auto &I : AutoDictUnitCounts)
>>>              CountedUnits.push_back(std::make_pair(I.second, I.first));
>>> @@ -242,17 +237,15 @@ class TraceState {
>>>            }
>>>          }
>>>        }
>>> -      USF.GetMD().AddWordToAutoDictionary(U, M.Pos);
>>> +      USF.GetMD().AddWordToAutoDictionary(M.W, M.Pos);
>>>      }
>>>    }
>>>
>>>    void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {
>>>      if (NumMutations >= kMaxMutations) return;
>>> -    assert(Size <= TraceBasedMutation::kMaxSize);
>>>      auto &M = Mutations[NumMutations++];
>>>      M.Pos = Pos;
>>> -    M.Size = Size;
>>> -    memcpy(M.Data, Data, Size);
>>> +    M.W.Set(Data, Size);
>>>    }
>>>
>>>    void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {
>>> @@ -274,7 +267,7 @@ class TraceState {
>>>    const Fuzzer::FuzzingOptions &Options;
>>>    uint8_t **CurrentUnitData;
>>>    size_t *CurrentUnitSize;
>>> -  std::map<Unit, size_t> AutoDictUnitCounts;
>>> +  std::map<Word, size_t> AutoDictUnitCounts;
>>>    size_t AutoDictAdds = 0;
>>>    static thread_local bool IsMyThread;
>>>  };
>>> @@ -423,7 +416,7 @@ void TraceState::TraceCmpCallback(uintpt
>>>  void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t
>>> *Data1,
>>>                                       const uint8_t *Data2) {
>>>    if (!RecordingMemcmp || !IsMyThread) return;
>>> -  CmpSize = std::min(CmpSize, TraceBasedMutation::kMaxSize);
>>> +  CmpSize = std::min(CmpSize, Word::GetMaxSize());
>>>    int Added2 = TryToAddDesiredData(Data1, Data2, CmpSize);
>>>    int Added1 = TryToAddDesiredData(Data2, Data1, CmpSize);
>>>    if ((Added1 || Added2) && Options.Verbosity >= 3) {
>>>
>>> Modified: llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp (original)
>>> +++ llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp Fri Jan 15 21:53:32 2016
>>> @@ -49,10 +49,12 @@ void PrintASCII(const uint8_t *Data, siz
>>>    Printf("%s", PrintAfter);
>>>  }
>>>
>>> +void PrintASCII(const Word &W, const char *PrintAfter) {
>>> +  PrintASCII(W.data(), W.size(), PrintAfter);
>>> +}
>>> +
>>>  void PrintASCII(const Unit &U, const char *PrintAfter) {
>>> -  for (auto X : U)
>>> -    PrintASCIIByte(X);
>>> -  Printf("%s", PrintAfter);
>>> +  PrintASCII(U.data(), U.size(), PrintAfter);
>>>  }
>>>
>>>  std::string Hash(const Unit &U) {
>>>
>>> Modified: llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp?rev=257985&r1=257984&r2=257985&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp (original)
>>> +++ llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp Fri Jan 15 21:53:32
>>> 2016
>>> @@ -247,8 +247,8 @@ void TestAddWordFromDictionary(Mutator M
>>>    MutationDispatcher MD(Rand);
>>>    uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD};
>>>    uint8_t Word2[3] = {0xFF, 0xEE, 0xEF};
>>> -  MD.AddWordToManualDictionary(Unit(Word1, Word1 + sizeof(Word1)));
>>> -  MD.AddWordToManualDictionary(Unit(Word2, Word2 + sizeof(Word2)));
>>> +  MD.AddWordToManualDictionary(Word(Word1, sizeof(Word1)));
>>> +  MD.AddWordToManualDictionary(Word(Word2, sizeof(Word2)));
>>>    int FoundMask = 0;
>>>    uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD};
>>>    uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22};
>>> @@ -285,16 +285,16 @@ TEST(FuzzerMutate, AddWordFromDictionary
>>>  void TestAddWordFromDictionaryWithHint(Mutator M, int NumIter) {
>>>    FuzzerRandomLibc Rand(0);
>>>    MutationDispatcher MD(Rand);
>>> -  uint8_t Word[] = {0xAA, 0xBB, 0xCC, 0xDD, 0xFF, 0xEE, 0xEF};
>>> +  uint8_t W[] = {0xAA, 0xBB, 0xCC, 0xDD, 0xFF, 0xEE, 0xEF};
>>>    size_t PosHint = 7777;
>>> -  MD.AddWordToAutoDictionary(Unit(Word, Word + sizeof(Word)), PosHint);
>>> +  MD.AddWordToAutoDictionary(Word(W, sizeof(W)), PosHint);
>>>    int FoundMask = 0;
>>>    for (int i = 0; i < NumIter; i++) {
>>>      uint8_t T[10000];
>>>      memset(T, 0, sizeof(T));
>>>      size_t NewSize = (MD.*M)(T, 9000, 10000);
>>> -    if (NewSize >= PosHint + sizeof(Word) &&
>>> -        !memcmp(Word, T + PosHint, sizeof(Word)))
>>> +    if (NewSize >= PosHint + sizeof(W) &&
>>> +        !memcmp(W, T + PosHint, sizeof(W)))
>>>        FoundMask = 1;
>>>    }
>>>    EXPECT_EQ(FoundMask, 1);
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160119/ab35464f/attachment.html>


More information about the llvm-commits mailing list