[llvm] 39b6a7f - [FuzzMutate] Module size heuristics

Peter Rong via llvm-commits llvm-commits at lists.llvm.org
Tue May 9 13:58:16 PDT 2023


Author: Zhenkai Weng
Date: 2023-05-09T13:58:10-07:00
New Revision: 39b6a7f06ea970db6b09932a4582376fba71f6b9

URL: https://github.com/llvm/llvm-project/commit/39b6a7f06ea970db6b09932a4582376fba71f6b9
DIFF: https://github.com/llvm/llvm-project/commit/39b6a7f06ea970db6b09932a4582376fba71f6b9.diff

LOG: [FuzzMutate] Module size heuristics

IRMutator::mutateModule() currently requires the bitcode size of the module.
One way to compute the bitcode size is to write the module to a buffer using
BitcodeWriter and measure the buffer size. This would be fine for a single
mutation, but infeasible for repeated mutations due to the large overhead. It
turns out that the only IR strategy whose weight calculation depends on the
current module size is InstDeleterIRStrategy, which deletes instructions more
frequently as the module size approaches a given max size. However, there is no
real need for the size to be in bytes of bitcode, so we can use a different
metric. One alternative is to let the size be the number of objects in the
Module: instructions, functions, global variables, and aliases. Although
getting the number of instructions is still O(n), it should have significantly
less overhead than BitcodeWriter. This changes the IRMutator API: the CurSize
parameter is dropped, since IRMutator::mutateModule() can now calculate the
Module size itself.

Reviewed By: Peter

Differential Revision: https://reviews.llvm.org/D149989

Added: 
    

Modified: 
    llvm/include/llvm/FuzzMutate/IRMutator.h
    llvm/lib/FuzzMutate/IRMutator.cpp
    llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp
    llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
    llvm/unittests/FuzzMutate/StrategiesTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/FuzzMutate/IRMutator.h b/llvm/include/llvm/FuzzMutate/IRMutator.h
index e01b91106fec1..dd4534bd9d1a8 100644
--- a/llvm/include/llvm/FuzzMutate/IRMutator.h
+++ b/llvm/include/llvm/FuzzMutate/IRMutator.h
@@ -70,7 +70,19 @@ class IRMutator {
       : AllowedTypes(std::move(AllowedTypes)),
         Strategies(std::move(Strategies)) {}
 
-  void mutateModule(Module &M, int Seed, size_t CurSize, size_t MaxSize);
+  /// Calculate the size of module as the number of objects in it, i.e.
+  /// instructions, functions, global variables, and aliases.
+  ///
+  /// \param M module
+  /// \return number of objects in module
+  static size_t getModuleSize(const Module &M);
+
+  /// Mutate given module. No change will be made if no strategy is selected.
+  ///
+  /// \param M  module to mutate
+  /// \param Seed seed for random mutation
+  /// \param MaxSize max module size (see getModuleSize)
+  void mutateModule(Module &M, int Seed, size_t MaxSize);
 };
 
 /// Strategy that injects operations into the function.

diff  --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp
index 37a26a8397f2f..90dd532c33c4f 100644
--- a/llvm/lib/FuzzMutate/IRMutator.cpp
+++ b/llvm/lib/FuzzMutate/IRMutator.cpp
@@ -56,17 +56,23 @@ void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
   mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
 }
 
-void IRMutator::mutateModule(Module &M, int Seed, size_t CurSize,
-                             size_t MaxSize) {
+size_t llvm::IRMutator::getModuleSize(const Module &M) {
+  return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
+}
+
+void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
   std::vector<Type *> Types;
   for (const auto &Getter : AllowedTypes)
     Types.push_back(Getter(M.getContext()));
   RandomIRBuilder IB(Seed, Types);
 
+  size_t CurSize = IRMutator::getModuleSize(M);
   auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
   for (const auto &Strategy : Strategies)
     RS.sample(Strategy.get(),
               Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
+  if (RS.totalWeight() == 0)
+    return;
   auto Strategy = RS.getSelection();
 
   Strategy->mutate(M, IB);

diff  --git a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp
index dbcdba730d2d3..eda165d55b0be 100644
--- a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp
+++ b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp
@@ -45,7 +45,7 @@ static cl::opt<char>
              cl::Prefix, cl::init('2'));
 
 static cl::opt<std::string>
-TargetTriple("mtriple", cl::desc("Override target triple for module"));
+    TargetTriple("mtriple", cl::desc("Override target triple for module"));
 
 static std::unique_ptr<TargetMachine> TM;
 static std::unique_ptr<IRMutator> Mutator;
@@ -73,7 +73,7 @@ extern "C" LLVM_ATTRIBUTE_USED size_t LLVMFuzzerCustomMutator(
   else
     M = parseModule(Data, Size, Context);
 
-  Mutator->mutateModule(*M, Seed, Size, MaxSize);
+  Mutator->mutateModule(*M, Seed, MaxSize); // use max bitcode size as a guide
 
   return writeModule(*M, Data, MaxSize);
 }

diff  --git a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
index 9473d6f3fc7ad..d952ea1b06d99 100644
--- a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
+++ b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
@@ -44,11 +44,9 @@ std::unique_ptr<IRMutator> createOptMutator() {
       Type::getInt64Ty, Type::getFloatTy, Type::getDoubleTy};
 
   std::vector<std::unique_ptr<IRMutationStrategy>> Strategies;
-  Strategies.push_back(
-      std::make_unique<InjectorIRStrategy>(
-          InjectorIRStrategy::getDefaultOps()));
-  Strategies.push_back(
-      std::make_unique<InstDeleterIRStrategy>());
+  Strategies.push_back(std::make_unique<InjectorIRStrategy>(
+      InjectorIRStrategy::getDefaultOps()));
+  Strategies.push_back(std::make_unique<InstDeleterIRStrategy>());
   Strategies.push_back(std::make_unique<InstModificationIRStrategy>());
 
   return std::make_unique<IRMutator>(std::move(Types), std::move(Strategies));
@@ -58,7 +56,7 @@ extern "C" LLVM_ATTRIBUTE_USED size_t LLVMFuzzerCustomMutator(
     uint8_t *Data, size_t Size, size_t MaxSize, unsigned int Seed) {
 
   assert(Mutator &&
-      "IR mutator should have been created during fuzzer initialization");
+         "IR mutator should have been created during fuzzer initialization");
 
   LLVMContext Context;
   auto M = parseAndVerify(Data, Size, Context);
@@ -67,7 +65,7 @@ extern "C" LLVM_ATTRIBUTE_USED size_t LLVMFuzzerCustomMutator(
     return 0;
   }
 
-  Mutator->mutateModule(*M, Seed, Size, MaxSize);
+  Mutator->mutateModule(*M, Seed, MaxSize);
 
   if (verifyModule(*M, &errs())) {
     errs() << "mutation result doesn't pass verification\n";
@@ -77,7 +75,7 @@ extern "C" LLVM_ATTRIBUTE_USED size_t LLVMFuzzerCustomMutator(
     // Avoid adding incorrect test cases to the corpus.
     return 0;
   }
-  
+
   std::string Buf;
   {
     raw_string_ostream OS(Buf);
@@ -85,15 +83,15 @@ extern "C" LLVM_ATTRIBUTE_USED size_t LLVMFuzzerCustomMutator(
   }
   if (Buf.size() > MaxSize)
     return 0;
-  
+
   // There are some invariants which are not checked by the verifier in favor
   // of having them checked by the parser. They may be considered as bugs in the
   // verifier and should be fixed there. However until all of those are covered
   // we want to check for them explicitly. Otherwise we will add incorrect input
-  // to the corpus and this is going to confuse the fuzzer which will start 
+  // to the corpus and this is going to confuse the fuzzer which will start
   // exploration of the bitcode reader error handling code.
-  auto NewM = parseAndVerify(
-      reinterpret_cast<const uint8_t*>(Buf.data()), Buf.size(), Context);
+  auto NewM = parseAndVerify(reinterpret_cast<const uint8_t *>(Buf.data()),
+                             Buf.size(), Context);
   if (!NewM) {
     errs() << "mutator failed to re-read the module\n";
 #ifndef NDEBUG
@@ -174,8 +172,8 @@ static void handleLLVMFatalError(void *, const char *Message, bool) {
   abort();
 }
 
-extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(
-    int *argc, char ***argv) {
+extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
+                                                        char ***argv) {
   EnableDebugBuffering = true;
 
   // Make sure we print the summary and the current unit when LLVM errors out.

diff  --git a/llvm/unittests/FuzzMutate/StrategiesTest.cpp b/llvm/unittests/FuzzMutate/StrategiesTest.cpp
index 850ae16dea442..b89ca106cc76a 100644
--- a/llvm/unittests/FuzzMutate/StrategiesTest.cpp
+++ b/llvm/unittests/FuzzMutate/StrategiesTest.cpp
@@ -84,7 +84,7 @@ void IterateOnSource(StringRef Source, IRMutator &Mutator) {
     auto M = parseAssembly(Source.data(), Ctx);
     ASSERT_TRUE(M && !verifyModule(*M, &errs()));
 
-    Mutator.mutateModule(*M, Seed, Source.size(), Source.size() + 100);
+    Mutator.mutateModule(*M, Seed, IRMutator::getModuleSize(*M) + 100);
     EXPECT_TRUE(!verifyModule(*M, &errs()));
   }
 }
@@ -97,7 +97,7 @@ static void mutateAndVerifyModule(StringRef Source,
   std::mt19937 mt(Seed);
   std::uniform_int_distribution<int> RandInt(INT_MIN, INT_MAX);
   for (int i = 0; i < repeat; i++) {
-    Mutator->mutateModule(*M, RandInt(mt), Source.size(), Source.size() + 1024);
+    Mutator->mutateModule(*M, RandInt(mt), IRMutator::getModuleSize(*M) + 1024);
     ASSERT_FALSE(verifyModule(*M, &errs()));
   }
 }
@@ -118,7 +118,7 @@ TEST(InjectorIRStrategyTest, EmptyModule) {
   auto Mutator = createInjectorMutator();
   ASSERT_TRUE(Mutator);
 
-  Mutator->mutateModule(*M, Seed, 1, 1);
+  Mutator->mutateModule(*M, Seed, IRMutator::getModuleSize(*M) + 1);
   EXPECT_TRUE(!verifyModule(*M, &errs()));
 }
 
@@ -194,7 +194,7 @@ static void checkModifyNoUnsignedAndNoSignedWrap(StringRef Opc) {
   bool FoundNUW = false;
   bool FoundNSW = false;
   for (int i = 0; i < 100; ++i) {
-    Mutator->mutateModule(*M, Seed + i, Source.size(), Source.size() + 100);
+    Mutator->mutateModule(*M, Seed + i, IRMutator::getModuleSize(*M) + 100);
     EXPECT_TRUE(!verifyModule(*M, &errs()));
     FoundNUW |= AddI->hasNoUnsignedWrap();
     FoundNSW |= AddI->hasNoSignedWrap();
@@ -237,7 +237,7 @@ TEST(InstModificationIRStrategyTest, ICmp) {
   ASSERT_TRUE(M && !verifyModule(*M, &errs()));
   bool FoundNE = false;
   for (int i = 0; i < 100; ++i) {
-    Mutator->mutateModule(*M, Seed + i, Source.size(), Source.size() + 100);
+    Mutator->mutateModule(*M, Seed + i, IRMutator::getModuleSize(*M) + 100);
     EXPECT_TRUE(!verifyModule(*M, &errs()));
     FoundNE |= CI->getPredicate() == CmpInst::ICMP_NE;
   }
@@ -262,7 +262,7 @@ TEST(InstModificationIRStrategyTest, FCmp) {
   ASSERT_TRUE(M && !verifyModule(*M, &errs()));
   bool FoundONE = false;
   for (int i = 0; i < 100; ++i) {
-    Mutator->mutateModule(*M, Seed + i, Source.size(), Source.size() + 100);
+    Mutator->mutateModule(*M, Seed + i, IRMutator::getModuleSize(*M) + 100);
     EXPECT_TRUE(!verifyModule(*M, &errs()));
     FoundONE |= CI->getPredicate() == CmpInst::FCMP_ONE;
   }
@@ -287,7 +287,7 @@ TEST(InstModificationIRStrategyTest, GEP) {
   ASSERT_TRUE(M && !verifyModule(*M, &errs()));
   bool FoundInbounds = false;
   for (int i = 0; i < 100; ++i) {
-    Mutator->mutateModule(*M, Seed + i, Source.size(), Source.size() + 100);
+    Mutator->mutateModule(*M, Seed + i, IRMutator::getModuleSize(*M) + 100);
     EXPECT_TRUE(!verifyModule(*M, &errs()));
     FoundInbounds |= GEP->isInBounds();
   }
@@ -311,7 +311,7 @@ void VerfyOperandShuffled(StringRef Source, std::pair<int, int> ShuffleItems) {
   ASSERT_TRUE(Inst->getOperand(ShuffleItems.second) ==
               dyn_cast<Value>(F.getArg(ShuffleItems.second)));
 
-  Mutator->mutateModule(*M, 0, Source.size(), Source.size() + 100);
+  Mutator->mutateModule(*M, 0, IRMutator::getModuleSize(*M) + 100);
   ASSERT_TRUE(!verifyModule(*M, &errs()));
 
   ASSERT_TRUE(Inst->getOperand(ShuffleItems.first) ==
@@ -350,7 +350,7 @@ void VerfyDivDidntShuffle(StringRef Source) {
   EXPECT_TRUE(isa<Constant>(Inst->getOperand(0)));
   EXPECT_TRUE(Inst->getOperand(1) == dyn_cast<Value>(F.getArg(0)));
 
-  Mutator->mutateModule(*M, Seed, Source.size(), Source.size() + 100);
+  Mutator->mutateModule(*M, Seed, IRMutator::getModuleSize(*M) + 100);
   EXPECT_TRUE(!verifyModule(*M, &errs()));
 
   // Didn't shuffle.
@@ -383,7 +383,7 @@ TEST(FunctionIRStrategy, Func) {
   auto M = parseAssembly(Source, Ctx);
   srand(Seed);
   for (int i = 0; i < 100; i++) {
-    Mutator->mutateModule(*M, rand(), 0, 1024);
+    Mutator->mutateModule(*M, rand(), 1024);
     EXPECT_TRUE(!verifyModule(*M, &errs()));
   }
 }
@@ -406,7 +406,7 @@ TEST(InstModificationIRStrategy, Exact) {
   BinaryOperator *AShr = cast<BinaryOperator>(&*F.begin()->begin());
   bool FoundExact = false;
   for (int i = 0; i < 100; ++i) {
-    Mutator->mutateModule(*M, RandInt(mt), Source.size(), Source.size() + 100);
+    Mutator->mutateModule(*M, RandInt(mt), IRMutator::getModuleSize(*M) + 100);
     ASSERT_FALSE(verifyModule(*M, &errs()));
     FoundExact |= AShr->isExact();
   }
@@ -453,7 +453,7 @@ TEST(InstModificationIRStrategy, FastMath) {
   }
   ASSERT_TRUE(M && !verifyModule(*M, &errs()));
   for (int i = 0; i < 300; ++i) {
-    Mutator->mutateModule(*M, RandInt(mt), Source.size(), Source.size() + 100);
+    Mutator->mutateModule(*M, RandInt(mt), IRMutator::getModuleSize(*M) + 100);
     for (auto p : FPOpsHasFastMath)
       FPOpsHasFastMath[p.first] |= p.first->getFastMathFlags().any();
     ASSERT_FALSE(verifyModule(*M, &errs()));
@@ -573,7 +573,7 @@ static void VerifyBlockShuffle(StringRef Source) {
   std::mt19937 mt(Seed);
   std::uniform_int_distribution<int> RandInt(INT_MIN, INT_MAX);
   for (int i = 0; i < 100; i++) {
-    Mutator->mutateModule(*M, RandInt(mt), Source.size(), Source.size() + 1024);
+    Mutator->mutateModule(*M, RandInt(mt), IRMutator::getModuleSize(*M) + 1024);
     for (BasicBlock &BB : *F) {
       int PostShuffleIntCnt = BB.size();
       EXPECT_EQ(PostShuffleIntCnt, PreShuffleInstCnt[&BB]);


        


More information about the llvm-commits mailing list