[llvm] bc277eb - [FuzzMutate] New InsertCFGStrategy

Peter Rong via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 12 15:21:12 PST 2022


Author: Peter Rong
Date: 2022-12-12T15:21:07-08:00
New Revision: bc277eb16b253ee4c6a2151ba5c1a20747526a47

URL: https://github.com/llvm/llvm-project/commit/bc277eb16b253ee4c6a2151ba5c1a20747526a47
DIFF: https://github.com/llvm/llvm-project/commit/bc277eb16b253ee4c6a2151ba5c1a20747526a47.diff

LOG: [FuzzMutate] New InsertCFGStrategy

Mutating CFG is hard as we have to maintain dominator relations.
We avoid this problem by inserting a CFG into a splitted block.

switch, ret, and br instructions are generated.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D139067

Added: 
    

Modified: 
    llvm/include/llvm/FuzzMutate/IRMutator.h
    llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
    llvm/lib/FuzzMutate/CMakeLists.txt
    llvm/lib/FuzzMutate/IRMutator.cpp
    llvm/lib/FuzzMutate/RandomIRBuilder.cpp
    llvm/unittests/FuzzMutate/StrategiesTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/FuzzMutate/IRMutator.h b/llvm/include/llvm/FuzzMutate/IRMutator.h
index 0c266f60b4cba..9a1f7f002b90d 100644
--- a/llvm/include/llvm/FuzzMutate/IRMutator.h
+++ b/llvm/include/llvm/FuzzMutate/IRMutator.h
@@ -118,6 +118,26 @@ class InstModificationIRStrategy : public IRMutationStrategy {
   void mutate(Instruction &Inst, RandomIRBuilder &IB) override;
 };
 
+/// Strategy to split a random block and insert a random CFG in between.
+class InsertCFGStrategy : public IRMutationStrategy {
+private:
+  uint64_t MaxNumCases;
+  enum CFGToSink { Return, DirectSink, SinkOrSelfLoop, EndOfCFGToLink };
+
+public:
+  InsertCFGStrategy(uint64_t MNC = 8) : MaxNumCases(MNC){};
+  uint64_t getWeight(size_t CurrentSize, size_t MaxSize,
+                     uint64_t CurrentWeight) override {
+    return 5;
+  }
+
+  void mutate(BasicBlock &BB, RandomIRBuilder &IB) override;
+
+private:
+  void connectBlocksToSink(ArrayRef<BasicBlock *> Blocks, BasicBlock *Sink,
+                           RandomIRBuilder &IB);
+};
+
 /// Strategy to insert PHI Nodes at the head of each basic block.
 class InsertPHIStrategy : public IRMutationStrategy {
 public:

diff  --git a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
index d91333eb29e69..a7048f6def896 100644
--- a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
+++ b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
@@ -48,10 +48,12 @@ struct RandomIRBuilder {
   /// values in \c Srcs should be source operands that have already been
   /// selected.
   Value *findOrCreateSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
-                            ArrayRef<Value *> Srcs, fuzzerop::SourcePred Pred);
+                            ArrayRef<Value *> Srcs, fuzzerop::SourcePred Pred,
+                            bool allowConstant = true);
   /// Create some Value suitable as a source for some operation.
   Value *newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
-                   ArrayRef<Value *> Srcs, fuzzerop::SourcePred Pred);
+                   ArrayRef<Value *> Srcs, fuzzerop::SourcePred Pred,
+                   bool allowConstant = true);
   /// Find a viable user for \c V in \c Insts, which should all be contained in
   /// \c BB. This may also create some new instruction in \c BB and use that.
   void connectToSink(BasicBlock &BB, ArrayRef<Instruction *> Insts, Value *V);

diff  --git a/llvm/lib/FuzzMutate/CMakeLists.txt b/llvm/lib/FuzzMutate/CMakeLists.txt
index 0aea7fe0d8f0f..e41f8eeb28439 100644
--- a/llvm/lib/FuzzMutate/CMakeLists.txt
+++ b/llvm/lib/FuzzMutate/CMakeLists.txt
@@ -33,4 +33,5 @@ add_llvm_component_library(LLVMFuzzMutate
   Scalar
   Support
   Target
+  TransformUtils
   )

diff  --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp
index a3407ab408c15..cb4717c6d30f8 100644
--- a/llvm/lib/FuzzMutate/IRMutator.cpp
+++ b/llvm/lib/FuzzMutate/IRMutator.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Transforms/Scalar/DCE.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
 
@@ -299,6 +300,136 @@ void InstModificationIRStrategy::mutate(Instruction &Inst,
     RS.getSelection()();
 }
 
+/// Return a case value that is not already taken to make sure we don't have two
+/// cases with same value.
+static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
+                                   uint64_t MaxValue, RandomIRBuilder &IB) {
+  uint64_t tmp;
+  do {
+    tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
+  } while (CasesTaken.count(tmp) != 0);
+  CasesTaken.insert(tmp);
+  return tmp;
+}
+
+void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
+  SmallVector<Instruction *, 32> Insts;
+  for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
+    Insts.push_back(&*I);
+  if (Insts.size() < 1)
+    return;
+
+  // Choose a point where we split the block.
+  uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
+  auto InstsBeforeSplit = makeArrayRef(Insts).slice(0, IP);
+
+  // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
+  // directly jumps to `Sink`. Here, we have to create a new terminator for
+  // `Source`.
+  BasicBlock *Block = Insts[IP]->getParent();
+  BasicBlock *Source = Block;
+  BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
+
+  Function *F = BB.getParent();
+  LLVMContext &C = F->getParent()->getContext();
+  // A coin decides if it is branch or switch
+  if (uniform<uint64_t>(IB.Rand, 0, 1)) {
+    // Branch
+    BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
+    BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
+    Value *Cond =
+        IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
+                              fuzzerop::onlyType(Type::getInt1Ty(C)), false);
+    BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
+    // Remove the old terminator.
+    ReplaceInstWithInst(Source->getTerminator(), Branch);
+    // Connect these blocks to `Sink`
+    connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
+  } else {
+    // Switch
+    // Determine Integer type, it IS possible we use a boolean to switch.
+    auto RS =
+        makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
+                      return Ty->isIntegerTy();
+                    }));
+    assert(RS && "There is no integer type in all allowed types, is the "
+                 "setting correct?");
+    Type *Ty = RS.getSelection();
+    IntegerType *IntTy = cast<IntegerType>(Ty);
+
+    uint64_t BitSize = IntTy->getBitWidth();
+    uint64_t MaxCaseVal =
+        (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
+    // Create Switch inst in Block
+    Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
+                                        fuzzerop::onlyType(IntTy), false);
+    BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
+    uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
+    NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
+    SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
+    // Remove the old terminator.
+    ReplaceInstWithInst(Source->getTerminator(), Switch);
+
+    // Create blocks, for each block assign a case value.
+    SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
+    SmallSet<uint64_t, 4> CasesTaken;
+    for (uint64_t i = 0; i < NumCases; i++) {
+      uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
+      BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
+      ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
+      Switch->addCase(OnValue, CaseBlock);
+      Blocks.push_back(CaseBlock);
+    }
+
+    // Connect these blocks to `Sink`
+    connectBlocksToSink(Blocks, Sink, IB);
+  }
+}
+
+/// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
+/// even have terminator.
+void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
+                                            BasicBlock *Sink,
+                                            RandomIRBuilder &IB) {
+  uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
+  for (uint64_t i = 0; i < Blocks.size(); i++) {
+    // We have at least one block that directly goes to sink.
+    CFGToSink ToSink = (i == DirectSinkIdx)
+                           ? CFGToSink::DirectSink
+                           : static_cast<CFGToSink>(uniform<uint64_t>(
+                                 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
+    BasicBlock *BB = Blocks[i];
+    Function *F = BB->getParent();
+    LLVMContext &C = F->getParent()->getContext();
+    switch (ToSink) {
+    case CFGToSink::Return: {
+      Type *RetTy = F->getReturnType();
+      Value *RetValue = nullptr;
+      if (!RetTy->isVoidTy())
+        RetValue =
+            IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
+      ReturnInst::Create(C, RetValue, BB);
+      break;
+    }
+    case CFGToSink::DirectSink: {
+      BranchInst::Create(Sink, BB);
+      break;
+    }
+    case CFGToSink::SinkOrSelfLoop: {
+      SmallVector<BasicBlock *, 2> Branches({Sink, BB});
+      // A coin decides which block is true branch.
+      uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
+      Value *Cond = IB.findOrCreateSource(
+          *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
+      BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
+      break;
+    }
+    case CFGToSink::EndOfCFGToLink:
+      llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
+    }
+  }
+}
+
 void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
   // Can't insert PHI node to entry node.
   if (&BB == &BB.getParent()->getEntryBlock())

diff  --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
index d3574e1770d3b..bb9f91d0bb375 100644
--- a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -12,6 +12,7 @@
 #include "llvm/FuzzMutate/Random.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 
@@ -26,7 +27,8 @@ Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
                                            ArrayRef<Instruction *> Insts,
                                            ArrayRef<Value *> Srcs,
-                                           SourcePred Pred) {
+                                           SourcePred Pred,
+                                           bool allowConstant) {
   auto MatchesPred = [&Srcs, &Pred](Instruction *Inst) {
     return Pred.matches(Srcs, Inst);
   };
@@ -35,11 +37,12 @@ Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
   RS.sample(nullptr, /*Weight=*/1);
   if (Instruction *Src = RS.getSelection())
     return Src;
-  return newSource(BB, Insts, Srcs, Pred);
+  return newSource(BB, Insts, Srcs, Pred, allowConstant);
 }
 
 Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
-                                  ArrayRef<Value *> Srcs, SourcePred Pred) {
+                                  ArrayRef<Value *> Srcs, SourcePred Pred,
+                                  bool allowConstant) {
   // Generate some constants to choose from.
   auto RS = makeSampler<Value *>(Rand);
   RS.sample(Pred.generate(Srcs, KnownTypes));
@@ -66,8 +69,26 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
       NewLoad->eraseFromParent();
   }
 
-  assert(!RS.isEmpty() && "Failed to generate sources");
-  return RS.getSelection();
+  Value *newSrc = RS.getSelection();
+  // Generate a stack alloca and store the constant to it if constant is not
+  // allowed, our hope is that later mutations can generate some values and
+  // store to this placeholder.
+  if (!allowConstant && isa<Constant>(newSrc)) {
+    Type *Ty = newSrc->getType();
+    Function *F = BB.getParent();
+    BasicBlock *EntryBB = &F->getEntryBlock();
+    /// TODO: For all Allocas, maybe allocate an array.
+    DataLayout DL(BB.getParent()->getParent());
+    AllocaInst *Alloca = new AllocaInst(Ty, DL.getProgramAddressSpace(), "A",
+                                        EntryBB->getTerminator());
+    new StoreInst(newSrc, Alloca, EntryBB->getTerminator());
+    if (BB.getTerminator()) {
+      newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", BB.getTerminator());
+    } else {
+      newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
+    }
+  }
+  return newSrc;
 }
 
 static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,

diff  --git a/llvm/unittests/FuzzMutate/StrategiesTest.cpp b/llvm/unittests/FuzzMutate/StrategiesTest.cpp
index 3be9d565d1547..68a7676e28963 100644
--- a/llvm/unittests/FuzzMutate/StrategiesTest.cpp
+++ b/llvm/unittests/FuzzMutate/StrategiesTest.cpp
@@ -311,8 +311,41 @@ TEST(InstModificationIRStrategyTest, DidntShuffleFRem) {
   VerfyDivDidntShuffle(Source);
 }
 
-TEST(InsertPHIStrategy, PHI) {
+template <class Strategy>
+static void mutateAndVerifyModule(StringRef Source, int repeat = 100) {
   LLVMContext Ctx;
+  auto Mutator = createMutator<Strategy>();
+  ASSERT_TRUE(Mutator);
+
+  auto M = parseAssembly(Source.data(), Ctx);
+  std::mt19937 mt(Seed);
+  std::uniform_int_distribution<int> RandInt(INT_MIN, INT_MAX);
+  for (int i = 0; i < repeat; i++) {
+    Mutator->mutateModule(*M, RandInt(mt), Source.size(), Source.size() + 1024);
+    ASSERT_FALSE(verifyModule(*M, &errs()));
+  }
+}
+
+TEST(InsertCFGStrategy, CFG) {
+  StringRef Source = "\n\
+      define i32 @test(i1 %C1, i1 %C2, i1 %C3, i16 %S1, i16 %S2, i32 %I1) { \n\
+        Entry:  \n\
+         %I2 = add i32 %I1, 1 \n\
+          %C = and i1 %C1, %C2 \n\
+        br label %Body \n\
+        Body: \n\
+         %IB = add i32 %I1, %I2 \n\
+          %CB = and i1 %C1, %C \n\
+        br label %Exit \n\
+        Exit: \n\
+          %IE = add i32 %IB, %I2 \n\
+          %CE = and i1 %CB, %C \n\
+         ret i32 %IE \n\
+      }";
+  mutateAndVerifyModule<InsertCFGStrategy>(Source);
+}
+
+TEST(InsertPHIStrategy, PHI) {
   StringRef Source = "\n\
         define void @test(i1 %C1, i1 %C2, i32 %I, double %FP) { \n\
         Entry:  \n\
@@ -339,16 +372,7 @@ TEST(InsertPHIStrategy, PHI) {
         Exit: ; pred Entry, OnOne \n\
           ret void \n\
         }";
-  auto Mutator = createMutator<InsertPHIStrategy>();
-  ASSERT_TRUE(Mutator);
-
-  auto M = parseAssembly(Source.data(), Ctx);
-  std::mt19937 mt(Seed);
-  std::uniform_int_distribution<int> RandInt(INT_MIN, INT_MAX);
-  for (int i = 0; i < 100; i++) {
-    Mutator->mutateModule(*M, RandInt(mt), Source.size(), Source.size() + 1024);
-    ASSERT_FALSE(verifyModule(*M, &errs()));
-  }
+  mutateAndVerifyModule<InsertPHIStrategy>(Source);
 }
 
 TEST(InsertPHIStrategy, PHIWithSameIncomingBlock) {
@@ -378,7 +402,6 @@ TEST(InsertPHIStrategy, PHIWithSameIncomingBlock) {
 }
 
 TEST(SinkInstructionStrategy, Operand) {
-  LLVMContext Ctx;
   StringRef Source = "\n\
       define i32 @test(i1 %C1, i1 %C2, i1 %C3, i32 %I, i32 %J) { \n\
         Entry:  \n\
@@ -397,16 +420,7 @@ TEST(SinkInstructionStrategy, Operand) {
         Exit:  \n\
           ret i32 %I  \n\
       }";
-  auto Mutator = createMutator<SinkInstructionStrategy>();
-  ASSERT_TRUE(Mutator);
-
-  auto M = parseAssembly(Source.data(), Ctx);
-  std::mt19937 mt(Seed);
-  std::uniform_int_distribution<int> RandInt(INT_MIN, INT_MAX);
-  for (int i = 0; i < 100; i++) {
-    Mutator->mutateModule(*M, RandInt(mt), Source.size(), Source.size() + 1024);
-    EXPECT_FALSE(verifyModule(*M, &errs()));
-  }
+  mutateAndVerifyModule<SinkInstructionStrategy>(Source);
 }
 
 static void VerifyBlockShuffle(StringRef Source) {


        


More information about the llvm-commits mailing list