[llvm] [FuzzMutate] Prevent the mutator from generating illegal memory operations (PR #144885)

Manuel Carrasco via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 30 09:07:26 PDT 2025


https://github.com/mgcarrasco updated https://github.com/llvm/llvm-project/pull/144885

>From f99da92a6d404bdccebf78e7167e519d9eb8482e Mon Sep 17 00:00:00 2001
From: Manuel Carrasco <Manuel.Carrasco at amd.com>
Date: Thu, 19 Jun 2025 03:10:44 -0700
Subject: [PATCH 1/3] [llvm][FuzzMutate] Legally load values from memory for
 AMDGCN.

---
 llvm/lib/FuzzMutate/RandomIRBuilder.cpp | 60 ++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
index 5e793997aee6e..054fefee8508a 100644
--- a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -108,6 +108,40 @@ Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
   return findOrCreateSource(BB, Insts, {}, anyType());
 }
 
+// Some architectures do not allow arbitrary load instructions on any sort of
+// pointer. This function takes into account the target triple and legally
+// loads a value from memory.
+static std::pair<Instruction *, SmallVector<Instruction *>>
+buildTargetLegalLoad(Type *AccessTy, Value *Ptr, InsertPosition IP, Module *M,
+                     const Twine &LoadName) {
+  SmallVector<Instruction *> NewInsts;
+  Value *LoadPtr = Ptr;
+
+  if (M && M->getTargetTriple().isAMDGCN()) {
+    // Check if we should perform an address space cast
+    PointerType *pointerType = dyn_cast<PointerType>(Ptr->getType());
+    if (pointerType && pointerType->getAddressSpace() == 8) {
+      // Perform address space cast from address space 8 to address space 7
+      auto NewPtr = new AddrSpaceCastInst(
+          Ptr, PointerType::get(M->getContext(), 7), LoadName + ".ASC", IP);
+      NewInsts.push_back(NewPtr);
+      LoadPtr = NewPtr;
+    }
+  }
+
+  Instruction *L = new LoadInst(AccessTy, LoadPtr, LoadName, IP);
+  NewInsts.push_back(L);
+
+  return std::make_pair(L, NewInsts);
+}
+
+static void eraseNewInstructions(SmallVector<Instruction *> &NewInsts) {
+  // Remove in reverse order (uses before defs)
+  for (auto it = NewInsts.rbegin(); it != NewInsts.rend(); ++it) {
+    (*it)->eraseFromParent();
+  }
+}
+
 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
                                            ArrayRef<Instruction *> Insts,
                                            ArrayRef<Value *> Srcs,
@@ -158,19 +192,20 @@ Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
       Module *M = BB.getParent()->getParent();
       auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
       Type *Ty = GV->getValueType();
-      LoadInst *LoadGV = nullptr;
-      if (BB.getTerminator()) {
-        LoadGV = new LoadInst(Ty, GV, "LGV", BB.getFirstInsertionPt());
-      } else {
-        LoadGV = new LoadInst(Ty, GV, "LGV", &BB);
-      }
+      InsertPosition IP = BB.getTerminator()
+                              ? InsertPosition(BB.getFirstInsertionPt())
+                              : InsertPosition(&BB);
+      // Build a legal load and track new instructions in case a rollback is
+      // needed.
+      auto [LoadGV, NewInsts] = buildTargetLegalLoad(Ty, GV, IP, M, "LGV");
       // Because we might be generating new values, we have to check if it
       // matches again.
       if (DidCreate) {
         if (Pred.matches(Srcs, LoadGV)) {
           return LoadGV;
         }
-        LoadGV->eraseFromParent();
+        // Remove newly inserted instructions
+        eraseNewInstructions(NewInsts);
         // If no one is using this GlobalVariable, delete it too.
         if (GV->use_empty()) {
           GV->eraseFromParent();
@@ -208,13 +243,18 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
     }
     // Pick the type independently.
     Type *AccessTy = RS.getSelection()->getType();
-    auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", IP);
+    // Build a legal load and track new instructions in case a rollback is
+    // needed.
+    auto [NewLoad, NewInsts] =
+        buildTargetLegalLoad(AccessTy, Ptr, IP, BB.getModule(), "L");
 
     // Only sample this load if it really matches the descriptor
     if (Pred.matches(Srcs, NewLoad))
       RS.sample(NewLoad, RS.totalWeight());
-    else
-      NewLoad->eraseFromParent();
+    else {
+      // Remove newly inserted instructions
+      eraseNewInstructions(NewInsts);
+    }
   }
 
   Value *newSrc = RS.getSelection();

>From 4da8fdb74c86865ecf590f23ad7b006d3efa2aa1 Mon Sep 17 00:00:00 2001
From: Manuel Carrasco <Manuel.Carrasco at amd.com>
Date: Thu, 19 Jun 2025 04:52:20 -0700
Subject: [PATCH 2/3] [llvm][FuzzMutate] Legally store values to memory for
 AMDGCN.

---
 llvm/lib/FuzzMutate/RandomIRBuilder.cpp | 58 ++++++++++++++++---------
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
index 054fefee8508a..8b034137e4702 100644
--- a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -108,31 +108,46 @@ Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
   return findOrCreateSource(BB, Insts, {}, anyType());
 }
 
-// Some architectures do not allow arbitrary load instructions on any sort of
-// pointer. This function takes into account the target triple and legally
-// loads a value from memory.
-static std::pair<Instruction *, SmallVector<Instruction *>>
-buildTargetLegalLoad(Type *AccessTy, Value *Ptr, InsertPosition IP, Module *M,
-                     const Twine &LoadName) {
-  SmallVector<Instruction *> NewInsts;
-  Value *LoadPtr = Ptr;
-
+// Adapts the current pointer for a legal mem operation on the target arch.
+static Value *buildTargetLegalPtr(Module *M, Value *Ptr, InsertPosition IP,
+                                  const Twine &Name,
+                                  SmallVector<Instruction *> *NewInsts) {
   if (M && M->getTargetTriple().isAMDGCN()) {
     // Check if we should perform an address space cast
     PointerType *pointerType = dyn_cast<PointerType>(Ptr->getType());
     if (pointerType && pointerType->getAddressSpace() == 8) {
       // Perform address space cast from address space 8 to address space 7
       auto NewPtr = new AddrSpaceCastInst(
-          Ptr, PointerType::get(M->getContext(), 7), LoadName + ".ASC", IP);
-      NewInsts.push_back(NewPtr);
-      LoadPtr = NewPtr;
+          Ptr, PointerType::get(M->getContext(), 7), Name + ".ASC", IP);
+      if (NewInsts)
+        NewInsts->push_back(NewPtr);
+      return NewPtr;
     }
   }
 
-  Instruction *L = new LoadInst(AccessTy, LoadPtr, LoadName, IP);
-  NewInsts.push_back(L);
+  return Ptr;
+}
+
+// Stores a value to memory, considering the target triple's restrictions.
+static Instruction *buildTargetLegalStore(Value *Val, Value *Ptr,
+                                          InsertPosition IP, Module *M) {
+  Value *StorePtr = buildTargetLegalPtr(M, Ptr, IP, "", nullptr);
+  Instruction *Store = new StoreInst(Val, StorePtr, IP);
+  return Store;
+}
+
+// Loads a value from memory, considering the target triple's restrictions.
+static std::pair<Instruction *, SmallVector<Instruction *>>
+buildTargetLegalLoad(Type *AccessTy, Value *Ptr, InsertPosition IP, Module *M,
+                     const Twine &LoadName) {
+  SmallVector<Instruction *> NewInsts;
+
+  Value *LoadPtr = buildTargetLegalPtr(M, Ptr, IP, LoadName, &NewInsts);
 
-  return std::make_pair(L, NewInsts);
+  Instruction *Load = new LoadInst(AccessTy, LoadPtr, LoadName, IP);
+  NewInsts.push_back(Load);
+
+  return std::make_pair(Load, NewInsts);
 }
 
 static void eraseNewInstructions(SmallVector<Instruction *> &NewInsts) {
@@ -360,8 +375,10 @@ Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
       std::shuffle(Dominators.begin(), Dominators.end(), Rand);
       for (BasicBlock *Dom : Dominators) {
         for (Instruction &I : *Dom) {
-          if (isa<PointerType>(I.getType()))
-            return new StoreInst(V, &I, Insts.back()->getIterator());
+          if (isa<PointerType>(I.getType())) {
+            return buildTargetLegalStore(V, &I, Insts.back()->getIterator(),
+                                         I.getModule());
+          }
         }
       }
       break;
@@ -384,10 +401,10 @@ Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
       /// TODO: allocate a new stack memory.
       return newSink(BB, Insts, V);
     case SinkToGlobalVariable: {
-      Module *M = BB.getParent()->getParent();
+      Module *M = BB.getModule();
       auto [GV, DidCreate] =
           findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType()));
-      return new StoreInst(V, GV, Insts.back()->getIterator());
+      return buildTargetLegalStore(V, GV, Insts.back()->getIterator(), M);
     }
     case EndOfValueSink:
     default:
@@ -409,7 +426,8 @@ Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
     }
   }
 
-  return new StoreInst(V, Ptr, Insts.back()->getIterator());
+  return buildTargetLegalStore(V, Ptr, Insts.back()->getIterator(),
+                               BB.getModule());
 }
 
 Value *RandomIRBuilder::findPointer(BasicBlock &BB,

>From c5b2a5bb4094f710cb7c8aa3fd5fd37162f7f7b5 Mon Sep 17 00:00:00 2001
From: Manuel Carrasco <Manuel.Carrasco at amd.com>
Date: Mon, 30 Jun 2025 04:27:16 -0700
Subject: [PATCH 3/3] [llvm][FuzzMutate] Add unit test for checking the
 addrspace.

---
 llvm/unittests/FuzzMutate/StrategiesTest.cpp | 48 ++++++++++++++++++--
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/llvm/unittests/FuzzMutate/StrategiesTest.cpp b/llvm/unittests/FuzzMutate/StrategiesTest.cpp
index 28d950269b49d..18ca0df66864a 100644
--- a/llvm/unittests/FuzzMutate/StrategiesTest.cpp
+++ b/llvm/unittests/FuzzMutate/StrategiesTest.cpp
@@ -89,9 +89,10 @@ void IterateOnSource(StringRef Source, IRMutator &Mutator) {
   }
 }
 
-static void mutateAndVerifyModule(StringRef Source,
-                                  std::unique_ptr<IRMutator> &Mutator,
-                                  int repeat = 100) {
+static void
+mutateAndVerifyModule(StringRef Source, std::unique_ptr<IRMutator> &Mutator,
+                      int repeat = 100,
+                      const std::function<void(Module &)> *Check = nullptr) {
   LLVMContext Ctx;
   auto M = parseAssembly(Source.data(), Ctx);
   std::mt19937 mt(Seed);
@@ -99,13 +100,19 @@ static void mutateAndVerifyModule(StringRef Source,
   for (int i = 0; i < repeat; i++) {
     Mutator->mutateModule(*M, RandInt(mt), IRMutator::getModuleSize(*M) + 1024);
     ASSERT_FALSE(verifyModule(*M, &errs()));
+    if (Check) {
+      (*Check)(*M);
+    }
   }
 }
+
 template <class Strategy>
-static void mutateAndVerifyModule(StringRef Source, int repeat = 100) {
+static void
+mutateAndVerifyModule(StringRef Source, int repeat = 100,
+                      const std::function<void(Module &)> *Check = nullptr) {
   auto Mutator = createMutator<Strategy>();
   ASSERT_TRUE(Mutator);
-  mutateAndVerifyModule(Source, Mutator, repeat);
+  mutateAndVerifyModule(Source, Mutator, repeat, Check);
 }
 
 TEST(InjectorIRStrategyTest, EmptyModule) {
@@ -763,4 +770,35 @@ TEST(AllStrategies, SpecialTerminator) {
   mutateAndVerifyModule<SinkInstructionStrategy>(Source);
 }
 
+TEST(AllStrategies, AMDGCNLegalAddrspace) {
+  StringRef Source = "\n\
+    target triple = \"amdgcn-amd-amdhsa\"\n\
+    ; minimum values required by the fuzzer (e.g., default addrspace for allocas and globals)\n\
+    target datalayout = \"A5-G1\"\n\
+    define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %index) {\n\
+    %desc.int = bitcast <4 x i32> %desc to i128\n\
+    %desc.ptr = inttoptr i128 %desc.int to ptr addrspace(8)\n\
+    ret void\n\
+  }\n\
+  ";
+
+  std::function<void(Module &)> AddrSpaceCheck = [](Module &M) {
+    Function *F = M.getFunction("strict_wwm_amdgpu_cs_main");
+    EXPECT_TRUE(F != nullptr);
+    for (BasicBlock &BB : *F) {
+      for (Instruction &I : BB) {
+        if (StoreInst *S = dyn_cast<StoreInst>(&I)) {
+          EXPECT_TRUE(S->getPointerAddressSpace() != 8);
+        } else if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
+          EXPECT_TRUE(L->getPointerAddressSpace() != 8);
+        }
+      }
+    }
+  };
+
+  int Repeat = 100;
+  mutateAndVerifyModule<SinkInstructionStrategy>(Source, Repeat,
+                                                 &AddrSpaceCheck);
+}
+
 } // namespace



More information about the llvm-commits mailing list