[llvm] [mlir] [Utils][mlir] Fix interaction between CodeExtractor and OpenMPIRBuilder (PR #145051)

Kajetan Puchalski via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 20 08:41:13 PDT 2025


https://github.com/mrkajetanp updated https://github.com/llvm/llvm-project/pull/145051

>From de6e9161a5d18e09a5f4d87dcc1c191471d9e6da Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Fri, 20 Jun 2025 15:08:59 +0000
Subject: [PATCH 1/2] [Utils][mlir] Fix interaction between CodeExtractor and
 OpenMPIRBuilder

CodeExtractor can currently erroneously insert an alloca into a
different function than it inserts its users into, in cases where code
is being extracted out of a function that has already been outlined.
Fix this by checking that the two blocks being inserted into are
actually in the same function.

OpenMPIRBuilder relies on a callback mechanism to fix-up a module later
on during the finaliser step. In some cases this results in the module
being invalid prior to the finalise step running. Remove calls to
verifyModule wrapped in LLVM_DEBUG from CodeExtractor, as the presence
of those results in the compiler crashing with -mllvm -debug due to
premature module verification where it would not crash without -debug.

Move the call to ompBuilder->finalize() from the destructor for
ModuleTranslation to the end of mlir::translateModuleToLLVMIR, in order
to make sure the module has actually been finalized prior to trying to
verify it.

Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   | 18 +++---
 .../assumption-cache-invalidation.ll          |  2 +-
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp  |  9 +--
 .../openmp-nested-task-target-parallel.mlir   | 62 +++++++++++++++++++
 4 files changed, 79 insertions(+), 12 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir

diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 1210bdf4a1c98..6d1b8d67fd694 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1563,12 +1563,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
   fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall, inputs,
                                NewValues);
 
-  LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
-    newFunction->dump();
-    report_fatal_error("verification of newFunction failed!");
-  });
-  LLVM_DEBUG(if (verifyFunction(*oldFunction))
-                 report_fatal_error("verification of oldFunction failed!"));
+  LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - newFunction:\n");
+  LLVM_DEBUG(newFunction->dump());
+  LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - oldFunction:\n");
+  LLVM_DEBUG(oldFunction->dump());
   LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
                  report_fatal_error("Stale Asumption cache for old Function!"));
   return newFunction;
@@ -1868,8 +1866,14 @@ CallInst *CodeExtractor::emitReplacerCall(
   // This takes place of the original loop
   BasicBlock *codeReplacer =
       BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP);
+  // In cases with multiple levels of outlining, e.g. with OpenMP,
+  // AllocationBlock may end up in a function different than oldFunction. We
+  // need to make sure we do not use it in those cases, otherwise the alloca
+  // will end up in a different function from its users and break the module.
   BasicBlock *AllocaBlock =
-      AllocationBlock ? AllocationBlock : &oldFunction->getEntryBlock();
+      (AllocationBlock && oldFunction == AllocationBlock->getParent())
+          ? AllocationBlock
+          : &oldFunction->getEntryBlock();
 
   // Update the entry count of the function.
   if (BFI)
diff --git a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
index 8bc71148352d2..5bc733f5622c7 100644
--- a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
+++ b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 -debug < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 < %s 2>&1 | FileCheck %s
 ; RUN: opt -passes='function(instcombine),hotcoldsplit,function(instsimplify)' %s -o /dev/null
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index e5ca147ea98f8..d8eeac328d59a 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -776,10 +776,7 @@ ModuleTranslation::ModuleTranslation(Operation *module,
          "mlirModule should honor LLVM's module semantics.");
 }
 
-ModuleTranslation::~ModuleTranslation() {
-  if (ompBuilder)
-    ompBuilder->finalize();
-}
+ModuleTranslation::~ModuleTranslation() {}
 
 void ModuleTranslation::forgetMapping(Region &region) {
   SmallVector<Region *> toProcess;
@@ -2332,6 +2329,10 @@ mlir::translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext,
   // beforehand.
   translator.debugTranslation->addModuleFlagsIfNotPresent();
 
+  // Call the OpenMP IR Builder callbacks prior to verifying the module
+  if (auto *ompBuilder = translator.getOpenMPBuilder())
+    ompBuilder->finalize();
+
   if (!disableVerification &&
       llvm::verifyModule(*translator.llvmModule, &llvm::errs()))
     return nullptr;
diff --git a/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir
new file mode 100644
index 0000000000000..1589778e0627f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir
@@ -0,0 +1,62 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+// This tests the fix for https://github.com/llvm/llvm-project/issues/138102
+// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash
+
+// CHECK-LABEL: define internal void @_QQmain..omp_par
+
+omp.private {type = private} @_QFEi_private_i32 : i32
+omp.private {type = firstprivate} @_QFEc_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+%0 = llvm.load %arg0 : !llvm.ptr -> i32
+llvm.store %0, %arg1 : i32, !llvm.ptr
+omp.yield(%arg1 : !llvm.ptr)
+}
+llvm.func @_QQmain() {
+%0 = llvm.mlir.constant(1 : i64) : i64
+%1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+%2 = llvm.mlir.constant(1 : i64) : i64
+%3 = llvm.alloca %2 x i32 {bindc_name = "c"} : (i64) -> !llvm.ptr
+%4 = llvm.mlir.constant(10 : index) : i64
+%5 = llvm.mlir.constant(0 : index) : i64
+%6 = llvm.mlir.constant(10000 : index) : i64
+%7 = llvm.mlir.constant(1 : index) : i64
+%8 = llvm.mlir.constant(1 : i64) : i64
+%9 = llvm.mlir.addressof @_QFECchunksz : !llvm.ptr
+%10 = llvm.mlir.constant(1 : i64) : i64
+%11 = llvm.trunc %7 : i64 to i32
+llvm.br ^bb1(%11, %4 : i32, i64)
+^bb1(%12: i32, %13: i64):  // 2 preds: ^bb0, ^bb2
+%14 = llvm.icmp "sgt" %13, %5 : i64
+llvm.store %12, %3 : i32, !llvm.ptr
+omp.task private(@_QFEc_firstprivate_i32 %3 -> %arg0 : !llvm.ptr) {
+  %19 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"}
+  %20 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "c"}
+  %21 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "chunksz"}
+  omp.target map_entries(%19 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+    %22 = llvm.mlir.constant(9999 : i32) : i32
+    %23 = llvm.mlir.constant(1 : i32) : i32
+    omp.parallel {
+      %24 = llvm.load %arg2 : !llvm.ptr -> i32
+      %25 = llvm.add %24, %22 : i32
+      omp.wsloop private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) {
+        omp.loop_nest (%arg5) : i32 = (%24) to (%25) inclusive step (%23) {
+          llvm.store %arg5, %arg4 : i32, !llvm.ptr
+          omp.yield
+        }
+      }
+      omp.terminator
+    }
+    omp.terminator
+  }
+  omp.terminator
+}
+llvm.return
+}
+llvm.mlir.global internal constant @_QFECchunksz() {addr_space = 0 : i32} : i32 {
+%0 = llvm.mlir.constant(10000 : i32) : i32
+llvm.return %0 : i32
+}
+llvm.mlir.global internal constant @_QFECn() {addr_space = 0 : i32} : i32 {
+%0 = llvm.mlir.constant(100000 : i32) : i32
+llvm.return %0 : i32
+}

>From 10ed535fca35ccb68bdd1346d3af98fe3bd9a03a Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Fri, 20 Jun 2025 15:39:17 +0000
Subject: [PATCH 2/2] Run OMPIRBuilder->finalize() in the destructor if it has
 not already run

---
 llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 8 +++++++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp        | 4 ++++
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp     | 5 ++++-
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 93fb0d8e8d078..19a4058b64382 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -484,7 +484,7 @@ class OpenMPIRBuilder {
   /// not have an effect on \p M (see initialize)
   OpenMPIRBuilder(Module &M)
       : M(M), Builder(M.getContext()), OffloadInfoManager(this),
-        T(M.getTargetTriple()) {}
+        T(M.getTargetTriple()), IsFinalized(false) {}
   LLVM_ABI ~OpenMPIRBuilder();
 
   class AtomicInfo : public llvm::AtomicInfo {
@@ -521,6 +521,10 @@ class OpenMPIRBuilder {
   ///                              all functions are finalized.
   LLVM_ABI void finalize(Function *Fn = nullptr);
 
+  /// Check whether the finalize function has already run
+  /// \return true if the finalize function has already run
+  LLVM_ABI bool isFinalized();
+
   /// Add attributes known for \p FnID to \p Fn.
   LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
 
@@ -3286,6 +3290,8 @@ class OpenMPIRBuilder {
   Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
                                 AtomicRMWInst::BinOp RMWOp);
 
+  bool IsFinalized;
+
 public:
   /// a struct to pack relevant information while generating atomic Ops
   struct AtomicOpValue {
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7cbbbff511c88..dab8fbdeff160 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -824,8 +824,12 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
         M.getGlobalVariable("__openmp_nvptx_data_transfer_temporary_storage")};
     emitUsed("llvm.compiler.used", LLVMCompilerUsed);
   }
+
+  IsFinalized = true;
 }
 
+bool OpenMPIRBuilder::isFinalized() { return IsFinalized; }
+
 OpenMPIRBuilder::~OpenMPIRBuilder() {
   assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
 }
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index d8eeac328d59a..93d2967229cca 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -776,7 +776,10 @@ ModuleTranslation::ModuleTranslation(Operation *module,
          "mlirModule should honor LLVM's module semantics.");
 }
 
-ModuleTranslation::~ModuleTranslation() {}
+ModuleTranslation::~ModuleTranslation() {
+  if (ompBuilder && !ompBuilder->isFinalized())
+    ompBuilder->finalize();
+}
 
 void ModuleTranslation::forgetMapping(Region &region) {
   SmallVector<Region *> toProcess;



More information about the llvm-commits mailing list