[llvm] [mlir] [Utils][mlir] Fix interaction between CodeExtractor and OpenMPIRBuilder (PR #145051)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 08:23:33 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-openmp
Author: Kajetan Puchalski (mrkajetanp)
<details>
<summary>Changes</summary>
CodeExtractor can currently erroneously insert an alloca into a different function than it inserts its users into, in cases where code is being extracted out of a function that has already been outlined. Fix this by checking that the two blocks being inserted into are actually in the same function.
OpenMPIRBuilder relies on a callback mechanism to fix-up a module later on during the finaliser step. In some cases this results in the module being invalid prior to the finalise step running. Remove calls to verifyModule wrapped in LLVM_DEBUG from CodeExtractor, as the presence of those results in the compiler crashing with -mllvm -debug due to premature module verification where it would not crash without -debug.
Move the call to ompBuilder->finalize() from the destructor for ModuleTranslation to the end of mlir::translateModuleToLLVMIR, in order to make sure the module has actually been finalized prior to trying to verify it.
Resolves https://github.com/llvm/llvm-project/issues/138102.
---
Full diff: https://github.com/llvm/llvm-project/pull/145051.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Utils/CodeExtractor.cpp (+11-7)
- (modified) llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll (+1-1)
- (modified) mlir/lib/Target/LLVMIR/ModuleTranslation.cpp (+5-4)
- (added) mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir (+62)
``````````diff
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 1210bdf4a1c98..6d1b8d67fd694 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1563,12 +1563,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall, inputs,
NewValues);
- LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
- newFunction->dump();
- report_fatal_error("verification of newFunction failed!");
- });
- LLVM_DEBUG(if (verifyFunction(*oldFunction))
- report_fatal_error("verification of oldFunction failed!"));
+ LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - newFunction:\n");
+ LLVM_DEBUG(newFunction->dump());
+ LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - oldFunction:\n");
+ LLVM_DEBUG(oldFunction->dump());
LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
report_fatal_error("Stale Asumption cache for old Function!"));
return newFunction;
@@ -1868,8 +1866,14 @@ CallInst *CodeExtractor::emitReplacerCall(
// This takes place of the original loop
BasicBlock *codeReplacer =
BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP);
+ // In cases with multiple levels of outlining, e.g. with OpenMP,
+ // AllocationBlock may end up in a function different than oldFunction. We
+ // need to make sure we do not use it in those cases, otherwise the alloca
+ // will end up in a different function from its users and break the module.
BasicBlock *AllocaBlock =
- AllocationBlock ? AllocationBlock : &oldFunction->getEntryBlock();
+ (AllocationBlock && oldFunction == AllocationBlock->getParent())
+ ? AllocationBlock
+ : &oldFunction->getEntryBlock();
// Update the entry count of the function.
if (BFI)
diff --git a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
index 8bc71148352d2..5bc733f5622c7 100644
--- a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
+++ b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 -debug < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 < %s 2>&1 | FileCheck %s
; RUN: opt -passes='function(instcombine),hotcoldsplit,function(instsimplify)' %s -o /dev/null
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index e5ca147ea98f8..d8eeac328d59a 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -776,10 +776,7 @@ ModuleTranslation::ModuleTranslation(Operation *module,
"mlirModule should honor LLVM's module semantics.");
}
-ModuleTranslation::~ModuleTranslation() {
- if (ompBuilder)
- ompBuilder->finalize();
-}
+ModuleTranslation::~ModuleTranslation() {}
void ModuleTranslation::forgetMapping(Region ®ion) {
SmallVector<Region *> toProcess;
@@ -2332,6 +2329,10 @@ mlir::translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext,
// beforehand.
translator.debugTranslation->addModuleFlagsIfNotPresent();
+ // Call the OpenMP IR Builder callbacks prior to verifying the module
+ if (auto *ompBuilder = translator.getOpenMPBuilder())
+ ompBuilder->finalize();
+
if (!disableVerification &&
llvm::verifyModule(*translator.llvmModule, &llvm::errs()))
return nullptr;
diff --git a/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir
new file mode 100644
index 0000000000000..1589778e0627f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir
@@ -0,0 +1,62 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+// This tests the fix for https://github.com/llvm/llvm-project/issues/138102
+// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash
+
+// CHECK-LABEL: define internal void @_QQmain..omp_par
+
+omp.private {type = private} @_QFEi_private_i32 : i32
+omp.private {type = firstprivate} @_QFEc_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+%0 = llvm.load %arg0 : !llvm.ptr -> i32
+llvm.store %0, %arg1 : i32, !llvm.ptr
+omp.yield(%arg1 : !llvm.ptr)
+}
+llvm.func @_QQmain() {
+%0 = llvm.mlir.constant(1 : i64) : i64
+%1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+%2 = llvm.mlir.constant(1 : i64) : i64
+%3 = llvm.alloca %2 x i32 {bindc_name = "c"} : (i64) -> !llvm.ptr
+%4 = llvm.mlir.constant(10 : index) : i64
+%5 = llvm.mlir.constant(0 : index) : i64
+%6 = llvm.mlir.constant(10000 : index) : i64
+%7 = llvm.mlir.constant(1 : index) : i64
+%8 = llvm.mlir.constant(1 : i64) : i64
+%9 = llvm.mlir.addressof @_QFECchunksz : !llvm.ptr
+%10 = llvm.mlir.constant(1 : i64) : i64
+%11 = llvm.trunc %7 : i64 to i32
+llvm.br ^bb1(%11, %4 : i32, i64)
+^bb1(%12: i32, %13: i64): // 2 preds: ^bb0, ^bb2
+%14 = llvm.icmp "sgt" %13, %5 : i64
+llvm.store %12, %3 : i32, !llvm.ptr
+omp.task private(@_QFEc_firstprivate_i32 %3 -> %arg0 : !llvm.ptr) {
+ %19 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"}
+ %20 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "c"}
+ %21 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "chunksz"}
+ omp.target map_entries(%19 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ %22 = llvm.mlir.constant(9999 : i32) : i32
+ %23 = llvm.mlir.constant(1 : i32) : i32
+ omp.parallel {
+ %24 = llvm.load %arg2 : !llvm.ptr -> i32
+ %25 = llvm.add %24, %22 : i32
+ omp.wsloop private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) {
+ omp.loop_nest (%arg5) : i32 = (%24) to (%25) inclusive step (%23) {
+ llvm.store %arg5, %arg4 : i32, !llvm.ptr
+ omp.yield
+ }
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+}
+llvm.return
+}
+llvm.mlir.global internal constant @_QFECchunksz() {addr_space = 0 : i32} : i32 {
+%0 = llvm.mlir.constant(10000 : i32) : i32
+llvm.return %0 : i32
+}
+llvm.mlir.global internal constant @_QFECn() {addr_space = 0 : i32} : i32 {
+%0 = llvm.mlir.constant(100000 : i32) : i32
+llvm.return %0 : i32
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/145051
More information about the llvm-commits
mailing list