[llvm] [mlir] [OMPIRBuilder] Don't use invalid debug loc in reduction functions. (PR #147950)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 10 05:24:32 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-flang-openmp

Author: Abid Qadeer (abidh)

<details>
<summary>Changes</summary>

We have this pattern of code in OMPIRBuilder for many functions that are used in reduction operations.

 ```
 Function *LtGRFunc = Function::Create
  BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
  Builder.SetInsertPoint(EntryBlock);
```

The insertion point is moved to the new function but the debug location is not updated. This means that reduction function will use the debug location that points to another function. This problem gets hidden because these functions gets inlined but the potential for failure exists.

This patch resets the debug location when insertion point is moved to new function. Some `InsertPointGuard` have been added to make sure why restore the debug location correctly when we are done with the reduction function.

---
Full diff: https://github.com/llvm/llvm-project/pull/147950.diff


2 Files Affected:

- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+16-10) 
- (added) mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir (+121) 


``````````diff
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index db792a3b52d24..170224616ac64 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy(
 Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
     const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos,
     AttributeList FuncAttrs) {
-  InsertPointTy SavedIP = Builder.saveIP();
+  IRBuilder<>::InsertPointGuard IPG(Builder);
   LLVMContext &Ctx = M.getContext();
   FunctionType *FuncTy = FunctionType::get(
       Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
@@ -2630,6 +2630,7 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
   WcFunc->addParamAttr(1, Attribute::NoUndef);
   BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc);
   Builder.SetInsertPoint(EntryBB);
+  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // ReduceList: thread local Reduce list.
   // At the stage of the computation when this function is called, partially
@@ -2844,7 +2845,6 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
   }
 
   Builder.CreateRetVoid();
-  Builder.restoreIP(SavedIP);
 
   return WcFunc;
 }
@@ -2853,6 +2853,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
     AttributeList FuncAttrs) {
   LLVMContext &Ctx = M.getContext();
+  IRBuilder<>::InsertPointGuard IPG(Builder);
   FunctionType *FuncTy =
       FunctionType::get(Builder.getVoidTy(),
                         {Builder.getPtrTy(), Builder.getInt16Ty(),
@@ -2871,6 +2872,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
   SarFunc->addParamAttr(3, Attribute::SExt);
   BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc);
   Builder.SetInsertPoint(EntryBB);
+  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Thread local Reduce list used to host the values of data to be reduced.
   Argument *ReduceListArg = SarFunc->getArg(0);
@@ -3017,7 +3019,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
 Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
     AttributeList FuncAttrs) {
-  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
+  IRBuilder<>::InsertPointGuard IPG(Builder);
   LLVMContext &Ctx = M.getContext();
   FunctionType *FuncTy = FunctionType::get(
       Builder.getVoidTy(),
@@ -3033,6 +3035,7 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
 
   BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
   Builder.SetInsertPoint(EntryBlock);
+  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Buffer: global reduction buffer.
   Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3120,14 +3123,13 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
   }
 
   Builder.CreateRetVoid();
-  Builder.restoreIP(OldIP);
   return LtGCFunc;
 }
 
 Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
     Type *ReductionsBufferTy, AttributeList FuncAttrs) {
-  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
+  IRBuilder<>::InsertPointGuard IPG(Builder);
   LLVMContext &Ctx = M.getContext();
   FunctionType *FuncTy = FunctionType::get(
       Builder.getVoidTy(),
@@ -3143,6 +3145,7 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
 
   BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
   Builder.SetInsertPoint(EntryBlock);
+  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Buffer: global reduction buffer.
   Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3203,14 +3206,13 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
   Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
       ->addFnAttr(Attribute::NoUnwind);
   Builder.CreateRetVoid();
-  Builder.restoreIP(OldIP);
   return LtGRFunc;
 }
 
 Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
     AttributeList FuncAttrs) {
-  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
+  IRBuilder<>::InsertPointGuard IPG(Builder);
   LLVMContext &Ctx = M.getContext();
   FunctionType *FuncTy = FunctionType::get(
       Builder.getVoidTy(),
@@ -3226,6 +3228,7 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
 
   BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
   Builder.SetInsertPoint(EntryBlock);
+  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Buffer: global reduction buffer.
   Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3311,14 +3314,13 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
   }
 
   Builder.CreateRetVoid();
-  Builder.restoreIP(OldIP);
   return LtGCFunc;
 }
 
 Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
     Type *ReductionsBufferTy, AttributeList FuncAttrs) {
-  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
+  IRBuilder<>::InsertPointGuard IPG(Builder);
   LLVMContext &Ctx = M.getContext();
   auto *FuncTy = FunctionType::get(
       Builder.getVoidTy(),
@@ -3334,6 +3336,7 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
 
   BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
   Builder.SetInsertPoint(EntryBlock);
+  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Buffer: global reduction buffer.
   Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3394,7 +3397,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
   Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
       ->addFnAttr(Attribute::NoUnwind);
   Builder.CreateRetVoid();
-  Builder.restoreIP(OldIP);
   return LtGRFunc;
 }
 
@@ -3407,6 +3409,7 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
 Expected<Function *> OpenMPIRBuilder::createReductionFunction(
     StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
     ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
+  IRBuilder<>::InsertPointGuard IPG(Builder);
   auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
                                    {Builder.getPtrTy(), Builder.getPtrTy()},
                                    /* IsVarArg */ false);
@@ -3419,6 +3422,7 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
   BasicBlock *EntryBB =
       BasicBlock::Create(M.getContext(), "entry", ReductionFunc);
   Builder.SetInsertPoint(EntryBB);
+  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Need to alloca memory here and deal with the pointers before getting
   // LHS/RHS pointers out
@@ -3746,10 +3750,12 @@ static Error populateReductionFunction(
     Function *ReductionFunc,
     ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
     IRBuilder<> &Builder, ArrayRef<bool> IsByRef, bool IsGPU) {
+  IRBuilder<>::InsertPointGuard IPG(Builder);
   Module *Module = ReductionFunc->getParent();
   BasicBlock *ReductionFuncBlock =
       BasicBlock::Create(Module->getContext(), "", ReductionFunc);
   Builder.SetInsertPoint(ReductionFuncBlock);
+  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
   Value *LHSArrayPtr = nullptr;
   Value *RHSArrayPtr = nullptr;
   if (IsGPU) {
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir
new file mode 100644
index 0000000000000..d889ef4f5700c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir
@@ -0,0 +1,121 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
+  omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
+  omp.declare_reduction @add_reduction_i32 : i32 init {
+  ^bb0(%arg0: i32 loc("test.f90":8:7)):
+    %0 = llvm.mlir.constant(0 : i32) : i32 loc(#loc2)
+    omp.yield(%0 : i32) loc(#loc2)
+  } combiner {
+  ^bb0(%arg0: i32 loc("test.f90":8:7), %arg1: i32 loc("test.f90":8:7)):
+    %0 = llvm.add %arg0, %arg1 : i32 loc(#loc2)
+    omp.yield(%0 : i32) loc(#loc2)
+  } loc(#loc2)
+  llvm.func @_QQmain() {
+    %0 = llvm.mlir.constant(1 : i64) : i64 loc(#loc4)
+    %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr<5> loc(#loc4)
+    %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
+    %3 = llvm.mlir.constant(1 : i64) : i64 loc(#loc1)
+    %4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc1)
+    %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc1)
+    %6 = llvm.mlir.constant(8191 : index) : i64 loc(#loc5)
+    %7 = llvm.mlir.constant(0 : index) : i64 loc(#loc5)
+    %8 = llvm.mlir.constant(1 : index) : i64 loc(#loc5)
+    %9 = llvm.mlir.constant(0 : i32) : i32 loc(#loc5)
+    %10 = llvm.mlir.constant(8192 : index) : i64 loc(#loc5)
+    %11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr<1> loc(#loc6)
+    %12 = llvm.addrspacecast %11 : !llvm.ptr<1> to !llvm.ptr loc(#loc6)
+    llvm.store %9, %2 : i32, !llvm.ptr loc(#loc7)
+    %15 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} loc(#loc4)
+    %16 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc7)
+    %17 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64) loc(#loc7)
+    %18 = omp.map.info var_ptr(%12 : !llvm.ptr, !llvm.array<8192 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%17) -> !llvm.ptr {name = "arr"} loc(#loc7)
+    omp.target map_entries(%15 -> %arg0, %16 -> %arg1, %18 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      %19 = llvm.mlir.constant(8192 : i32) : i32 loc(#loc5)
+      %20 = llvm.mlir.constant(1 : i32) : i32 loc(#loc5)
+      %21 = llvm.mlir.constant(8192 : index) : i64 loc(#loc6)
+      omp.teams reduction(@add_reduction_i32 %arg0 -> %arg3 : !llvm.ptr) {
+        omp.parallel private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) {
+          omp.distribute {
+            omp.wsloop reduction(@add_reduction_i32 %arg3 -> %arg5 : !llvm.ptr) {
+              omp.loop_nest (%arg6) : i32 = (%20) to (%19) inclusive step (%20) {
+                llvm.store %arg6, %arg4 : i32, !llvm.ptr loc(#loc2)
+                %22 = llvm.load %arg5 : !llvm.ptr -> i32 loc(#loc8)
+                %23 = llvm.load %arg4 : !llvm.ptr -> i32 loc(#loc8)
+                %34 = llvm.add %22, %23 : i32 loc(#loc8)
+                llvm.store %34, %arg5 : i32, !llvm.ptr loc(#loc8)
+                omp.yield loc(#loc2)
+              } loc(#loc2)
+            } {omp.composite} loc(#loc2)
+          } {omp.composite} loc(#loc2)
+          omp.terminator loc(#loc2)
+        } {omp.composite} loc(#loc2)
+        omp.terminator loc(#loc2)
+      } loc(#loc2)
+      omp.terminator loc(#loc2)
+    } loc(#loc13)
+    llvm.return loc(#loc9)
+  } loc(#loc12)
+  llvm.mlir.global internal @_QFEarr() {addr_space = 1 : i32} : !llvm.array<8192 x i32> {
+    %0 = llvm.mlir.zero : !llvm.array<8192 x i32> loc(#loc6)
+    llvm.return %0 : !llvm.array<8192 x i32> loc(#loc6)
+  } loc(#loc6)
+} loc(#loc)
+
+#loc = loc("test.f90":4:18)
+#loc1 = loc("test.f90":4:18)
+#loc2 = loc("test.f90":8:7)
+#loc3 = loc("test.f90":1:7)
+#loc4 = loc("test.f90":3:18)
+#loc5 = loc(unknown)
+#loc6 = loc("test.f90":5:18)
+#loc7 = loc("test.f90":6:7)
+#loc8 = loc("test.f90":10:7)
+#loc9 = loc("test.f90":16:7)
+
+#di_file = #llvm.di_file<"target7.f90" in "">
+#di_null_type = #llvm.di_null_type
+#di_compile_unit = #llvm.di_compile_unit<id = distinct[0]<>,
+ sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang",
+ isOptimized = false, emissionKind = LineTablesOnly>
+#di_subroutine_type = #llvm.di_subroutine_type<
+  callingConvention = DW_CC_program, types = #di_null_type>
+#di_subprogram = #llvm.di_subprogram<id = distinct[1]<>,
+  compileUnit = #di_compile_unit, scope = #di_file, name = "main",
+  file = #di_file, subprogramFlags = "Definition|MainSubprogram",
+  type = #di_subroutine_type>
+#di_subprogram1 = #llvm.di_subprogram<compileUnit = #di_compile_unit,
+  name = "target", file = #di_file, subprogramFlags = "Definition",
+  type = #di_subroutine_type>
+
+
+#loc12 = loc(fused<#di_subprogram>[#loc3])
+#loc13 = loc(fused<#di_subprogram1>[#loc2])
+
+// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @"__omp_offloading_{{.*}}__QQmain_l8_omp$reduction$reduction_func.1"
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func.2
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func.3
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_list_to_global_copy_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_list_to_global_reduce_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_global_to_list_copy_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_global_to_list_reduce_func
+// CHECK-NOT: !dbg
+// CHECK: }

``````````

</details>


https://github.com/llvm/llvm-project/pull/147950


More information about the llvm-commits mailing list