[llvm] [mlir] [OMPIRBuilder][debug] Don't drop debug info for loop constructs. (PR #144393)
Abid Qadeer via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 16 09:58:02 PDT 2025
https://github.com/abidh created https://github.com/llvm/llvm-project/pull/144393
In OMPIRBuilder, we have many cases where we don't handle the debug location correctly while changing the location or insertion point. This is one of those cases.
Please see the following test program.
```
program main
implicit none
integer i, j
integer array(16384)
!$omp target teams distribute
DO i=1,16384
!$omp parallel do
DO j=1,16384
array(j) = i
ENDDO
!$omp end parallel do
ENDDO
!$omp end target teams distribute
print *, array
end program main
```
Compiling it with the following command
`flang -g -O2 -fopenmp test.f90 -o test --offload-arch=gfx90a`
fails in verification with the following error: `!dbg attachment points at wrong subprogram for function`
This happens because we were dropping the debug location in `createCanonicalLoop`, so calls to functions like `__kmpc_distribute_static_4u` were generated without a debug location. When such a call gets inlined, the locations inside the callee are not adjusted because the call instruction does not have a debug location (`llvm/lib/Transforms/Utils/InlineFunction.cpp:fixupLineNumbers`). Later the Verifier finds that the caller has instructions with debug locations that point to another function and fails.
The fix is simply to not drop the debug location.
>From 56b096c2b157afb22f6fb723be1405ad83a34b29 Mon Sep 17 00:00:00 2001
From: Abid Qadeer <haqadeer at amd.com>
Date: Mon, 16 Jun 2025 17:30:55 +0100
Subject: [PATCH] [OMPIRBuilder][debug] Dont drop debug info for loop
constructs.
In OMPIRBuilder, we have many cases where we don't handle the debug
location correctly while changing the location or insertion point. This
is one of those cases.
Please see the following test program.
program main
implicit none
integer i, j
integer array(16384)
!$omp target teams distribute
DO i=1,16384
!$omp parallel do
DO j=1,16384
array(j) = i
ENDDO
!$omp end parallel do
ENDDO
!$omp end target teams distribute
print *, array
end program main
Compiling it with the following command
flang -g -O2 -fopenmp test.f90 -o test --offload-arch=gfx90a
fails in verification with the following error:
!dbg attachment points at wrong subprogram for function
This happens because we were dropping the debug location in
createCanonicalLoop, so calls to functions like
__kmpc_distribute_static_4u were generated without a debug location.
When such a call gets inlined, the locations inside the callee are not
adjusted because the call instruction does not have a debug location
(llvm/lib/Transforms/Utils/InlineFunction.cpp:fixupLineNumbers).
Later the Verifier finds that the caller has instructions with debug
locations that point to another function and fails. The fix is simply
to not drop the debug location.
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 6 +-
.../LLVMIR/omptarget-debug-loop-loc.mlir | 66 +++++++++++++++++++
2 files changed, 71 insertions(+), 1 deletion(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ca3d8438654dc..e3ba2be8b67b0 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4184,7 +4184,11 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
Value *IndVar = Builder.CreateAdd(Span, Start);
return BodyGenCB(Builder.saveIP(), IndVar);
};
- LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
+ LocationDescription LoopLoc =
+ ComputeIP.isSet()
+ ? Loc
+ : LocationDescription(Builder.saveIP(),
+ Builder.getCurrentDebugLocation());
return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
}
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
new file mode 100644
index 0000000000000..a755cef98d7c4
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
@@ -0,0 +1,66 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+ omp.private {type = private} @_QFEj_private_i32 : i32 loc(#loc1)
+ omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
+ llvm.func @test() {
+ %3 = llvm.mlir.constant(1 : i64) : i64
+ %4 = llvm.alloca %3 x i32 {bindc_name = "j"} : (i64) -> !llvm.ptr<5> loc(#loc4)
+ %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
+ %6 = llvm.mlir.constant(1 : i64) : i64
+ %7 = llvm.alloca %6 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc4)
+ %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ %9 = llvm.mlir.constant(16383 : index) : i64
+ %10 = llvm.mlir.constant(0 : index) : i64
+ %11 = llvm.mlir.constant(1 : index) : i64
+ %12 = llvm.mlir.constant(16384 : i32) : i32
+ %14 = llvm.mlir.addressof @_QFEarray : !llvm.ptr
+ %18 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
+ %20 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "j"} loc(#loc3)
+ %22 = omp.map.bounds lower_bound(%10 : i64) upper_bound(%9 : i64) extent(%9 : i64) stride(%11 : i64) start_idx(%11 : i64) loc(#loc3)
+ %23 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.array<16384 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%22) -> !llvm.ptr {name = "array"} loc(#loc3)
+ %24 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
+ omp.target map_entries(%18 -> %arg0, %20 -> %arg2, %23 -> %arg4, %24 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ %25 = llvm.mlir.constant(1 : i32) : i32
+ %27 = llvm.mlir.constant(16384 : i32) : i32
+ omp.teams {
+ omp.distribute private(@_QFEi_private_i32 %arg5 -> %arg6 : !llvm.ptr) {
+ omp.loop_nest (%arg7) : i32 = (%25) to (%27) inclusive step (%25) {
+ omp.parallel {
+ omp.wsloop private(@_QFEj_private_i32 %arg2 -> %arg8 : !llvm.ptr) {
+ omp.loop_nest (%arg9) : i32 = (%25) to (%27) inclusive step (%25) {
+ llvm.store %arg9, %arg8 : i32, !llvm.ptr loc(#loc9)
+ omp.yield
+ } loc(#loc9)
+ } loc(#loc9)
+ omp.terminator loc(#loc9)
+ } loc(#loc9)
+ omp.yield loc(#loc9)
+ } loc(#loc9)
+ } loc(#loc9)
+ omp.terminator loc(#loc9)
+ } loc(#loc9)
+ omp.terminator loc(#loc9)
+ } loc(#loc9)
+ llvm.return loc(#loc9)
+ } loc(#loc14)
+ llvm.mlir.global internal @_QFEarray() {addr_space = 0 : i32} : !llvm.array<16384 x i32> {
+ %0 = llvm.mlir.zero : !llvm.array<16384 x i32>
+ llvm.return %0 : !llvm.array<16384 x i32>
+ } loc(#loc2)
+}
+#di_file = #llvm.di_file<"test.f90" in "">
+#di_null_type = #llvm.di_null_type
+#loc1 = loc("test.f90":4:23)
+#loc2 = loc("test.f90":4:15)
+#loc3 = loc("test.f90":1:7)
+#loc4 = loc("test.f90":4:18)
+#loc9 = loc("test.f90":13:11)
+#di_compile_unit = #llvm.di_compile_unit<id = distinct[0]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang", isOptimized = true, emissionKind = LineTablesOnly>
+#di_subroutine_type = #llvm.di_subroutine_type<callingConvention = DW_CC_program, types = #di_null_type>
+#di_subprogram = #llvm.di_subprogram<id = distinct[1]<>, compileUnit = #di_compile_unit, scope = #di_file, name = "main", file = #di_file, subprogramFlags = "Definition|Optimized|MainSubprogram", type = #di_subroutine_type>
+#loc14 = loc(fused<#di_subprogram>[#loc3])
+
+
+// CHECK: call void @__kmpc_distribute_static{{.*}}!dbg
+
More information about the llvm-commits
mailing list