[Mlir-commits] [llvm] [mlir] [flang][OpenMP] Support multi-block reduction combiner regions on the GPU (PR #156837)
Kareem Ergawy
llvmlistbot at llvm.org
Mon Sep 22 22:57:08 PDT 2025
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/156837
>From fc86590a166af58afd3c44d891b257b322b64c0b Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 4 Sep 2025 01:06:21 -0500
Subject: [PATCH 1/3] [flang][OpenMP] Support multi-block reduction combiner
regions on the GPU
Fixes a bug related to insertion points when inlining multi-block
reduction combiner regions. The IP at the end of the inlined region was
not used, resulting in emitting BBs with multiple terminators.
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 3 +
.../omptarget-multi-block-reduction.mlir | 85 +++++++++++++++++++
2 files changed, 88 insertions(+)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 220eee3cb8b08..b516c3c3f4efe 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3507,6 +3507,8 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
return AfterIP.takeError();
if (!Builder.GetInsertBlock())
return ReductionFunc;
+
+ Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint());
Builder.CreateStore(Reduced, LHSPtr);
}
}
@@ -3751,6 +3753,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
if (!AfterIP)
return AfterIP.takeError();
+ Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint());
Builder.CreateStore(Reduced, LHS, false);
}
}
diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir
new file mode 100644
index 0000000000000..aaf06d2d0e0c2
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir
@@ -0,0 +1,85 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Verifies that the IR builder can handle reductions with multi-block combiner
+// regions on the GPU.
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : ui64, "dlti.global_memory_space" = 1 : ui64>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
+ llvm.func @bar() {}
+ llvm.func @baz() {}
+
+ omp.declare_reduction @add_reduction_byref_box_5xf32 : !llvm.ptr alloc {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr<5>
+ %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ omp.yield(%2 : !llvm.ptr)
+ } init {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ omp.yield(%arg1 : !llvm.ptr)
+ } combiner {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ llvm.call @bar() : () -> ()
+ llvm.br ^bb3
+
+ ^bb3: // pred: ^bb0
+ llvm.call @baz() : () -> ()
+ omp.yield(%arg0 : !llvm.ptr)
+ }
+ llvm.func @foo_() {
+ %c1 = llvm.mlir.constant(1 : i64) : i64
+ %10 = llvm.alloca %c1 x !llvm.array<5 x f32> {bindc_name = "x"} : (i64) -> !llvm.ptr<5>
+ %11 = llvm.addrspacecast %10 : !llvm.ptr<5> to !llvm.ptr
+ %74 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<5 x f32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%74 -> %arg0 : !llvm.ptr) {
+ %c1_2 = llvm.mlir.constant(1 : i32) : i32
+ %c10 = llvm.mlir.constant(10 : i32) : i32
+ omp.teams reduction(byref @add_reduction_byref_box_5xf32 %arg0 -> %arg2 : !llvm.ptr) {
+ omp.parallel {
+ omp.distribute {
+ omp.wsloop {
+ omp.loop_nest (%arg5) : i32 = (%c1_2) to (%c10) inclusive step (%c1_2) {
+ omp.yield
+ }
+ } {omp.composite}
+ } {omp.composite}
+ omp.terminator
+ } {omp.composite}
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: call void @__kmpc_parallel_51({{.*}}, i32 1, i32 -1, i32 -1,
+// CHECK-SAME: ptr @[[PAR_OUTLINED:.*]], ptr null, ptr %2, i64 1)
+
+// CHECK: define internal void @[[PAR_OUTLINED]]{{.*}} {
+// CHECK: .omp.reduction.then:
+// CHECK: br label %omp.reduction.nonatomic.body
+
+// CHECK: omp.reduction.nonatomic.body:
+// CHECK: call void @bar()
+// CHECK: br label %[[BODY_2ND_BB:.*]]
+
+// CHECK: [[BODY_2ND_BB]]:
+// CHECK: call void @baz()
+// CHECK: br label %[[CONT_BB:.*]]
+
+// CHECK: [[CONT_BB]]:
+// CHECK: br label %.omp.reduction.done
+// CHECK: }
+
+// CHECK: define internal void @"{{.*}}$reduction$reduction_func"(ptr noundef %0, ptr noundef %1) #0 {
+// CHECK: br label %omp.reduction.nonatomic.body
+
+// CHECK: [[BODY_2ND_BB:.*]]:
+// CHECK: call void @baz()
+// CHECK: br label %omp.region.cont
+
+
+// CHECK: omp.reduction.nonatomic.body:
+// CHECK: call void @bar()
+// CHECK: br label %[[BODY_2ND_BB]]
+
+// CHECK: }
>From 9c9443641db5686ac52ca5dfe534281db0a6262d Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Sat, 13 Sep 2025 06:44:15 -0500
Subject: [PATCH 2/3] review comments
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index b516c3c3f4efe..6d948f184392d 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3508,7 +3508,7 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
if (!Builder.GetInsertBlock())
return ReductionFunc;
- Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint());
+ Builder.restoreIP(*AfterIP);
Builder.CreateStore(Reduced, LHSPtr);
}
}
@@ -3753,7 +3753,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
if (!AfterIP)
return AfterIP.takeError();
- Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint());
+ Builder.restoreIP(*AfterIP);
Builder.CreateStore(Reduced, LHS, false);
}
}
>From d3b28d0b1ab41cc6b8ee5afc01e52ccf08bd23e8 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 16 Sep 2025 07:42:59 -0500
Subject: [PATCH 3/3] more test checks
---
mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir
index aaf06d2d0e0c2..87ff0ba786648 100644
--- a/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir
@@ -67,7 +67,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 :
// CHECK: br label %[[CONT_BB:.*]]
// CHECK: [[CONT_BB]]:
-// CHECK: br label %.omp.reduction.done
+// CHECK-NEXT: %[[RED_RHS:.*]] = phi ptr [ %final.rhs, %{{.*}} ]
+// CHECK-NEXT: store ptr %[[RED_RHS]], ptr %{{.*}}, align 8
+// CHECK-NEXT: br label %.omp.reduction.done
// CHECK: }
// CHECK: define internal void @"{{.*}}$reduction$reduction_func"(ptr noundef %0, ptr noundef %1) #0 {
More information about the Mlir-commits
mailing list