[Mlir-commits] [mlir] [acc] add RegionBranchOpInterface to acc.loop (PR #172940)
Susan Tan ス-ザン タン
llvmlistbot at llvm.org
Fri Dec 19 09:37:41 PST 2025
https://github.com/SusanTan updated https://github.com/llvm/llvm-project/pull/172940
>From d7e969e63f25ad9f2c0c6b8b3b6dee7402d7540c Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Thu, 18 Dec 2025 18:01:46 -0800
Subject: [PATCH 1/4] add RegionBranchOpInterface to acc.loop
---
.../mlir/Dialect/OpenACC/OpenACCOps.td | 1 +
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 16 ++++++-
.../OpenACC/region-branchop-interface.mlir | 47 +++++++++++++++++++
3 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 349dc8bb858b5..14902730ba165 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2576,6 +2576,7 @@ def OpenACC_LoopOp
RecursiveMemoryEffects,
DeclareOpInterfaceMethods<ComputeRegionOpInterface>,
DeclareOpInterfaceMethods<LoopLikeOpInterface>,
+ DeclareOpInterfaceMethods<RegionBranchOpInterface>,
MemoryEffects<[MemWrite<OpenACC_ConstructResource>]>]> {
let summary = "loop construct";
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 3dea621003a75..93f5a592ee41e 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -391,7 +391,7 @@ void OpenACCDialect::initialize() {
//===----------------------------------------------------------------------===//
// RegionBranchOpInterface for acc.kernels / acc.parallel / acc.serial /
-// acc.kernel_environment / acc.data / acc.host_data
+// acc.kernel_environment / acc.data / acc.host_data / acc.loop
//===----------------------------------------------------------------------===//
/// Generic helper for single-region OpenACC ops that execute their body once
@@ -444,6 +444,20 @@ void HostDataOp::getSuccessorRegions(
regions);
}
+void LoopOp::getSuccessorRegions(RegionBranchPoint point,
+ SmallVectorImpl<RegionSuccessor> ®ions) {
+ if (point.isParent()) {
+ // Entering the loop: parent -> body region.
+ regions.push_back(RegionSuccessor(&getRegion()));
+ return;
+ }
+
+ // acc.yield -> parent
+ regions.push_back(RegionSuccessor(
+ getOperation(), Operation::result_range(getOperation()->result_begin(),
+ getOperation()->result_end())));
+}
+
//===----------------------------------------------------------------------===//
// device_type support helpers
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
index a97fdb7d78ad1..b2d4688ab5e9c 100644
--- a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
+++ b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
@@ -142,4 +142,51 @@ func.func @last_mod_openacc_host_data(%arg0: memref<f32>, %mapped: memref<f32>)
return %arg0 : memref<f32>
}
+// -----
+
+// CHECK-LABEL: test_tag: acc_loop_before:
+// CHECK: operand #0
+// CHECK-NEXT: - pre
+// CHECK-LABEL: test_tag: acc_loop_inside:
+// CHECK: operand #0
+// CHECK-NEXT: - loop_region
+// CHECK-LABEL: test_tag: acc_loop_after:
+// CHECK: operand #0
+// CHECK-NEXT: - loop_region
+// CHECK-LABEL: test_tag: acc_loop_post:
+// CHECK: operand #0
+// CHECK-NEXT: - post_loop
+// CHECK-LABEL: test_tag: acc_loop_return:
+// CHECK: operand #0
+// CHECK-NEXT: - post_loop
+func.func @last_mod_openacc_loop(%arg0: memref<f32>) -> memref<f32> {
+ %zero = arith.constant 0.0 : f32
+ // Store before the loop.
+ memref.store %zero, %arg0[] {tag_name = "pre"} : memref<f32>
+ memref.load %arg0[] {tag = "acc_loop_before"} : memref<f32>
+
+ %one = arith.constant 1.0 : f32
+ %c1_i32 = arith.constant 1 : i32
+ %c10_i32 = arith.constant 10 : i32
+
+ // Single-region loop: store and load inside.
+ acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32)
+ step (%c1_i32 : i32) {
+ // Store inside the loop.
+ memref.store %one, %arg0[] {tag_name = "loop_region"} : memref<f32>
+ memref.load %arg0[] {tag = "acc_loop_inside"} : memref<f32>
+ acc.yield
+ } attributes {auto_ = [#acc.device_type<none>],
+ inclusiveUpperbound = array<i1: true>}
+
+ // After the loop, the last writer is still the store in the loop.
+ memref.load %arg0[] {tag = "acc_loop_after"} : memref<f32>
+
+ // Store after the loop.
+ memref.store %zero, %arg0[] {tag_name = "post_loop"} : memref<f32>
+ memref.load %arg0[] {tag = "acc_loop_post"} : memref<f32>
+
+ // At return, the last writer should be the post-loop store.
+ return {tag = "acc_loop_return"} %arg0 : memref<f32>
+}
>From 38a29d0675798aa51cd0809da103cb9f003dcbfd Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Thu, 18 Dec 2025 18:11:49 -0800
Subject: [PATCH 2/4] refactor
---
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 93f5a592ee41e..3109efbdb2f66 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -446,16 +446,8 @@ void HostDataOp::getSuccessorRegions(
void LoopOp::getSuccessorRegions(RegionBranchPoint point,
SmallVectorImpl<RegionSuccessor> ®ions) {
- if (point.isParent()) {
- // Entering the loop: parent -> body region.
- regions.push_back(RegionSuccessor(&getRegion()));
- return;
- }
-
- // acc.yield -> parent
- regions.push_back(RegionSuccessor(
- getOperation(), Operation::result_range(getOperation()->result_begin(),
- getOperation()->result_end())));
+ getSingleRegionOpSuccessorRegions(getOperation(), getRegion(), point,
+ regions);
}
//===----------------------------------------------------------------------===//
>From 043b725b98f0d6b703f3ab9f8c30829cfe863c35 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Fri, 19 Dec 2025 09:35:22 -0800
Subject: [PATCH 3/4] change the implementation
---
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 17 ++++-
.../OpenACC/region-branchop-interface.mlir | 68 +++++++++++++++----
2 files changed, 71 insertions(+), 14 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 3109efbdb2f66..7f9f2a59f83be 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -446,8 +446,21 @@ void HostDataOp::getSuccessorRegions(
void LoopOp::getSuccessorRegions(RegionBranchPoint point,
SmallVectorImpl<RegionSuccessor> ®ions) {
- getSingleRegionOpSuccessorRegions(getOperation(), getRegion(), point,
- regions);
+ // Unstructured loops: the body may contain arbitrary CFG and early exits.
+ // At the RegionBranch level, only model entry into the body and exit to the
+ // parent; any backedges are represented inside the region CFG.
+ if (getUnstructured()) {
+ if (point.isParent()) {
+ regions.push_back(RegionSuccessor(&getRegion()));
+ return;
+ }
+ regions.push_back(RegionSuccessor(getOperation(), getResults()));
+ return;
+ }
+
+ // Structured loops: model a loop-shaped region graph similar to scf.for.
+ regions.push_back(RegionSuccessor(&getRegion()));
+ regions.push_back(RegionSuccessor(getOperation(), getResults()));
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
index b2d4688ab5e9c..5b9347767b418 100644
--- a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
+++ b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
@@ -144,6 +144,8 @@ func.func @last_mod_openacc_host_data(%arg0: memref<f32>, %mapped: memref<f32>)
// -----
+// structured acc.loop: the RegionBranch is modeled
+// as scf.for with a backedge to the parent op.
// CHECK-LABEL: test_tag: acc_loop_before:
// CHECK: operand #0
// CHECK-NEXT: - pre
@@ -152,7 +154,10 @@ func.func @last_mod_openacc_host_data(%arg0: memref<f32>, %mapped: memref<f32>)
// CHECK-NEXT: - loop_region
// CHECK-LABEL: test_tag: acc_loop_after:
// CHECK: operand #0
-// CHECK-NEXT: - loop_region
+// CHECK-DAG: - pre
+// CHECK-DAG: - loop_region
+// the last writer is either the pre-loop store or
+// the store in the loop depending on the iteration count
// CHECK-LABEL: test_tag: acc_loop_post:
// CHECK: operand #0
// CHECK-NEXT: - post_loop
@@ -161,32 +166,71 @@ func.func @last_mod_openacc_host_data(%arg0: memref<f32>, %mapped: memref<f32>)
// CHECK-NEXT: - post_loop
func.func @last_mod_openacc_loop(%arg0: memref<f32>) -> memref<f32> {
%zero = arith.constant 0.0 : f32
- // Store before the loop.
memref.store %zero, %arg0[] {tag_name = "pre"} : memref<f32>
memref.load %arg0[] {tag = "acc_loop_before"} : memref<f32>
-
%one = arith.constant 1.0 : f32
%c1_i32 = arith.constant 1 : i32
%c10_i32 = arith.constant 10 : i32
-
- // Single-region loop: store and load inside.
acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32)
step (%c1_i32 : i32) {
- // Store inside the loop.
memref.store %one, %arg0[] {tag_name = "loop_region"} : memref<f32>
memref.load %arg0[] {tag = "acc_loop_inside"} : memref<f32>
acc.yield
} attributes {auto_ = [#acc.device_type<none>],
inclusiveUpperbound = array<i1: true>}
-
- // After the loop, the last writer is still the store in the loop.
memref.load %arg0[] {tag = "acc_loop_after"} : memref<f32>
-
- // Store after the loop.
memref.store %zero, %arg0[] {tag_name = "post_loop"} : memref<f32>
memref.load %arg0[] {tag = "acc_loop_post"} : memref<f32>
-
- // At return, the last writer should be the post-loop store.
return {tag = "acc_loop_return"} %arg0 : memref<f32>
}
+// -----
+
+// Unstructured acc.loop: the RegionBranch is modeled with explicit CFG and early
+// exits, and the RegionBranch graph only exposes a single entry and single
+// exit edge (no region backedge).
+//
+// CHECK-LABEL: test_tag: acc_loop_unstructured_before:
+// CHECK: operand #0
+// CHECK-NEXT: - pre
+// CHECK-LABEL: test_tag: acc_loop_unstructured_after:
+// CHECK: operand #0
+// CHECK-DAG: - loop_unstructured_early
+// CHECK-DAG: - loop_unstructured_normal
+// the last writer can be either of the two stores in the loop
+func.func @last_mod_openacc_loop_unstructured(%arg0: memref<f32>) -> memref<f32> {
+ %zero = arith.constant 0.0 : f32
+ %one = arith.constant 1.0 : f32
+ memref.store %zero, %arg0[] {tag_name = "pre"} : memref<f32>
+ memref.load %arg0[] {tag = "acc_loop_unstructured_before"} : memref<f32>
+ %c0_i32 = arith.constant 0 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %c5_i32 = arith.constant 5 : i32
+ acc.loop {
+ ^entry:
+ cf.br ^header(%c0_i32 : i32)
+
+ ^header(%iv: i32):
+ %is_early = arith.cmpi eq, %iv, %c1_i32 : i32
+ cf.cond_br %is_early, ^early_exit, ^cont
+
+ ^cont:
+ // Normal loop increment and exit when iv reaches 5.
+ %iv_next = arith.addi %iv, %c1_i32 : i32
+ %is_done = arith.cmpi eq, %iv_next, %c5_i32 : i32
+ cf.cond_br %is_done, ^normal_exit, ^header(%iv_next : i32)
+
+ ^early_exit:
+ // One exit path with its own store.
+ memref.store %one, %arg0[] {tag_name = "loop_unstructured_early"} : memref<f32>
+ acc.yield
+
+ ^normal_exit:
+ // Another exit path with a different store.
+ memref.store %one, %arg0[] {tag_name = "loop_unstructured_normal"} : memref<f32>
+ acc.yield
+ } attributes {auto_ = [#acc.device_type<none>], unstructured}
+ memref.load %arg0[] {tag = "acc_loop_unstructured_after"} : memref<f32>
+ return %arg0 : memref<f32>
+}
+
>From 62dccc2689c747c56b53f99a3017a2339ef18445 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Fri, 19 Dec 2025 09:37:28 -0800
Subject: [PATCH 4/4] tweak the test
---
mlir/test/Dialect/OpenACC/region-branchop-interface.mlir | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
index 5b9347767b418..28789868fdef9 100644
--- a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
+++ b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
@@ -176,8 +176,7 @@ func.func @last_mod_openacc_loop(%arg0: memref<f32>) -> memref<f32> {
memref.store %one, %arg0[] {tag_name = "loop_region"} : memref<f32>
memref.load %arg0[] {tag = "acc_loop_inside"} : memref<f32>
acc.yield
- } attributes {auto_ = [#acc.device_type<none>],
- inclusiveUpperbound = array<i1: true>}
+ } attributes {auto_ = [#acc.device_type<none>]}
memref.load %arg0[] {tag = "acc_loop_after"} : memref<f32>
memref.store %zero, %arg0[] {tag_name = "post_loop"} : memref<f32>
memref.load %arg0[] {tag = "acc_loop_post"} : memref<f32>
@@ -221,12 +220,10 @@ func.func @last_mod_openacc_loop_unstructured(%arg0: memref<f32>) -> memref<f32>
cf.cond_br %is_done, ^normal_exit, ^header(%iv_next : i32)
^early_exit:
- // One exit path with its own store.
memref.store %one, %arg0[] {tag_name = "loop_unstructured_early"} : memref<f32>
acc.yield
^normal_exit:
- // Another exit path with a different store.
memref.store %one, %arg0[] {tag_name = "loop_unstructured_normal"} : memref<f32>
acc.yield
} attributes {auto_ = [#acc.device_type<none>], unstructured}
More information about the Mlir-commits
mailing list