[Mlir-commits] [mlir] [acc] add RegionBranchOpInterface to acc.loop (PR #172940)

Susan Tan ス-ザン タン llvmlistbot at llvm.org
Fri Dec 19 09:37:41 PST 2025


https://github.com/SusanTan updated https://github.com/llvm/llvm-project/pull/172940

>From d7e969e63f25ad9f2c0c6b8b3b6dee7402d7540c Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Thu, 18 Dec 2025 18:01:46 -0800
Subject: [PATCH 1/4] add RegionBranchOpInterface to acc.loop

---
 .../mlir/Dialect/OpenACC/OpenACCOps.td        |  1 +
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       | 16 ++++++-
 .../OpenACC/region-branchop-interface.mlir    | 47 +++++++++++++++++++
 3 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 349dc8bb858b5..14902730ba165 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2576,6 +2576,7 @@ def OpenACC_LoopOp
                    RecursiveMemoryEffects,
                    DeclareOpInterfaceMethods<ComputeRegionOpInterface>,
                    DeclareOpInterfaceMethods<LoopLikeOpInterface>,
+                   DeclareOpInterfaceMethods<RegionBranchOpInterface>,
                    MemoryEffects<[MemWrite<OpenACC_ConstructResource>]>]> {
   let summary = "loop construct";
 
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 3dea621003a75..93f5a592ee41e 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -391,7 +391,7 @@ void OpenACCDialect::initialize() {
 
 //===----------------------------------------------------------------------===//
 // RegionBranchOpInterface for acc.kernels / acc.parallel / acc.serial /
-// acc.kernel_environment / acc.data / acc.host_data
+// acc.kernel_environment / acc.data / acc.host_data / acc.loop
 //===----------------------------------------------------------------------===//
 
 /// Generic helper for single-region OpenACC ops that execute their body once
@@ -444,6 +444,20 @@ void HostDataOp::getSuccessorRegions(
                                     regions);
 }
 
+void LoopOp::getSuccessorRegions(RegionBranchPoint point,
+                                 SmallVectorImpl<RegionSuccessor> &regions) {
+  if (point.isParent()) {
+    // Entering the loop: parent -> body region.
+    regions.push_back(RegionSuccessor(&getRegion()));
+    return;
+  }
+
+  // acc.yield -> parent
+  regions.push_back(RegionSuccessor(
+      getOperation(), Operation::result_range(getOperation()->result_begin(),
+                                              getOperation()->result_end())));
+}
+
 //===----------------------------------------------------------------------===//
 // device_type support helpers
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
index a97fdb7d78ad1..b2d4688ab5e9c 100644
--- a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
+++ b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
@@ -142,4 +142,51 @@ func.func @last_mod_openacc_host_data(%arg0: memref<f32>, %mapped: memref<f32>)
   return %arg0 : memref<f32>
 }
 
+// -----
+
+// CHECK-LABEL: test_tag: acc_loop_before:
+// CHECK:  operand #0
+// CHECK-NEXT:   - pre
+// CHECK-LABEL: test_tag: acc_loop_inside:
+// CHECK:  operand #0
+// CHECK-NEXT:   - loop_region
+// CHECK-LABEL: test_tag: acc_loop_after:
+// CHECK:  operand #0
+// CHECK-NEXT:   - loop_region
+// CHECK-LABEL: test_tag: acc_loop_post:
+// CHECK:  operand #0
+// CHECK-NEXT:   - post_loop
+// CHECK-LABEL: test_tag: acc_loop_return:
+// CHECK:  operand #0
+// CHECK-NEXT:   - post_loop
+func.func @last_mod_openacc_loop(%arg0: memref<f32>) -> memref<f32> {
+  %zero = arith.constant 0.0 : f32
+  // Store before the loop.
+  memref.store %zero, %arg0[] {tag_name = "pre"} : memref<f32>
+  memref.load %arg0[] {tag = "acc_loop_before"} : memref<f32>
+
+  %one = arith.constant 1.0 : f32
+  %c1_i32 = arith.constant 1 : i32
+  %c10_i32 = arith.constant 10 : i32
+
+  // Single-region loop: store and load inside.
+  acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32)
+      step (%c1_i32 : i32) {
+    // Store inside the loop.
+    memref.store %one, %arg0[] {tag_name = "loop_region"} : memref<f32>
+    memref.load %arg0[] {tag = "acc_loop_inside"} : memref<f32>
+    acc.yield
+  } attributes {auto_ = [#acc.device_type<none>],
+                inclusiveUpperbound = array<i1: true>}
+
+  // After the loop, the last writer is still the store in the loop.
+  memref.load %arg0[] {tag = "acc_loop_after"} : memref<f32>
+
+  // Store after the loop.
+  memref.store %zero, %arg0[] {tag_name = "post_loop"} : memref<f32>
+  memref.load %arg0[] {tag = "acc_loop_post"} : memref<f32>
+
+  // At return, the last writer should be the post-loop store.
+  return {tag = "acc_loop_return"} %arg0 : memref<f32>
+}
 

>From 38a29d0675798aa51cd0809da103cb9f003dcbfd Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Thu, 18 Dec 2025 18:11:49 -0800
Subject: [PATCH 2/4] refactor

---
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 93f5a592ee41e..3109efbdb2f66 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -446,16 +446,8 @@ void HostDataOp::getSuccessorRegions(
 
 void LoopOp::getSuccessorRegions(RegionBranchPoint point,
                                  SmallVectorImpl<RegionSuccessor> &regions) {
-  if (point.isParent()) {
-    // Entering the loop: parent -> body region.
-    regions.push_back(RegionSuccessor(&getRegion()));
-    return;
-  }
-
-  // acc.yield -> parent
-  regions.push_back(RegionSuccessor(
-      getOperation(), Operation::result_range(getOperation()->result_begin(),
-                                              getOperation()->result_end())));
+  getSingleRegionOpSuccessorRegions(getOperation(), getRegion(), point,
+                                    regions);
 }
 
 //===----------------------------------------------------------------------===//

>From 043b725b98f0d6b703f3ab9f8c30829cfe863c35 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Fri, 19 Dec 2025 09:35:22 -0800
Subject: [PATCH 3/4] change the implementation

---
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       | 17 ++++-
 .../OpenACC/region-branchop-interface.mlir    | 68 +++++++++++++++----
 2 files changed, 71 insertions(+), 14 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 3109efbdb2f66..7f9f2a59f83be 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -446,8 +446,21 @@ void HostDataOp::getSuccessorRegions(
 
 void LoopOp::getSuccessorRegions(RegionBranchPoint point,
                                  SmallVectorImpl<RegionSuccessor> &regions) {
-  getSingleRegionOpSuccessorRegions(getOperation(), getRegion(), point,
-                                    regions);
+  // Unstructured loops: the body may contain arbitrary CFG and early exits.
+  // At the RegionBranch level, only model entry into the body and exit to the
+  // parent; any backedges are represented inside the region CFG.
+  if (getUnstructured()) {
+    if (point.isParent()) {
+      regions.push_back(RegionSuccessor(&getRegion()));
+      return;
+    }
+    regions.push_back(RegionSuccessor(getOperation(), getResults()));
+    return;
+  }
+
+  // Structured loops: model a loop-shaped region graph similar to scf.for.
+  regions.push_back(RegionSuccessor(&getRegion()));
+  regions.push_back(RegionSuccessor(getOperation(), getResults()));
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
index b2d4688ab5e9c..5b9347767b418 100644
--- a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
+++ b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
@@ -144,6 +144,8 @@ func.func @last_mod_openacc_host_data(%arg0: memref<f32>, %mapped: memref<f32>)
 
 // -----
 
+// structured acc.loop: the RegionBranch is modeled
+// as scf.for with a backedge to the parent op.
 // CHECK-LABEL: test_tag: acc_loop_before:
 // CHECK:  operand #0
 // CHECK-NEXT:   - pre
@@ -152,7 +154,10 @@ func.func @last_mod_openacc_host_data(%arg0: memref<f32>, %mapped: memref<f32>)
 // CHECK-NEXT:   - loop_region
 // CHECK-LABEL: test_tag: acc_loop_after:
 // CHECK:  operand #0
-// CHECK-NEXT:   - loop_region
+// CHECK-DAG:   - pre
+// CHECK-DAG:   - loop_region
+// the last writer is either the pre-loop store or
+// the store in the loop depending on the iteration count
 // CHECK-LABEL: test_tag: acc_loop_post:
 // CHECK:  operand #0
 // CHECK-NEXT:   - post_loop
@@ -161,32 +166,71 @@ func.func @last_mod_openacc_host_data(%arg0: memref<f32>, %mapped: memref<f32>)
 // CHECK-NEXT:   - post_loop
 func.func @last_mod_openacc_loop(%arg0: memref<f32>) -> memref<f32> {
   %zero = arith.constant 0.0 : f32
-  // Store before the loop.
   memref.store %zero, %arg0[] {tag_name = "pre"} : memref<f32>
   memref.load %arg0[] {tag = "acc_loop_before"} : memref<f32>
-
   %one = arith.constant 1.0 : f32
   %c1_i32 = arith.constant 1 : i32
   %c10_i32 = arith.constant 10 : i32
-
-  // Single-region loop: store and load inside.
   acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32)
       step (%c1_i32 : i32) {
-    // Store inside the loop.
     memref.store %one, %arg0[] {tag_name = "loop_region"} : memref<f32>
     memref.load %arg0[] {tag = "acc_loop_inside"} : memref<f32>
     acc.yield
   } attributes {auto_ = [#acc.device_type<none>],
                 inclusiveUpperbound = array<i1: true>}
-
-  // After the loop, the last writer is still the store in the loop.
   memref.load %arg0[] {tag = "acc_loop_after"} : memref<f32>
-
-  // Store after the loop.
   memref.store %zero, %arg0[] {tag_name = "post_loop"} : memref<f32>
   memref.load %arg0[] {tag = "acc_loop_post"} : memref<f32>
-
-  // At return, the last writer should be the post-loop store.
   return {tag = "acc_loop_return"} %arg0 : memref<f32>
 }
 
+// -----
+
+// Unstructured acc.loop: the RegionBranch is modeled with explicit CFG and early
+// exits, and the RegionBranch graph only exposes a single entry and single
+// exit edge (no region backedge).
+//
+// CHECK-LABEL: test_tag: acc_loop_unstructured_before:
+// CHECK:  operand #0
+// CHECK-NEXT:   - pre
+// CHECK-LABEL: test_tag: acc_loop_unstructured_after:
+// CHECK:  operand #0
+// CHECK-DAG:   - loop_unstructured_early
+// CHECK-DAG:   - loop_unstructured_normal
+// the last writer can be either of the two stores in the loop
+func.func @last_mod_openacc_loop_unstructured(%arg0: memref<f32>) -> memref<f32> {
+  %zero = arith.constant 0.0 : f32
+  %one  = arith.constant 1.0 : f32
+  memref.store %zero, %arg0[] {tag_name = "pre"} : memref<f32>
+  memref.load %arg0[] {tag = "acc_loop_unstructured_before"} : memref<f32>
+  %c0_i32 = arith.constant 0 : i32
+  %c1_i32 = arith.constant 1 : i32
+  %c5_i32 = arith.constant 5 : i32
+  acc.loop {
+  ^entry:
+    cf.br ^header(%c0_i32 : i32)
+
+  ^header(%iv: i32):
+    %is_early = arith.cmpi eq, %iv, %c1_i32 : i32
+    cf.cond_br %is_early, ^early_exit, ^cont
+
+  ^cont:
+    // Normal loop increment and exit when iv reaches 5.
+    %iv_next = arith.addi %iv, %c1_i32 : i32
+    %is_done = arith.cmpi eq, %iv_next, %c5_i32 : i32
+    cf.cond_br %is_done, ^normal_exit, ^header(%iv_next : i32)
+
+  ^early_exit:
+    // One exit path with its own store.
+    memref.store %one, %arg0[] {tag_name = "loop_unstructured_early"} : memref<f32>
+    acc.yield
+
+  ^normal_exit:
+    // Another exit path with a different store.
+    memref.store %one, %arg0[] {tag_name = "loop_unstructured_normal"} : memref<f32>
+    acc.yield
+  } attributes {auto_ = [#acc.device_type<none>], unstructured}
+  memref.load %arg0[] {tag = "acc_loop_unstructured_after"} : memref<f32>
+  return %arg0 : memref<f32>
+}
+

>From 62dccc2689c747c56b53f99a3017a2339ef18445 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Fri, 19 Dec 2025 09:37:28 -0800
Subject: [PATCH 4/4] tweak the test

---
 mlir/test/Dialect/OpenACC/region-branchop-interface.mlir | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
index 5b9347767b418..28789868fdef9 100644
--- a/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
+++ b/mlir/test/Dialect/OpenACC/region-branchop-interface.mlir
@@ -176,8 +176,7 @@ func.func @last_mod_openacc_loop(%arg0: memref<f32>) -> memref<f32> {
     memref.store %one, %arg0[] {tag_name = "loop_region"} : memref<f32>
     memref.load %arg0[] {tag = "acc_loop_inside"} : memref<f32>
     acc.yield
-  } attributes {auto_ = [#acc.device_type<none>],
-                inclusiveUpperbound = array<i1: true>}
+  } attributes {auto_ = [#acc.device_type<none>]}
   memref.load %arg0[] {tag = "acc_loop_after"} : memref<f32>
   memref.store %zero, %arg0[] {tag_name = "post_loop"} : memref<f32>
   memref.load %arg0[] {tag = "acc_loop_post"} : memref<f32>
@@ -221,12 +220,10 @@ func.func @last_mod_openacc_loop_unstructured(%arg0: memref<f32>) -> memref<f32>
     cf.cond_br %is_done, ^normal_exit, ^header(%iv_next : i32)
 
   ^early_exit:
-    // One exit path with its own store.
     memref.store %one, %arg0[] {tag_name = "loop_unstructured_early"} : memref<f32>
     acc.yield
 
   ^normal_exit:
-    // Another exit path with a different store.
     memref.store %one, %arg0[] {tag_name = "loop_unstructured_normal"} : memref<f32>
     acc.yield
   } attributes {auto_ = [#acc.device_type<none>], unstructured}



More information about the Mlir-commits mailing list