[Mlir-commits] [mlir] [mlir][acc] Erase empty kernel_environment ops during canonicalization (PR #166633)

Vijay Kandiah llvmlistbot at llvm.org
Wed Nov 5 18:04:42 PST 2025


https://github.com/VijayKandiah updated https://github.com/llvm/llvm-project/pull/166633

>From 25ab71109465af76faa237f091dcecb0af1fb8fc Mon Sep 17 00:00:00 2001
From: Vijay Kandiah <vkandiah at nvidia.com>
Date: Wed, 5 Nov 2025 12:32:17 -0800
Subject: [PATCH 1/5] [mlir][acc] Erase empty kernel_environment ops during
 canonicalization

---
 .../mlir/Dialect/OpenACC/OpenACCOps.td        |  2 +
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       | 40 +++++++++++++++++++
 mlir/test/Dialect/OpenACC/canonicalize.mlir   | 26 ++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c689b7e46ea9e..5b89f741e296d 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2184,6 +2184,8 @@ def OpenACC_KernelEnvironmentOp : OpenACC_Op<"kernel_environment",
     )
     $region attr-dict
   }];
+
+  let hasCanonicalizer = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index b2f1d840f3bca..fa303ed675daf 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1042,6 +1042,37 @@ struct RemoveConstantIfConditionWithRegion : public OpRewritePattern<OpTy> {
   }
 };
 
+/// Remove empty acc.kernel_environment operations. If the operation has wait
+/// operands, create an acc.wait operation to preserve synchronization.
+struct RemoveEmptyKernelEnvironment
+    : public OpRewritePattern<acc::KernelEnvironmentOp> {
+  using OpRewritePattern<acc::KernelEnvironmentOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(acc::KernelEnvironmentOp op,
+                                PatternRewriter &rewriter) const override {
+    assert(op->getNumRegions() == 1 && "expected op to have one region");
+
+    Block &block = op.getRegion().front();
+    if (!block.empty())
+      return failure();
+
+    // Remove empty kernel environment
+    // preserve synchronization by creating acc.wait operation if needed
+    if (!op.getWaitOperands().empty()) {
+      rewriter.replaceOpWithNewOp<acc::WaitOp>(
+          op,
+          /*waitOperands=*/op.getWaitOperands(),
+          /*asyncOperand=*/Value(),
+          /*waitDevnum=*/Value(),
+          /*async=*/nullptr,
+          /*ifCond=*/Value());
+    } else
+      rewriter.eraseOp(op);
+
+    return success();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // Recipe Region Helpers
 //===----------------------------------------------------------------------===//
@@ -2690,6 +2721,15 @@ void acc::HostDataOp::getCanonicalizationPatterns(RewritePatternSet &results,
   results.add<RemoveConstantIfConditionWithRegion<HostDataOp>>(context);
 }
 
+//===----------------------------------------------------------------------===//
+// KernelEnvironmentOp
+//===----------------------------------------------------------------------===//
+
+void acc::KernelEnvironmentOp::getCanonicalizationPatterns(
+    RewritePatternSet &results, MLIRContext *context) {
+  results.add<RemoveEmptyKernelEnvironment>(context);
+}
+
 //===----------------------------------------------------------------------===//
 // LoopOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenACC/canonicalize.mlir b/mlir/test/Dialect/OpenACC/canonicalize.mlir
index fdc8e6b5cae6e..6d600a386f926 100644
--- a/mlir/test/Dialect/OpenACC/canonicalize.mlir
+++ b/mlir/test/Dialect/OpenACC/canonicalize.mlir
@@ -219,3 +219,29 @@ func.func @update_unnecessary_computations(%x: memref<i32>) {
 // CHECK-LABEL: func.func @update_unnecessary_computations
 // CHECK-NOT: acc.atomic.update
 // CHECK: acc.atomic.write
+
+// -----
+
+func.func @remove_empty_kernel_environment() {
+  acc.kernel_environment {
+  }
+  return
+}
+
+// CHECK-LABEL: func.func @remove_empty_kernel_environment
+// CHECK-NOT: acc.kernel_environment
+// CHECK: return
+
+// -----
+
+func.func @kernel_environment_with_wait(%q1: i32, %q2: i32) {
+  acc.kernel_environment wait({%q1 : i32, %q2 : i32}) {
+  }
+  return
+}
+
+// CHECK-LABEL: func.func @kernel_environment_with_wait
+// CHECK-SAME: ([[Q1:%.*]]: i32, [[Q2:%.*]]: i32)
+// CHECK-NOT: acc.kernel_environment
+// CHECK: acc.wait([[Q1]], [[Q2]] : i32, i32)
+// CHECK: return

>From d171b853be93aec3a249b709cd447388d0a2b4f3 Mon Sep 17 00:00:00 2001
From: Vijay Kandiah <vkandiah at nvidia.com>
Date: Wed, 5 Nov 2025 12:56:01 -0800
Subject: [PATCH 2/5] [mlir][acc] Fix formatting

---
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index fa303ed675daf..83b210fa6a463 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1058,7 +1058,7 @@ struct RemoveEmptyKernelEnvironment
 
     // Remove empty kernel environment
     // preserve synchronization by creating acc.wait operation if needed
-    if (!op.getWaitOperands().empty()) {
+    if (!op.getWaitOperands().empty())
       rewriter.replaceOpWithNewOp<acc::WaitOp>(
           op,
           /*waitOperands=*/op.getWaitOperands(),
@@ -1066,7 +1066,7 @@ struct RemoveEmptyKernelEnvironment
           /*waitDevnum=*/Value(),
           /*async=*/nullptr,
           /*ifCond=*/Value());
-    } else
+    else
       rewriter.eraseOp(op);
 
     return success();

>From e7cbdfeab8284e71fcadac4e85860d33d14c3e3c Mon Sep 17 00:00:00 2001
From: Vijay Kandiah <vkandiah at nvidia.com>
Date: Wed, 5 Nov 2025 12:57:11 -0800
Subject: [PATCH 3/5] [mlir][acc] Fix formatting for acc.wait op creation

---
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 83b210fa6a463..eef36b85a2803 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1061,7 +1061,7 @@ struct RemoveEmptyKernelEnvironment
     if (!op.getWaitOperands().empty())
       rewriter.replaceOpWithNewOp<acc::WaitOp>(
           op,
-          /*waitOperands=*/op.getWaitOperands(),
+          op.getWaitOperands(),
           /*asyncOperand=*/Value(),
           /*waitDevnum=*/Value(),
           /*async=*/nullptr,

>From 11ce389f692a454f373f1d97e8bcb8e5c7b198be Mon Sep 17 00:00:00 2001
From: Vijay Kandiah <vkandiah at nvidia.com>
Date: Wed, 5 Nov 2025 13:01:34 -0800
Subject: [PATCH 4/5] [mlir][acc] Fix format

---
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index eef36b85a2803..a7c1b74e7d4ff 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1059,13 +1059,11 @@ struct RemoveEmptyKernelEnvironment
     // Remove empty kernel environment
     // preserve synchronization by creating acc.wait operation if needed
     if (!op.getWaitOperands().empty())
-      rewriter.replaceOpWithNewOp<acc::WaitOp>(
-          op,
-          op.getWaitOperands(),
-          /*asyncOperand=*/Value(),
-          /*waitDevnum=*/Value(),
-          /*async=*/nullptr,
-          /*ifCond=*/Value());
+      rewriter.replaceOpWithNewOp<acc::WaitOp>(op, op.getWaitOperands(),
+                                               /*asyncOperand=*/Value(),
+                                               /*waitDevnum=*/Value(),
+                                               /*async=*/nullptr,
+                                               /*ifCond=*/Value());
     else
       rewriter.eraseOp(op);
 

>From d4e5cb8cb11e8df9d28df4964799e1f4e58bda26 Mon Sep 17 00:00:00 2001
From: Vijay Kandiah <vkandiah at nvidia.com>
Date: Wed, 5 Nov 2025 18:04:26 -0800
Subject: [PATCH 5/5] [mlir][acc] Handle complex wait clauses in
 kernel_environment canonicalization

---
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp     | 36 +++++++++++++++++++--
 mlir/test/Dialect/OpenACC/canonicalize.mlir | 25 +++++++-------
 2 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index a7c1b74e7d4ff..8c9c137b8aebb 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1056,9 +1056,39 @@ struct RemoveEmptyKernelEnvironment
     if (!block.empty())
       return failure();
 
-    // Remove empty kernel environment
-    // preserve synchronization by creating acc.wait operation if needed
-    if (!op.getWaitOperands().empty())
+    // Conservatively disable canonicalization of empty acc.kernel_environment
+    // operations if the wait operands in the kernel_environment cannot be fully
+    // represented by an acc.wait operation.
+
+    // Disable canonicalization if device type is not the default
+    if (auto deviceTypeAttr = op.getWaitOperandsDeviceTypeAttr()) {
+      for (auto attr : deviceTypeAttr) {
+        if (auto dtAttr = mlir::dyn_cast<acc::DeviceTypeAttr>(attr)) {
+          if (dtAttr.getValue() != mlir::acc::DeviceType::None)
+            return failure();
+        }
+      }
+    }
+
+    // Disable canonicalization if any wait segment has a devnum
+    if (auto hasDevnumAttr = op.getHasWaitDevnumAttr()) {
+      for (auto attr : hasDevnumAttr) {
+        if (auto boolAttr = mlir::dyn_cast<mlir::BoolAttr>(attr)) {
+          if (boolAttr.getValue())
+            return failure();
+        }
+      }
+    }
+
+    // Disable canonicalization if there are multiple wait segments
+    if (auto segmentsAttr = op.getWaitOperandsSegmentsAttr()) {
+      if (segmentsAttr.size() > 1)
+        return failure();
+    }
+
+    // Remove empty kernel environment.
+    // Preserve synchronization by creating an acc.wait operation if needed.
+    if (!op.getWaitOperands().empty() || op.getWaitOnlyAttr())
       rewriter.replaceOpWithNewOp<acc::WaitOp>(op, op.getWaitOperands(),
                                                /*asyncOperand=*/Value(),
                                                /*waitDevnum=*/Value(),
diff --git a/mlir/test/Dialect/OpenACC/canonicalize.mlir b/mlir/test/Dialect/OpenACC/canonicalize.mlir
index 6d600a386f926..38d3df31305ad 100644
--- a/mlir/test/Dialect/OpenACC/canonicalize.mlir
+++ b/mlir/test/Dialect/OpenACC/canonicalize.mlir
@@ -222,26 +222,27 @@ func.func @update_unnecessary_computations(%x: memref<i32>) {
 
 // -----
 
-func.func @remove_empty_kernel_environment() {
+func.func @kernel_environment_canonicalization(%q1: i32, %q2: i32, %q3: i32) {
+  // Empty kernel_environment (no wait) - should be removed
   acc.kernel_environment {
   }
-  return
-}
 
-// CHECK-LABEL: func.func @remove_empty_kernel_environment
-// CHECK-NOT: acc.kernel_environment
-// CHECK: return
+  acc.kernel_environment wait({%q1 : i32, %q2 : i32}) {
+  }
 
-// -----
+  acc.kernel_environment wait {
+  }
 
-func.func @kernel_environment_with_wait(%q1: i32, %q2: i32) {
-  acc.kernel_environment wait({%q1 : i32, %q2 : i32}) {
+  acc.kernel_environment wait({%q3 : i32} [#acc.device_type<nvidia>]) {
   }
+
   return
 }
 
-// CHECK-LABEL: func.func @kernel_environment_with_wait
-// CHECK-SAME: ([[Q1:%.*]]: i32, [[Q2:%.*]]: i32)
-// CHECK-NOT: acc.kernel_environment
+// CHECK-LABEL: func.func @kernel_environment_canonicalization
+// CHECK-SAME: ([[Q1:%.*]]: i32, [[Q2:%.*]]: i32, [[Q3:%.*]]: i32)
+// CHECK-NOT: acc.kernel_environment wait({{.*}}[#acc.device_type<none>])
 // CHECK: acc.wait([[Q1]], [[Q2]] : i32, i32)
+// CHECK: acc.wait{{$}}
+// CHECK: acc.kernel_environment wait({{.*}}[#acc.device_type<nvidia>])
 // CHECK: return



More information about the Mlir-commits mailing list