[flang-commits] [flang] [mlir] [acc] Add firstprivate/private to `acc.kernels` (PR #170387)
Susan Tan ス-ザン タン via flang-commits
flang-commits at lists.llvm.org
Wed Dec 3 14:19:31 PST 2025
https://github.com/SusanTan updated https://github.com/llvm/llvm-project/pull/170387
>From 77e571e61c3da1e2f4073a63971ac89e7a1a3396 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Tue, 2 Dec 2025 14:57:37 -0800
Subject: [PATCH 1/3] add firstprivate/private to acc kernel
---
flang/lib/Lower/OpenACC.cpp | 7 ++-
.../mlir/Dialect/OpenACC/OpenACCOps.td | 22 +++++---
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 14 +++++
mlir/test/Dialect/OpenACC/ops.mlir | 53 +++++++++++++++++++
4 files changed, 85 insertions(+), 11 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 5355ca60181b0..22af354a739cb 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -3024,11 +3024,10 @@ static Op createComputeOp(
}
addOperand(operands, operandSegments, ifCond);
addOperand(operands, operandSegments, selfCond);
- if constexpr (!std::is_same_v<Op, mlir::acc::KernelsOp>) {
+ if constexpr (!std::is_same_v<Op, mlir::acc::KernelsOp>)
addOperands(operands, operandSegments, reductionOperands);
- addOperands(operands, operandSegments, privateOperands);
- addOperands(operands, operandSegments, firstprivateOperands);
- }
+ addOperands(operands, operandSegments, privateOperands);
+ addOperands(operands, operandSegments, firstprivateOperands);
addOperands(operands, operandSegments, dataClauseOperands);
Op computeOp;
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 77d1a6f8d53b5..c3073be62be9e 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2002,8 +2002,7 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels",
corresponding `device_type` attributes must be modified as well.
}];
- let arguments = (ins
- Variadic<IntOrIndex>:$asyncOperands,
+ let arguments = (ins Variadic<IntOrIndex>:$asyncOperands,
OptionalAttr<DeviceTypeArrayAttr>:$asyncOperandsDeviceType,
OptionalAttr<DeviceTypeArrayAttr>:$asyncOnly,
Variadic<IntOrIndex>:$waitOperands,
@@ -2018,12 +2017,11 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels",
OptionalAttr<DeviceTypeArrayAttr>:$numWorkersDeviceType,
Variadic<IntOrIndex>:$vectorLength,
OptionalAttr<DeviceTypeArrayAttr>:$vectorLengthDeviceType,
- Optional<I1>:$ifCond,
- Optional<I1>:$selfCond,
- UnitAttr:$selfAttr,
+ Optional<I1>:$ifCond, Optional<I1>:$selfCond, UnitAttr:$selfAttr,
+ Variadic<OpenACC_AnyPointerOrMappableType>:$privateOperands,
+ Variadic<OpenACC_AnyPointerOrMappableType>:$firstprivateOperands,
Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands,
- OptionalAttr<DefaultValueAttr>:$defaultAttr,
- UnitAttr:$combined);
+ OptionalAttr<DefaultValueAttr>:$defaultAttr, UnitAttr:$combined);
let regions = (region AnyRegion:$region);
@@ -2111,6 +2109,14 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels",
/// types.
void addWaitOperands(MLIRContext *, bool hasDevnum, mlir::ValueRange,
llvm::ArrayRef<DeviceType>);
+
+ /// Adds a private clause variable to this operation, including its recipe.
+ void addPrivatization(MLIRContext *, mlir::acc::PrivateOp op,
+ mlir::acc::PrivateRecipeOp recipe);
+ /// Adds a firstprivate clause variable to this operation, including its
+ /// recipe.
+ void addFirstPrivatization(MLIRContext *, mlir::acc::FirstprivateOp op,
+ mlir::acc::FirstprivateRecipeOp recipe);
}];
let assemblyFormat = [{
@@ -2119,10 +2125,12 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels",
`dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)`
| `async` `` custom<DeviceTypeOperandsWithKeywordOnly>($asyncOperands,
type($asyncOperands), $asyncOperandsDeviceType, $asyncOnly)
+ | `firstprivate` `(` $firstprivateOperands `:` type($firstprivateOperands) `)`
| `num_gangs` `(` custom<NumGangs>($numGangs,
type($numGangs), $numGangsDeviceType, $numGangsSegments) `)`
| `num_workers` `(` custom<DeviceTypeOperands>($numWorkers,
type($numWorkers), $numWorkersDeviceType) `)`
+ | `private` `(` $privateOperands `:` type($privateOperands) `)`
| `vector_length` `(` custom<DeviceTypeOperands>($vectorLength,
type($vectorLength), $vectorLengthDeviceType) `)`
| `wait` `` custom<WaitClause>($waitOperands, type($waitOperands),
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 7039bbe1d11ec..7e4dee5b87734 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2675,6 +2675,20 @@ LogicalResult acc::KernelsOp::verify() {
return checkDataOperands<acc::KernelsOp>(*this, getDataClauseOperands());
}
+void acc::KernelsOp::addPrivatization(MLIRContext *context,
+ mlir::acc::PrivateOp op,
+ mlir::acc::PrivateRecipeOp recipe) {
+ op.setRecipeAttr(mlir::SymbolRefAttr::get(context, recipe.getSymName()));
+ getPrivateOperandsMutable().append(op.getResult());
+}
+
+void acc::KernelsOp::addFirstPrivatization(
+ MLIRContext *context, mlir::acc::FirstprivateOp op,
+ mlir::acc::FirstprivateRecipeOp recipe) {
+ op.setRecipeAttr(mlir::SymbolRefAttr::get(context, recipe.getSymName()));
+ getFirstprivateOperandsMutable().append(op.getResult());
+}
+
void acc::KernelsOp::addNumWorkersOperand(
MLIRContext *context, mlir::Value newValue,
llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir
index e004a88261c78..9301806d1b3fe 100644
--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@@ -731,6 +731,59 @@ func.func @testserialop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10
// -----
+// Test acc.kernels with private and firstprivate operands, similar to acc.serial.
+
+acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init {
+^bb0(%arg0: memref<10xf32>):
+ %0 = memref.alloc() : memref<10xf32>
+ acc.yield %0 : memref<10xf32>
+} destroy {
+^bb0(%arg0: memref<10xf32>):
+ memref.dealloc %arg0 : memref<10xf32>
+ acc.terminator
+}
+
+acc.private.recipe @privatization_memref_10_10_f32 : memref<10x10xf32> init {
+^bb0(%arg0: memref<10x10xf32>):
+ %1 = memref.alloc() : memref<10x10xf32>
+ acc.yield %1 : memref<10x10xf32>
+} destroy {
+^bb0(%arg0: memref<10x10xf32>):
+ memref.dealloc %arg0 : memref<10x10xf32>
+ acc.terminator
+}
+
+acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init {
+^bb0(%arg0: memref<10xf32>):
+ %2 = memref.alloca() : memref<10xf32>
+ acc.yield %2 : memref<10xf32>
+} copy {
+^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>):
+ memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32>
+ acc.terminator
+} destroy {
+^bb0(%arg0: memref<10xf32>):
+ acc.terminator
+}
+
+func.func @testkernelspriv(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () {
+ %priv_a = acc.private varPtr(%a : memref<10xf32>) recipe(@privatization_memref_10_f32) -> memref<10xf32>
+ %priv_c = acc.private varPtr(%c : memref<10x10xf32>) recipe(@privatization_memref_10_10_f32) -> memref<10x10xf32>
+ %firstp = acc.firstprivate varPtr(%b : memref<10xf32>) varType(tensor<10xf32>) recipe(@firstprivatization_memref_10xf32) -> memref<10xf32>
+ acc.kernels firstprivate(%firstp : memref<10xf32>) private(%priv_a, %priv_c : memref<10xf32>, memref<10x10xf32>) {
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @testkernelspriv(
+// CHECK: %[[PRIV_A:.*]] = acc.private varPtr(%{{.*}} : memref<10xf32>) recipe(@privatization_memref_10_f32) -> memref<10xf32>
+// CHECK: %[[PRIV_C:.*]] = acc.private varPtr(%{{.*}} : memref<10x10xf32>) recipe(@privatization_memref_10_10_f32) -> memref<10x10xf32>
+// CHECK: %[[FIRSTP:.*]] = acc.firstprivate varPtr(%{{.*}} : memref<10xf32>) varType(tensor<10xf32>) recipe(@firstprivatization_memref_10xf32) -> memref<10xf32>
+// CHECK: acc.kernels firstprivate(%[[FIRSTP]] : memref<10xf32>) private(%[[PRIV_A]], %[[PRIV_C]] : memref<10xf32>, memref<10x10xf32>) {
+// CHECK-NEXT: }
+
+// -----
+
func.func @testdataop(%a: memref<f32>, %b: memref<f32>, %c: memref<f32>) -> () {
%ifCond = arith.constant true
>From 93b2030a9ac7aaa2089f1db9deef96e6527a4157 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Tue, 2 Dec 2025 15:38:32 -0800
Subject: [PATCH 2/3] add reduction
---
flang/lib/Lower/OpenACC.cpp | 3 +--
mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td | 6 ++++++
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 7 +++++++
mlir/test/Dialect/OpenACC/ops.mlir | 12 ++++++++++++
4 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 22af354a739cb..69c3300ba4390 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -3024,8 +3024,7 @@ static Op createComputeOp(
}
addOperand(operands, operandSegments, ifCond);
addOperand(operands, operandSegments, selfCond);
- if constexpr (!std::is_same_v<Op, mlir::acc::KernelsOp>)
- addOperands(operands, operandSegments, reductionOperands);
+ addOperands(operands, operandSegments, reductionOperands);
addOperands(operands, operandSegments, privateOperands);
addOperands(operands, operandSegments, firstprivateOperands);
addOperands(operands, operandSegments, dataClauseOperands);
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index c3073be62be9e..fcfe959709f09 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2018,6 +2018,7 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels",
Variadic<IntOrIndex>:$vectorLength,
OptionalAttr<DeviceTypeArrayAttr>:$vectorLengthDeviceType,
Optional<I1>:$ifCond, Optional<I1>:$selfCond, UnitAttr:$selfAttr,
+ Variadic<OpenACC_AnyPointerOrMappableType>:$reductionOperands,
Variadic<OpenACC_AnyPointerOrMappableType>:$privateOperands,
Variadic<OpenACC_AnyPointerOrMappableType>:$firstprivateOperands,
Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands,
@@ -2117,6 +2118,10 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels",
/// recipe.
void addFirstPrivatization(MLIRContext *, mlir::acc::FirstprivateOp op,
mlir::acc::FirstprivateRecipeOp recipe);
+ /// Adds a reduction clause variable to this operation, including its
+ /// recipe.
+ void addReduction(MLIRContext *, mlir::acc::ReductionOp op,
+ mlir::acc::ReductionRecipeOp recipe);
}];
let assemblyFormat = [{
@@ -2138,6 +2143,7 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels",
$waitOnly)
| `self` `(` $selfCond `)`
| `if` `(` $ifCond `)`
+ | `reduction` `(` $reductionOperands `:` type($reductionOperands) `)`
)
$region attr-dict-with-keyword
}];
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 7e4dee5b87734..9235f89b7969a 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2689,6 +2689,13 @@ void acc::KernelsOp::addFirstPrivatization(
getFirstprivateOperandsMutable().append(op.getResult());
}
+void acc::KernelsOp::addReduction(MLIRContext *context,
+ mlir::acc::ReductionOp op,
+ mlir::acc::ReductionRecipeOp recipe) {
+ op.setRecipeAttr(mlir::SymbolRefAttr::get(context, recipe.getSymName()));
+ getReductionOperandsMutable().append(op.getResult());
+}
+
void acc::KernelsOp::addNumWorkersOperand(
MLIRContext *context, mlir::Value newValue,
llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir
index 9301806d1b3fe..90a8475db0e9e 100644
--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@@ -1653,6 +1653,18 @@ func.func @acc_reduc_test(%a : memref<i64>) -> () {
// CHECK: %[[REDUCTION_A:.*]] = acc.reduction varPtr(%[[ARG0]] : memref<i64>) recipe(@reduction_add_memref_i64) -> memref<i64>
// CHECK-NEXT: acc.serial reduction(%[[REDUCTION_A]] : memref<i64>)
+func.func @acc_kernels_reduc_test(%a : memref<i64>) -> () {
+ %reduction_a = acc.reduction varPtr(%a : memref<i64>) recipe(@reduction_add_memref_i64) -> memref<i64>
+ acc.kernels reduction(%reduction_a : memref<i64>) {
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @acc_kernels_reduc_test(
+// CHECK-SAME: %[[ARG0:.*]]: memref<i64>)
+// CHECK: %[[REDUCTION_A:.*]] = acc.reduction varPtr(%[[ARG0]] : memref<i64>) recipe(@reduction_add_memref_i64) -> memref<i64>
+// CHECK-NEXT: acc.kernels reduction(%[[REDUCTION_A]] : memref<i64>)
+
// -----
func.func @testdeclareop(%a: memref<f32>, %b: memref<f32>, %c: memref<f32>) -> () {
>From ab6ab93a8fa1c299b5e96d57b4f3a125323f99b7 Mon Sep 17 00:00:00 2001
From: Susan Tan <zujunt at nvidia.com>
Date: Wed, 3 Dec 2025 14:19:18 -0800
Subject: [PATCH 3/3] add split
---
mlir/test/Dialect/OpenACC/ops.mlir | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir
index 90a8475db0e9e..5a1c20bcf5a24 100644
--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@@ -1653,6 +1653,23 @@ func.func @acc_reduc_test(%a : memref<i64>) -> () {
// CHECK: %[[REDUCTION_A:.*]] = acc.reduction varPtr(%[[ARG0]] : memref<i64>) recipe(@reduction_add_memref_i64) -> memref<i64>
// CHECK-NEXT: acc.serial reduction(%[[REDUCTION_A]] : memref<i64>)
+// -----
+
+acc.reduction.recipe @reduction_add_memref_i64 : memref<i64> reduction_operator <add> init {
+^bb0(%arg0: memref<i64>):
+ %c0_i64 = arith.constant 0 : i64
+ %alloca = memref.alloca() : memref<i64>
+ memref.store %c0_i64, %alloca[] : memref<i64>
+ acc.yield %alloca : memref<i64>
+} combiner {
+^bb0(%arg0: memref<i64>, %arg1: memref<i64>):
+ %0 = memref.load %arg0[] : memref<i64>
+ %1 = memref.load %arg1[] : memref<i64>
+ %2 = arith.addi %0, %1 : i64
+ memref.store %2, %arg0[] : memref<i64>
+ acc.yield %arg0 : memref<i64>
+}
+
func.func @acc_kernels_reduc_test(%a : memref<i64>) -> () {
%reduction_a = acc.reduction varPtr(%a : memref<i64>) recipe(@reduction_add_memref_i64) -> memref<i64>
acc.kernels reduction(%reduction_a : memref<i64>) {
More information about the flang-commits mailing list