[clang] b09b1d6 - [OpenACC][CIR] Implement lowering for 'no_create' clause for comp/comb
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 6 16:48:09 PDT 2025
Author: erichkeane
Date: 2025-06-06T16:36:11-07:00
New Revision: b09b1d65efd7a3d6dd4f61333a5f09c0e69b42a4
URL: https://github.com/llvm/llvm-project/commit/b09b1d65efd7a3d6dd4f61333a5f09c0e69b42a4
DIFF: https://github.com/llvm/llvm-project/commit/b09b1d65efd7a3d6dd4f61333a5f09c0e69b42a4.diff
LOG: [OpenACC][CIR] Implement lowering for 'no_create' clause for comp/comb
no_create has its own 'data-in', plus uses the 'delete' for the data-out
operation. Additionally, like all data clauses it uses the 'async'
functionality previous implemented. This patch implements no_create for
combined/compute constructs completely, and ensures that the feature is
tested.
Added:
Modified:
clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
clang/test/CIR/CodeGenOpenACC/combined.cpp
clang/test/CIR/CodeGenOpenACC/kernels.c
clang/test/CIR/CodeGenOpenACC/parallel.c
clang/test/CIR/CodeGenOpenACC/serial.c
Removed:
################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 9f283974d8c8f..392a44917691e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -305,9 +305,19 @@ class OpenACCClauseCIREmitter final
{
mlir::OpBuilder::InsertionGuard guardCase(builder);
builder.setInsertionPointAfter(operation);
- afterOp = builder.create<AfterOpTy>(opInfo.beginLoc, beforeOp.getResult(),
- opInfo.varValue, structured, implicit,
- opInfo.name, opInfo.bounds);
+
+ if constexpr (std::is_same_v<AfterOpTy, mlir::acc::DeleteOp> ||
+ std::is_same_v<AfterOpTy, mlir::acc::DetachOp>) {
+ // Detach/Delete ops don't have the variable reference here, so they
+ // take 1 fewer argument to their build function.
+ afterOp = builder.create<AfterOpTy>(
+ opInfo.beginLoc, beforeOp.getResult(), structured, implicit,
+ opInfo.name, opInfo.bounds);
+ } else {
+ afterOp = builder.create<AfterOpTy>(
+ opInfo.beginLoc, beforeOp.getResult(), opInfo.varValue, structured,
+ implicit, opInfo.name, opInfo.bounds);
+ }
}
// Set the 'rest' of the info for both operations.
@@ -845,6 +855,22 @@ class OpenACCClauseCIREmitter final
return clauseNotImplemented(clause);
}
}
+
+ void VisitNoCreateClause(const OpenACCNoCreateClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
+ mlir::acc::KernelsOp>) {
+ for (auto var : clause.getVarList())
+ addDataOperand<mlir::acc::NoCreateOp, mlir::acc::DeleteOp>(
+ var, mlir::acc::DataClause::acc_no_create, /*structured=*/true,
+ /*implicit=*/false);
+ } else if constexpr (isCombinedType<OpTy>) {
+ applyToComputeOp(clause);
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. data remains.
+ return clauseNotImplemented(clause);
+ }
+ }
};
template <typename OpTy>
diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp
index 5107ee56c568c..74073094cd29a 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp
@@ -1011,8 +1011,8 @@ extern "C" void acc_combined(int N, int cond) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
}
-extern "C" void acc_combined_deviceptr(int *arg1, int *arg2) {
- // CHECK: cir.func @acc_combined_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
+extern "C" void acc_combined_data_clauses(int *arg1, int *arg2) {
+ // CHECK: cir.func @acc_combined_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
// CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init]
// CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init]
// CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
@@ -1079,4 +1079,39 @@ extern "C" void acc_combined_deviceptr(int *arg1, int *arg2) {
// CHECK-NEXT: } loc
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
+
+#pragma acc parallel loop no_create(arg1)
+ for(unsigned I = 0; I < 5; ++I);
+ // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
+
+#pragma acc serial loop no_create(arg2)
+ for(unsigned I = 0; I < 5; ++I);
+ // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
+ // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
+
+#pragma acc kernels loop no_create(arg1, arg2) device_type(host) async
+ for(unsigned I = 0; I < 5; ++I);
+ // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
+ // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
+ // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
}
diff --git a/clang/test/CIR/CodeGenOpenACC/kernels.c b/clang/test/CIR/CodeGenOpenACC/kernels.c
index d276c2c23fbed..67847d0b8ce03 100644
--- a/clang/test/CIR/CodeGenOpenACC/kernels.c
+++ b/clang/test/CIR/CodeGenOpenACC/kernels.c
@@ -417,8 +417,8 @@ void acc_kernels(int cond) {
// CHECK-NEXT: cir.return
}
-void acc_kernels_deviceptr(int *arg1, int *arg2) {
- // CHECK: cir.func @acc_kernels_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
+void acc_kernels_data_clauses(int *arg1, int *arg2) {
+ // CHECK: cir.func @acc_kernels_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
// CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init]
// CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init]
// CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
@@ -451,4 +451,22 @@ void acc_kernels_deviceptr(int *arg1, int *arg2) {
// CHECK-NEXT: acc.kernels dataOperands(%[[DEVPTR1]], %[[DEVPTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
+
+#pragma acc kernels no_create(arg1)
+ ;
+ // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
+ // CHECK-NEXT: acc.kernels dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
+
+#pragma acc kernels no_create(arg1, arg2) device_type(nvidia) async
+ ;
+ // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
+ // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
+ // CHECK-NEXT: acc.kernels dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
}
diff --git a/clang/test/CIR/CodeGenOpenACC/parallel.c b/clang/test/CIR/CodeGenOpenACC/parallel.c
index df05a7aa53bb8..665648aa57134 100644
--- a/clang/test/CIR/CodeGenOpenACC/parallel.c
+++ b/clang/test/CIR/CodeGenOpenACC/parallel.c
@@ -444,8 +444,8 @@ void acc_parallel(int cond) {
// CHECK-NEXT: cir.return
}
-void acc_parallel_deviceptr(int *arg1, int *arg2) {
- // CHECK: cir.func @acc_parallel_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
+void acc_parallel_data_clauses(int *arg1, int *arg2) {
+ // CHECK: cir.func @acc_parallel_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
// CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init]
// CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init]
// CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
@@ -478,4 +478,23 @@ void acc_parallel_deviceptr(int *arg1, int *arg2) {
// CHECK-NEXT: acc.parallel dataOperands(%[[DEVPTR1]], %[[DEVPTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
+
+#pragma acc parallel no_create(arg1)
+ ;
+ // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
+ // CHECK-NEXT: acc.parallel dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
+
+#pragma acc parallel no_create(arg1, arg2) device_type(radeon) async
+ ;
+ // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
+ // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
+ // CHECK-NEXT: acc.parallel dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
}
+
diff --git a/clang/test/CIR/CodeGenOpenACC/serial.c b/clang/test/CIR/CodeGenOpenACC/serial.c
index 48bda5387dffb..eb09a9e7aea8e 100644
--- a/clang/test/CIR/CodeGenOpenACC/serial.c
+++ b/clang/test/CIR/CodeGenOpenACC/serial.c
@@ -267,8 +267,8 @@ void acc_serial(int cond) {
// CHECK-NEXT: cir.return
}
-void acc_serial_deviceptr(int *arg1, int *arg2) {
- // CHECK: cir.func @acc_serial_deviceptr(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
+void acc_serial_data_clauses(int *arg1, int *arg2) {
+ // CHECK: cir.func @acc_serial_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr<!s32i>{{.*}}) {
// CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg1", init]
// CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg2", init]
// CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
@@ -302,4 +302,21 @@ void acc_serial_deviceptr(int *arg1, int *arg2) {
// CHECK-NEXT: acc.serial dataOperands(%[[DEVPTR1]], %[[DEVPTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<radeon>]) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
+
+#pragma acc serial no_create(arg1)
+ ;
+ // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
+ // CHECK-NEXT: acc.serial dataOperands(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
+#pragma acc serial no_create(arg1, arg2) device_type(nvidia) async
+ ;
+ // CHECK-NEXT: %[[NOCREATE1:.*]] = acc.nocreate varPtr(%[[ARG1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg1"}
+ // CHECK-NEXT: %[[NOCREATE2:.*]] = acc.nocreate varPtr(%[[ARG2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) -> !cir.ptr<!cir.ptr<!s32i>> {name = "arg2"}
+ // CHECK-NEXT: acc.serial dataOperands(%[[NOCREATE1]], %[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg2"}
+ // CHECK-NEXT: acc.delete accPtr(%[[NOCREATE1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<nvidia>]) {dataClause = #acc<data_clause acc_no_create>, name = "arg1"}
}
More information about the cfe-commits
mailing list