[clang] 3668a3a - [OpenACC][CIR] 'if'/'self' combined construct lowering
via cfe-commits
cfe-commits at lists.llvm.org
Fri May 9 11:26:19 PDT 2025
Author: erichkeane
Date: 2025-05-09T11:26:15-07:00
New Revision: 3668a3a7c8a0a4cdb4bd781529bb72b8588e8f99
URL: https://github.com/llvm/llvm-project/commit/3668a3a7c8a0a4cdb4bd781529bb72b8588e8f99
DIFF: https://github.com/llvm/llvm-project/commit/3668a3a7c8a0a4cdb4bd781529bb72b8588e8f99.diff
LOG: [OpenACC][CIR] 'if'/'self' combined construct lowering
These two require that we correctly set up the 'insertion points' for
the compute construct when doing a combined construct. This patch adds
that and verifies that we're doing it correctly.
Added:
Modified:
clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
clang/test/CIR/CodeGenOpenACC/combined.cpp
Removed:
################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index 8892c49e41202..3692560b06e6f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -148,7 +148,8 @@ class OpenACCClauseCIREmitter final
template <typename U = void,
typename = std::enable_if_t<isCombinedType<OpTy>, U>>
void applyToComputeOp(const OpenACCClause &c) {
- // TODO OpenACC: we have to set the insertion scope here correctly still.
+ mlir::OpBuilder::InsertionGuard guardCase(builder);
+ builder.setInsertionPoint(operation.computeOp);
OpenACCClauseCIREmitter<typename OpTy::ComputeOpTy> computeEmitter{
operation.computeOp, cgf, builder, dirKind, dirLoc};
computeEmitter.lastDeviceTypeValues = lastDeviceTypeValues;
@@ -288,9 +289,11 @@ class OpenACCClauseCIREmitter final
} else {
llvm_unreachable("var-list version of self shouldn't get here");
}
+ } else if constexpr (isCombinedType<OpTy>) {
+ applyToComputeOp(clause);
} else {
// TODO: When we've implemented this for everything, switch this to an
- // unreachable. If, combined constructs remain.
+ // unreachable. update construct remains.
return clauseNotImplemented(clause);
}
}
@@ -302,13 +305,15 @@ class OpenACCClauseCIREmitter final
mlir::acc::DataOp, mlir::acc::WaitOp>) {
operation.getIfCondMutable().append(
createCondition(clause.getConditionExpr()));
+ } else if constexpr (isCombinedType<OpTy>) {
+ applyToComputeOp(clause);
} else {
// 'if' applies to most of the constructs, but hold off on lowering them
// until we can write tests/know what we're doing with codegen to make
// sure we get it right.
// TODO: When we've implemented this for everything, switch this to an
- // unreachable. Enter data, exit data, host_data, update, combined
- // constructs remain.
+ // unreachable. Enter data, exit data, host_data, update constructs
+ // remain.
return clauseNotImplemented(clause);
}
}
diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp
index 3b2ae8a97d8c5..da8347a7f89c4 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp
@@ -176,4 +176,80 @@ extern "C" void acc_combined(int N) {
// CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
// CHECK: acc.yield
// CHECK-NEXT: } loc
+
+#pragma acc kernels loop self
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.kernels combined(loop) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } attributes {selfAttr}
+
+#pragma acc serial loop self(N)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[N_LOAD]] : !s32i), !cir.bool
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
+ // CHECK-NEXT: acc.serial combined(loop) self(%[[CONV_CAST]]) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel loop if(N)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[N_LOAD]] : !s32i), !cir.bool
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
+ // CHECK-NEXT: acc.parallel combined(loop) if(%[[CONV_CAST]]) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial loop if(1)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
+ // CHECK-NEXT: acc.serial combined(loop) if(%[[CONV_CAST]]) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels loop if(N == 1)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[EQ_RES:.*]] = cir.cmp(eq, %[[N_LOAD]], %[[ONE_LITERAL]]) : !s32i, !cir.bool
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[EQ_RES]] : !cir.bool to i1
+ // CHECK-NEXT: acc.kernels combined(loop) if(%[[CONV_CAST]]) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel loop if(N == 1) self(N == 2)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[EQ_RES_IF:.*]] = cir.cmp(eq, %[[N_LOAD]], %[[ONE_LITERAL]]) : !s32i, !cir.bool
+ // CHECK-NEXT: %[[CONV_CAST_IF:.*]] = builtin.unrealized_conversion_cast %[[EQ_RES_IF]] : !cir.bool to i1
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[EQ_RES_SELF:.*]] = cir.cmp(eq, %[[N_LOAD]], %[[TWO_LITERAL]]) : !s32i, !cir.bool
+ // CHECK-NEXT: %[[CONV_CAST_SELF:.*]] = builtin.unrealized_conversion_cast %[[EQ_RES_SELF]] : !cir.bool to i1
+ // CHECK-NEXT: acc.parallel combined(loop) self(%[[CONV_CAST_SELF]]) if(%[[CONV_CAST_IF]]) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
}
More information about the cfe-commits
mailing list