[clang] a783edf - [OpenACC][CIR] 'tile' lowering for combined constructs
via cfe-commits
cfe-commits at lists.llvm.org
Fri May 9 11:54:21 PDT 2025
Author: erichkeane
Date: 2025-05-09T11:54:16-07:00
New Revision: a783edf3db8eaa9797e25cbece7a71370f968d3d
URL: https://github.com/llvm/llvm-project/commit/a783edf3db8eaa9797e25cbece7a71370f968d3d
DIFF: https://github.com/llvm/llvm-project/commit/a783edf3db8eaa9797e25cbece7a71370f968d3d.diff
LOG: [OpenACC][CIR] 'tile' lowering for combined constructs
This clause requires that we attach it to the 'loop', and can generate
variables, so this is the first loop clause to require that we properly
set up the insertion location. This patch does so, as a part of
lowering 'tile' correctly.
Added:
Modified:
clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
clang/test/CIR/CodeGenOpenACC/combined.cpp
Removed:
################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index 3692560b06e6f..86997dd057aa3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -138,7 +138,8 @@ class OpenACCClauseCIREmitter final
template <typename U = void,
typename = std::enable_if_t<isCombinedType<OpTy>, U>>
void applyToLoopOp(const OpenACCClause &c) {
- // TODO OpenACC: we have to set the insertion scope here correctly still.
+ mlir::OpBuilder::InsertionGuard guardCase(builder);
+ builder.setInsertionPoint(operation.loopOp);
OpenACCClauseCIREmitter<mlir::acc::LoopOp> loopEmitter{
operation.loopOp, cgf, builder, dirKind, dirLoc};
loopEmitter.lastDeviceTypeValues = lastDeviceTypeValues;
@@ -448,10 +449,10 @@ class OpenACCClauseCIREmitter final
operation.setTileForDeviceTypes(builder.getContext(),
lastDeviceTypeValues, values);
+ } else if constexpr (isCombinedType<OpTy>) {
+ applyToLoopOp(clause);
} else {
- // TODO: When we've implemented this for everything, switch this to an
- // unreachable. Combined constructs remain.
- return clauseNotImplemented(clause);
+ llvm_unreachable("Unknown construct kind in VisitTileClause");
}
}
diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp
index da8347a7f89c4..50c831c286e0e 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp
@@ -252,4 +252,71 @@ extern "C" void acc_combined(int N) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
+ #pragma acc parallel loop tile(1, 2, 3)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: acc.parallel combined(loop) {
+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: acc.loop combined(parallel) tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc serial loop tile(2) device_type(radeon)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: acc.serial combined(loop) {
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop combined(serial) tile({%[[TWO_CONST]] : i64}) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc kernels loop tile(2) device_type(radeon) tile (1, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: acc.kernels combined(loop) {
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop combined(kernels) tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ #pragma acc parallel loop tile(*) device_type(radeon, nvidia) tile (1, 2)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: acc.parallel combined(loop) {
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: acc.loop combined(parallel) tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ #pragma acc serial loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
+ for(unsigned I = 0; I < N; ++I)
+ for(unsigned J = 0; J < N; ++J)
+ for(unsigned K = 0; K < N; ++K);
+ // CHECK-NEXT: acc.serial combined(loop) {
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
+ // CHECK-NEXT: acc.loop combined(serial) tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
}
More information about the cfe-commits
mailing list