[clang] 8293955 - [OpenACC][CIR] Lower 'num_workers' for combined constructs
via cfe-commits
cfe-commits at lists.llvm.org
Fri May 9 14:54:19 PDT 2025
Author: erichkeane
Date: 2025-05-09T14:54:16-07:00
New Revision: 829395591ebf248a9fb57dbed6a8383db4ba6e00
URL: https://github.com/llvm/llvm-project/commit/829395591ebf248a9fb57dbed6a8383db4ba6e00
DIFF: https://github.com/llvm/llvm-project/commit/829395591ebf248a9fb57dbed6a8383db4ba6e00.diff
LOG: [OpenACC][CIR] Lower 'num_workers' for combined constructs
Similar to num_gangs, implementation is trivial, so adding tests and
adding the small amount of implementation.
Added:
Modified:
clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
clang/test/CIR/CodeGenOpenACC/combined.cpp
Removed:
################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index ebe5b50be4ead..65e98608ebdef 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -229,12 +229,10 @@ class OpenACCClauseCIREmitter final
operation.addNumWorkersOperand(builder.getContext(),
createIntExpr(clause.getIntExpr()),
lastDeviceTypeValues);
- } else if constexpr (isOneOfTypes<OpTy, mlir::acc::SerialOp>) {
- llvm_unreachable("num_workers not valid on serial");
+ } else if constexpr (isCombinedType<OpTy>) {
+ applyToComputeOp(clause);
} else {
- // TODO: When we've implemented this for everything, switch this to an
- // unreachable. Combined constructs remain.
- return clauseNotImplemented(clause);
+ llvm_unreachable("Unknown construct kind in VisitNumGangsClause");
}
}
diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp
index 90ba5cbceb800..78f35c3cb3c5f 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp
@@ -813,4 +813,70 @@ extern "C" void acc_combined(int N, int cond) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
+#pragma acc parallel loop num_workers(cond)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[CONV_CAST]] : si32) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels loop num_workers(cond) device_type(nvidia) num_workers(2u)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
+ // CHECK-NEXT: acc.kernels combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel loop num_workers(cond) device_type(nvidia, host) num_workers(2) device_type(radeon) num_workers(3)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels loop num_workers(cond) device_type(nvidia) num_workers(2) device_type(radeon, multicore) num_workers(4)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i
+ // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[FOUR_CAST]] : si32 [#acc.device_type<radeon>], %[[FOUR_CAST]] : si32 [#acc.device_type<multicore>]) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel loop device_type(nvidia) num_workers(2) device_type(radeon) num_workers(3)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
}
More information about the cfe-commits
mailing list