[clang] b7c521b - [OpenACC][CIR] Lowering for 'vector_length' on compute constructs
via cfe-commits
cfe-commits at lists.llvm.org
Mon Apr 21 12:47:52 PDT 2025
Author: erichkeane
Date: 2025-04-21T12:47:47-07:00
New Revision: b7c521b922f8b81544ecb0ccff2847644cac3107
URL: https://github.com/llvm/llvm-project/commit/b7c521b922f8b81544ecb0ccff2847644cac3107
DIFF: https://github.com/llvm/llvm-project/commit/b7c521b922f8b81544ecb0ccff2847644cac3107.diff
LOG: [OpenACC][CIR] Lowering for 'vector_length' on compute constructs
This is the same as the 'num_workers', with slightly different names in
places, so we just do the same exact implementation. This extracts the
implementation as well, which should make it easier to reuse.
Added:
Modified:
clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
clang/test/CIR/CodeGenOpenACC/kernels.c
clang/test/CIR/CodeGenOpenACC/parallel.c
Removed:
################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
index b79baa96a3fc3..e7dd2e74b0864 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
@@ -82,6 +82,56 @@ class OpenACCClauseCIREmitter final
return conversionOp.getResult(0);
}
+ mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
+ // '*' case leaves no identifier-info, just a nullptr.
+ if (!ii)
+ return mlir::acc::DeviceType::Star;
+ return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName())
+ .CaseLower("default", mlir::acc::DeviceType::Default)
+ .CaseLower("host", mlir::acc::DeviceType::Host)
+ .CaseLower("multicore", mlir::acc::DeviceType::Multicore)
+ .CasesLower("nvidia", "acc_device_nvidia",
+ mlir::acc::DeviceType::Nvidia)
+ .CaseLower("radeon", mlir::acc::DeviceType::Radeon);
+ }
+
+ // Handle a clause affected by the 'device-type' to the point that they need
+ // to have the attributes added in the correct/corresponding order, such as
+ // 'num_workers' or 'vector_length' on a compute construct.
+ mlir::ArrayAttr
+ handleDeviceTypeAffectedClause(mlir::ArrayAttr existingDeviceTypes,
+ mlir::Value argument,
+ mlir::MutableOperandRange &argCollection) {
+ llvm::SmallVector<mlir::Attribute> deviceTypes;
+
+ // Collect the 'existing' device-type attributes so we can re-create them
+ // and insert them.
+ if (existingDeviceTypes) {
+ for (const mlir::Attribute &Attr : existingDeviceTypes)
+ deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
+ builder.getContext(),
+ cast<mlir::acc::DeviceTypeAttr>(Attr).getValue()));
+ }
+
+ // Insert 1 version of the 'expr' to the NumWorkers list per-current
+ // device type.
+ if (lastDeviceTypeClause) {
+ for (const DeviceTypeArgument &arch :
+ lastDeviceTypeClause->getArchitectures()) {
+ deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
+ builder.getContext(), decodeDeviceType(arch.getIdentifierInfo())));
+ argCollection.append(argument);
+ }
+ } else {
+ // Else, we just add a single for 'none'.
+ deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
+ builder.getContext(), mlir::acc::DeviceType::None));
+ argCollection.append(argument);
+ }
+
+ return mlir::ArrayAttr::get(builder.getContext(), deviceTypes);
+ }
+
public:
OpenACCClauseCIREmitter(OpTy &operation, CIRGenFunction &cgf,
CIRGenBuilderTy &builder,
@@ -112,19 +162,6 @@ class OpenACCClauseCIREmitter final
}
}
- mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
- // '*' case leaves no identifier-info, just a nullptr.
- if (!ii)
- return mlir::acc::DeviceType::Star;
- return llvm::StringSwitch<mlir::acc::DeviceType>(ii->getName())
- .CaseLower("default", mlir::acc::DeviceType::Default)
- .CaseLower("host", mlir::acc::DeviceType::Host)
- .CaseLower("multicore", mlir::acc::DeviceType::Multicore)
- .CasesLower("nvidia", "acc_device_nvidia",
- mlir::acc::DeviceType::Nvidia)
- .CaseLower("radeon", mlir::acc::DeviceType::Radeon);
- }
-
void VisitDeviceTypeClause(const OpenACCDeviceTypeClause &clause) {
lastDeviceTypeClause = &clause;
if constexpr (isOneOfTypes<OpTy, InitOp, ShutdownOp>) {
@@ -165,38 +202,10 @@ class OpenACCClauseCIREmitter final
void VisitNumWorkersClause(const OpenACCNumWorkersClause &clause) {
if constexpr (isOneOfTypes<OpTy, ParallelOp, KernelsOp>) {
- // Collect the 'existing' device-type attributes so we can re-create them
- // and insert them.
- llvm::SmallVector<mlir::Attribute> deviceTypes;
- mlir::ArrayAttr existingDeviceTypes =
- operation.getNumWorkersDeviceTypeAttr();
-
- if (existingDeviceTypes) {
- for (mlir::Attribute Attr : existingDeviceTypes)
- deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
- builder.getContext(),
- cast<mlir::acc::DeviceTypeAttr>(Attr).getValue()));
- }
-
- // Insert 1 version of the 'int-expr' to the NumWorkers list per-current
- // device type.
- mlir::Value intExpr = createIntExpr(clause.getIntExpr());
- if (lastDeviceTypeClause) {
- for (const DeviceTypeArgument &arg :
- lastDeviceTypeClause->getArchitectures()) {
- deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
- builder.getContext(), decodeDeviceType(arg.getIdentifierInfo())));
- operation.getNumWorkersMutable().append(intExpr);
- }
- } else {
- // Else, we just add a single for 'none'.
- deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
- builder.getContext(), mlir::acc::DeviceType::None));
- operation.getNumWorkersMutable().append(intExpr);
- }
-
- operation.setNumWorkersDeviceTypeAttr(
- mlir::ArrayAttr::get(builder.getContext(), deviceTypes));
+ mlir::MutableOperandRange range = operation.getNumWorkersMutable();
+ operation.setNumWorkersDeviceTypeAttr(handleDeviceTypeAffectedClause(
+ operation.getNumWorkersDeviceTypeAttr(),
+ createIntExpr(clause.getIntExpr()), range));
} else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
llvm_unreachable("num_workers not valid on serial");
} else {
@@ -204,6 +213,19 @@ class OpenACCClauseCIREmitter final
}
}
+ void VisitVectorLengthClause(const OpenACCVectorLengthClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, ParallelOp, KernelsOp>) {
+ mlir::MutableOperandRange range = operation.getVectorLengthMutable();
+ operation.setVectorLengthDeviceTypeAttr(handleDeviceTypeAffectedClause(
+ operation.getVectorLengthDeviceTypeAttr(),
+ createIntExpr(clause.getIntExpr()), range));
+ } else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
+ llvm_unreachable("vector_length not valid on serial");
+ } else {
+ return clauseNotImplemented(clause);
+ }
+ }
+
void VisitSelfClause(const OpenACCSelfClause &clause) {
if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp>) {
if (clause.isEmptySelfClause()) {
diff --git a/clang/test/CIR/CodeGenOpenACC/kernels.c b/clang/test/CIR/CodeGenOpenACC/kernels.c
index 6459b310546cd..d2da1d18f1534 100644
--- a/clang/test/CIR/CodeGenOpenACC/kernels.c
+++ b/clang/test/CIR/CodeGenOpenACC/kernels.c
@@ -158,5 +158,57 @@ void acc_kernels(int cond) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
+#pragma acc kernels vector_length(cond)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2u)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
+ // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(3)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>], %[[THREE_CAST]] : si32 [#acc.device_type<multicore>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3)
+ {}
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels vector_length(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
// CHECK-NEXT: cir.return
}
diff --git a/clang/test/CIR/CodeGenOpenACC/parallel.c b/clang/test/CIR/CodeGenOpenACC/parallel.c
index bdb506ee7e1d2..61dccc591c252 100644
--- a/clang/test/CIR/CodeGenOpenACC/parallel.c
+++ b/clang/test/CIR/CodeGenOpenACC/parallel.c
@@ -157,5 +157,57 @@ void acc_parallel(int cond) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
+#pragma acc parallel vector_length(cond)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel vector_length(cond) device_type(nvidia) vector_length(2u)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
+ // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(4)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i
+ // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[FOUR_CAST]] : si32 [#acc.device_type<radeon>], %[[FOUR_CAST]] : si32 [#acc.device_type<multicore>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3)
+ {}
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel vector_length(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
// CHECK-NEXT: cir.return
}
More information about the cfe-commits
mailing list