[clang] 881f6de - [OpenACC][CIR] Lower 'wait' clause for compute/data constructs (#137359)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 25 13:15:06 PDT 2025
Author: Erich Keane
Date: 2025-04-25T13:15:03-07:00
New Revision: 881f6de812632bdbf835aaf8d6a792ff5d93764c
URL: https://github.com/llvm/llvm-project/commit/881f6de812632bdbf835aaf8d6a792ff5d93764c
DIFF: https://github.com/llvm/llvm-project/commit/881f6de812632bdbf835aaf8d6a792ff5d93764c.diff
LOG: [OpenACC][CIR] Lower 'wait' clause for compute/data constructs (#137359)
The 'wait' clause is a bit complicated, and is laid out awkwardly in the
IR addative functions, so this patch has to do a little bit of work to
do that (mostly the 'devnum' work).
Otherwise, this is very similar to how num_gangs works, with the
additional complexity of the 'empty' wait being represented differently
as well, but this is similar to how 'async' and a few others work as
well.
Added:
Modified:
clang/include/clang/AST/OpenACCClause.h
clang/lib/AST/OpenACCClause.cpp
clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
clang/lib/Sema/TreeTransform.h
clang/test/CIR/CodeGenOpenACC/data.c
clang/test/CIR/CodeGenOpenACC/kernels.c
clang/test/CIR/CodeGenOpenACC/parallel.c
clang/test/CIR/CodeGenOpenACC/serial.c
Removed:
################################################################################
diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h
index f18a6cf62f2c5..449bcb71f9f32 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -565,6 +565,9 @@ class OpenACCWaitClause final
llvm::ArrayRef<Expr *> getQueueIdExprs() const {
return OpenACCClauseWithExprs::getExprs().drop_front();
}
+ // If this is a plain `wait` (no parens) this returns 'false'. Else Sema/Parse
+ // ensures we have at least one QueueId expression.
+ bool hasExprs() const { return getLParenLoc().isValid(); }
};
class OpenACCNumGangsClause final
diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp
index 01e183051934d..9d3645e6da1ca 100644
--- a/clang/lib/AST/OpenACCClause.cpp
+++ b/clang/lib/AST/OpenACCClause.cpp
@@ -865,7 +865,7 @@ void OpenACCClausePrinter::VisitReductionClause(
void OpenACCClausePrinter::VisitWaitClause(const OpenACCWaitClause &C) {
OS << "wait";
- if (!C.getLParenLoc().isInvalid()) {
+ if (C.hasExprs()) {
OS << "(";
if (C.hasDevNumExpr()) {
OS << "devnum: ";
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
index 5720fec556ff2..6e65f94c78bed 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
@@ -386,6 +386,53 @@ class OpenACCClauseCIREmitter final
}
}
+ void VisitWaitClause(const OpenACCWaitClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp, DataOp>) {
+ if (!clause.hasExprs()) {
+ operation.setWaitOnlyAttr(
+ handleDeviceTypeAffectedClause(operation.getWaitOnlyAttr()));
+ } else {
+ llvm::SmallVector<mlir::Value> values;
+
+ if (clause.hasDevNumExpr())
+ values.push_back(createIntExpr(clause.getDevNumExpr()));
+ for (const Expr *E : clause.getQueueIdExprs())
+ values.push_back(createIntExpr(E));
+
+ llvm::SmallVector<int32_t> segments;
+ if (operation.getWaitOperandsSegments())
+ llvm::copy(*operation.getWaitOperandsSegments(),
+ std::back_inserter(segments));
+
+ unsigned beforeSegmentSize = segments.size();
+
+ mlir::MutableOperandRange range = operation.getWaitOperandsMutable();
+ operation.setWaitOperandsDeviceTypeAttr(handleDeviceTypeAffectedClause(
+ operation.getWaitOperandsDeviceTypeAttr(), values, range,
+ segments));
+ operation.setWaitOperandsSegments(segments);
+
+ // In addition to having to set the 'segments', wait also has a list of
+ // bool attributes whether it is annotated with 'devnum'. We can use
+ // our knowledge of how much the 'segments' array grew to determine how
+ // many we need to add.
+ llvm::SmallVector<bool> hasDevNums;
+ if (operation.getHasWaitDevnumAttr())
+ for (mlir::Attribute A : operation.getHasWaitDevnumAttr())
+ hasDevNums.push_back(cast<mlir::BoolAttr>(A).getValue());
+
+ hasDevNums.insert(hasDevNums.end(), segments.size() - beforeSegmentSize,
+ clause.hasDevNumExpr());
+
+ operation.setHasWaitDevnumAttr(builder.getBoolArrayAttr(hasDevNums));
+ }
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Enter data, exit data, update, Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
+
void VisitDefaultAsyncClause(const OpenACCDefaultAsyncClause &clause) {
if constexpr (isOneOfTypes<OpTy, SetOp>) {
operation.getDefaultAsyncMutable().append(
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 071389543ab68..94d2b6b6cc808 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12228,7 +12228,7 @@ void OpenACCClauseTransform<Derived>::VisitVectorClause(
template <typename Derived>
void OpenACCClauseTransform<Derived>::VisitWaitClause(
const OpenACCWaitClause &C) {
- if (!C.getLParenLoc().isInvalid()) {
+ if (C.hasExprs()) {
Expr *DevNumExpr = nullptr;
llvm::SmallVector<Expr *> InstantiatedQueueIdExprs;
diff --git a/clang/test/CIR/CodeGenOpenACC/data.c b/clang/test/CIR/CodeGenOpenACC/data.c
index 29fd465708054..7887df6503f08 100644
--- a/clang/test/CIR/CodeGenOpenACC/data.c
+++ b/clang/test/CIR/CodeGenOpenACC/data.c
@@ -109,5 +109,117 @@ void acc_data(int cond) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}
+#pragma acc data default(none) wait
+ {}
+ // CHECK-NEXT: acc.data wait {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait device_type(nvidia) wait
+ {}
+ // CHECK-NEXT: acc.data wait([#acc.device_type<none>, #acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(1) device_type(nvidia) wait
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.data wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait device_type(nvidia) wait(1)
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.data wait([#acc.device_type<none>], {%[[ONE_CAST]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(1) device_type(nvidia) wait(1)
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32
+ // CHECK-NEXT: acc.data wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(devnum: cond : 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(devnum: cond : 1, 2)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(cond, 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.data wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
+#pragma acc data default(none) wait(queues: cond, 1) device_type(radeon)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.data wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
+
// CHECK-NEXT: cir.return
}
diff --git a/clang/test/CIR/CodeGenOpenACC/kernels.c b/clang/test/CIR/CodeGenOpenACC/kernels.c
index a57a0ccb557dc..7175e342c39bd 100644
--- a/clang/test/CIR/CodeGenOpenACC/kernels.c
+++ b/clang/test/CIR/CodeGenOpenACC/kernels.c
@@ -302,5 +302,117 @@ void acc_kernels(int cond) {
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
+#pragma acc kernels wait
+ {}
+ // CHECK-NEXT: acc.kernels wait {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait device_type(nvidia) wait
+ {}
+ // CHECK-NEXT: acc.kernels wait([#acc.device_type<none>, #acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(1) device_type(nvidia) wait
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait device_type(nvidia) wait(1)
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels wait([#acc.device_type<none>], {%[[ONE_CAST]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(1) device_type(nvidia) wait(1)
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(devnum: cond : 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(devnum: cond : 1, 2)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(cond, 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels wait(queues: cond, 1) device_type(radeon)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.kernels wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
// CHECK-NEXT: cir.return
}
diff --git a/clang/test/CIR/CodeGenOpenACC/parallel.c b/clang/test/CIR/CodeGenOpenACC/parallel.c
index 89ef6069d320e..c9208566bf2b9 100644
--- a/clang/test/CIR/CodeGenOpenACC/parallel.c
+++ b/clang/test/CIR/CodeGenOpenACC/parallel.c
@@ -329,5 +329,117 @@ void acc_parallel(int cond) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
+#pragma acc parallel wait
+ {}
+ // CHECK-NEXT: acc.parallel wait {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait device_type(nvidia) wait
+ {}
+ // CHECK-NEXT: acc.parallel wait([#acc.device_type<none>, #acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(1) device_type(nvidia) wait
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait device_type(nvidia) wait(1)
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel wait([#acc.device_type<none>], {%[[ONE_CAST]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(1) device_type(nvidia) wait(1)
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(devnum: cond : 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(devnum: cond : 1, 2)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(cond, 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc parallel wait(queues: cond, 1) device_type(radeon)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.parallel wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
// CHECK-NEXT: cir.return
}
diff --git a/clang/test/CIR/CodeGenOpenACC/serial.c b/clang/test/CIR/CodeGenOpenACC/serial.c
index 094958f0e3b23..88a49a95f87d7 100644
--- a/clang/test/CIR/CodeGenOpenACC/serial.c
+++ b/clang/test/CIR/CodeGenOpenACC/serial.c
@@ -152,5 +152,117 @@ void acc_serial(int cond) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<nvidia>, #acc.device_type<radeon>]}
+#pragma acc serial wait
+ {}
+ // CHECK-NEXT: acc.serial wait {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait device_type(nvidia) wait
+ {}
+ // CHECK-NEXT: acc.serial wait([#acc.device_type<none>, #acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait(1) device_type(nvidia) wait
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.serial wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait device_type(nvidia) wait(1)
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.serial wait([#acc.device_type<none>], {%[[ONE_CAST]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait(1) device_type(nvidia) wait(1)
+ {}
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32
+ // CHECK-NEXT: acc.serial wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait(devnum: cond : 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait(devnum: cond : 1, 2)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<radeon>]) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait(cond, 1)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.serial wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial wait(queues: cond, 1) device_type(radeon)
+ {}
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
+ // CHECK-NEXT: acc.serial wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
// CHECK-NEXT: cir.return
}
More information about the cfe-commits
mailing list