[clang] [OpenACC][CIR] Add parallelism determ. to all acc.loops (PR #143751)
Erich Keane via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 11 10:14:31 PDT 2025
https://github.com/erichkeane created https://github.com/llvm/llvm-project/pull/143751
PR #143720 adds a requirement to the ACC dialect that every acc.loop must have a seq, independent, or auto attribute for the 'default' device_type. The standard has rules for how this can be intuited:
orphan/parallel/parallel loop: independent
kernels/kernels loop: auto
serial/serial loop: seq, unless there is a gang/worker/vector, at which point it should be 'auto'.
This patch implements all of this rule as a 'cleanup' step on the IR generation for combined/loop operations. Note that the test impact is much less since I inadvertently have my 'operation' terminating curley matching the end curley from 'attribute' instead of the front of the line, so I've added sufficient tests to ensure I captured the above.
>From 644612d088f28a21f7f59496de00f8c14de89c1d Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane at nvidia.com>
Date: Wed, 11 Jun 2025 09:54:46 -0700
Subject: [PATCH] [OpenACC][CIR] Add parallelism determ. to all acc.loops
PR #143720 adds a requirement to the ACC dialect that every acc.loop
must have a seq, independent, or auto attribute for the 'default'
device_type. The standard has rules for how this can be intuited:
orphan/parallel/parallel loop: independent
kernels/kernels loop: auto
serial/serial loop: seq, unless there is a gang/worker/vector, at which
point it should be 'auto'.
This patch implements all of this rule as a 'cleanup' step on the IR
generation for combined/loop operations. Note that the test impact is
much less since I inadvertently have my 'operation' terminating curley
matching the end curley from 'attribute' instead of the front of the
line, so I've added sufficient tests to ensure I captured the above.
---
clang/lib/CIR/CodeGen/CIRGenFunction.h | 12 +++
clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp | 2 +
.../lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp | 60 +++++++++++
clang/test/CIR/CodeGenOpenACC/combined.cpp | 69 ++++++++++--
clang/test/CIR/CodeGenOpenACC/loop.cpp | 101 ++++++++++++++++--
5 files changed, 227 insertions(+), 17 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index b08dd540e6289..682d59d63faa8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -34,6 +34,12 @@ namespace {
class ScalarExprEmitter;
} // namespace
+namespace mlir {
+namespace acc {
+class LoopOp;
+} // namespace acc
+} // namespace mlir
+
namespace clang::CIRGen {
class CIRGenFunction : public CIRGenTypeCache {
@@ -1082,6 +1088,12 @@ class CIRGenFunction : public CIRGenTypeCache {
OpenACCDirectiveKind dirKind, SourceLocation dirLoc,
ArrayRef<const OpenACCClause *> clauses);
+ // The OpenACC LoopOp requires that we have auto, seq, or independent on all
+ // LoopOp operations for the 'none' device type case. This function checks if
+ // the LoopOp has one, else it updates it to have one.
+ void updateLoopOpParallelism(mlir::acc::LoopOp &op, bool isOrphan,
+ OpenACCDirectiveKind dk);
+
public:
mlir::LogicalResult
emitOpenACCComputeConstruct(const OpenACCComputeConstruct &s);
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
index 2aab9cecf93d8..1feefa55eb270 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
@@ -102,6 +102,8 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct(
emitOpenACCClauses(computeOp, loopOp, dirKind, dirLoc, clauses);
+ updateLoopOpParallelism(loopOp, /*isOrphan=*/false, dirKind);
+
builder.create<TermOp>(end);
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
index 24cd1d399de65..2082ef65193ba 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACCLoop.cpp
@@ -22,6 +22,63 @@ using namespace clang::CIRGen;
using namespace cir;
using namespace mlir::acc;
+void CIRGenFunction::updateLoopOpParallelism(mlir::acc::LoopOp &op,
+ bool isOrphan,
+ OpenACCDirectiveKind dk) {
+ // Check that at least one of auto, independent, or seq is present
+ // for the device-independent default clauses.
+ auto hasDeviceNone = [](mlir::acc::DeviceTypeAttr attr) -> bool {
+ return attr.getValue() == mlir::acc::DeviceType::None;
+ };
+ bool hasDefaultSeq =
+ op.getSeqAttr()
+ ? llvm::any_of(
+ op.getSeqAttr().getAsRange<mlir::acc::DeviceTypeAttr>(),
+ hasDeviceNone)
+ : false;
+ bool hasDefaultIndependent =
+ op.getIndependentAttr()
+ ? llvm::any_of(
+ op.getIndependentAttr().getAsRange<mlir::acc::DeviceTypeAttr>(),
+ hasDeviceNone)
+ : false;
+ bool hasDefaultAuto =
+ op.getAuto_Attr()
+ ? llvm::any_of(
+ op.getAuto_Attr().getAsRange<mlir::acc::DeviceTypeAttr>(),
+ hasDeviceNone)
+ : false;
+
+ if (hasDefaultSeq || hasDefaultIndependent || hasDefaultAuto)
+ return;
+
+ // Orphan or parallel results in 'independent'.
+ if (isOrphan || dk == OpenACCDirectiveKind::Parallel ||
+ dk == OpenACCDirectiveKind::ParallelLoop) {
+ op.addIndependent(builder.getContext(), {});
+ return;
+ }
+
+ // Kernels always results in 'auto'.
+ if (dk == OpenACCDirectiveKind::Kernels ||
+ dk == OpenACCDirectiveKind::KernelsLoop) {
+ op.addAuto(builder.getContext(), {});
+ return;
+ }
+
+ // Serial should use 'seq' unless there is a gang, worker, or vector clause,
+ // in which case, it should use 'auto'.
+ assert(dk == OpenACCDirectiveKind::Serial ||
+ dk == OpenACCDirectiveKind::SerialLoop);
+
+ if (op.getWorkerAttr() || op.getVectorAttr() || op.getGangAttr()) {
+ op.addAuto(builder.getContext(), {});
+ return;
+ }
+
+ op.addSeq(builder.getContext(), {});
+}
+
mlir::LogicalResult
CIRGenFunction::emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s) {
mlir::Location start = getLoc(s.getSourceRange().getBegin());
@@ -90,6 +147,9 @@ CIRGenFunction::emitOpenACCLoopConstruct(const OpenACCLoopConstruct &s) {
emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(),
s.clauses());
+ updateLoopOpParallelism(op, s.isOrphanedLoopConstruct(),
+ s.getParentComputeConstructKind());
+
mlir::LogicalResult stmtRes = mlir::success();
// Emit body.
{
diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp
index 1f3c9f1a8d3fa..5b83a9cb91898 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp
@@ -74,7 +74,7 @@ extern "C" void acc_combined(int N, int cond) {
// CHECK: acc.serial combined(loop) {
// CHECK: acc.loop combined(serial) {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
+ // CHECK-NEXT: } attributes {seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>, #acc.device_type<none>]} loc
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc kernels loop seq device_type(nvidia, radeon)
@@ -99,7 +99,7 @@ extern "C" void acc_combined(int N, int cond) {
// CHECK: acc.serial combined(loop) {
// CHECK: acc.loop combined(serial) {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>], seq = [#acc.device_type<none>]} loc
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc kernels loop auto device_type(nvidia, radeon)
@@ -124,7 +124,7 @@ extern "C" void acc_combined(int N, int cond) {
// CHECK: acc.serial combined(loop) {
// CHECK: acc.loop combined(serial) {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>], seq = [#acc.device_type<none>]} loc
// CHECK: acc.yield
// CHECK-NEXT: } loc
#pragma acc kernels loop independent device_type(nvidia, radeon)
@@ -143,7 +143,7 @@ extern "C" void acc_combined(int N, int cond) {
// CHECK: acc.parallel combined(loop) {
// CHECK: acc.loop combined(parallel) {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>]}
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -154,7 +154,7 @@ extern "C" void acc_combined(int N, int cond) {
// CHECK: acc.serial combined(loop) {
// CHECK: acc.loop combined(serial) {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>], seq = [#acc.device_type<none>]}
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -165,7 +165,7 @@ extern "C" void acc_combined(int N, int cond) {
// CHECK: acc.kernels combined(loop) {
// CHECK: acc.loop combined(kernels) {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>], collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
// CHECK: acc.terminator
// CHECK-NEXT: } loc
#pragma acc parallel loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
@@ -175,7 +175,7 @@ extern "C" void acc_combined(int N, int cond) {
// CHECK: acc.parallel combined(loop) {
// CHECK: acc.loop combined(parallel) {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>], independent = [#acc.device_type<none>]}
// CHECK: acc.yield
// CHECK-NEXT: } loc
@@ -1184,4 +1184,59 @@ extern "C" void acc_combined_data_clauses(int *arg1, int *arg2) {
// CHECK-NEXT: } loc
// CHECK-NEXT: acc.detach accPtr(%[[ATTACH2]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_attach>, name = "arg2"}
// CHECK-NEXT: acc.detach accPtr(%[[ATTACH1]] : !cir.ptr<!cir.ptr<!s32i>>) async([#acc.device_type<host>]) {dataClause = #acc<data_clause acc_attach>, name = "arg1"}
+
+ // Checking the automatic-addition of parallelism clauses.
+#pragma acc parallel loop
+ for(unsigned I = 0; I < 5; ++I);
+ // CHECK-NEXT: acc.parallel combined(loop) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels loop
+ for(unsigned I = 0; I < 5; ++I);
+ // CHECK-NEXT: acc.kernels combined(loop) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc serial loop
+ for(unsigned I = 0; I < 5; ++I);
+ // CHECK-NEXT: acc.serial combined(loop) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial loop worker
+ for(unsigned I = 0; I < 5; ++I);
+ // CHECK-NEXT: acc.serial combined(loop) {
+ // CHECK-NEXT: acc.loop combined(serial) worker {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial loop vector
+ for(unsigned I = 0; I < 5; ++I);
+ // CHECK-NEXT: acc.serial combined(loop) {
+ // CHECK-NEXT: acc.loop combined(serial) vector {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial loop gang
+ for(unsigned I = 0; I < 5; ++I);
+ // CHECK-NEXT: acc.serial combined(loop) {
+ // CHECK-NEXT: acc.loop combined(serial) gang {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
}
diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp
index db94e2819b301..c0bf11e353951 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp
@@ -41,12 +41,12 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>], seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
#pragma acc loop device_type(radeon) seq
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {seq = [#acc.device_type<radeon>]} loc
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>], seq = [#acc.device_type<radeon>]} loc
#pragma acc loop seq device_type(nvidia, radeon)
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
@@ -67,12 +67,12 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>, #acc.device_type<none>]} loc
#pragma acc loop device_type(radeon) independent
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {independent = [#acc.device_type<radeon>]} loc
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<radeon>, #acc.device_type<none>]} loc
#pragma acc loop independent device_type(nvidia, radeon)
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
@@ -93,12 +93,12 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>], independent = [#acc.device_type<none>]} loc
#pragma acc loop device_type(radeon) auto
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<radeon>]} loc
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<radeon>], independent = [#acc.device_type<none>]} loc
#pragma acc loop auto device_type(nvidia, radeon)
for(unsigned I = 0; I < N; ++I);
// CHECK: acc.loop {
@@ -116,7 +116,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
for(unsigned K = 0; K < N; ++K);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>], independent = [#acc.device_type<none>]}
#pragma acc loop collapse(1) device_type(radeon) collapse (2)
for(unsigned I = 0; I < N; ++I)
@@ -124,7 +124,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
for(unsigned K = 0; K < N; ++K);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>], independent = [#acc.device_type<none>]}
#pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
for(unsigned I = 0; I < N; ++I)
@@ -132,14 +132,14 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
for(unsigned K = 0; K < N; ++K);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>], independent = [#acc.device_type<none>]}
#pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
for(unsigned I = 0; I < N; ++I)
for(unsigned J = 0; J < N; ++J)
for(unsigned K = 0; K < N; ++K);
// CHECK: acc.loop {
// CHECK: acc.yield
- // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>], independent = [#acc.device_type<none>]}
#pragma acc loop tile(1, 2, 3)
for(unsigned I = 0; I < N; ++I)
@@ -392,4 +392,85 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
}
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+ // Checking the automatic-addition of parallelism clauses.
+#pragma acc loop
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
+
+#pragma acc parallel
+ {
+ // CHECK-NEXT: acc.parallel {
+#pragma acc loop
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
+ }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc kernels
+ {
+ // CHECK-NEXT: acc.kernels {
+#pragma acc loop
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+ }
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+
+#pragma acc serial
+ {
+ // CHECK-NEXT: acc.serial {
+#pragma acc loop
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
+ }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial
+ {
+ // CHECK-NEXT: acc.serial {
+#pragma acc loop worker
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop worker {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+ }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial
+ {
+ // CHECK-NEXT: acc.serial {
+#pragma acc loop vector
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop vector {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+ }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc serial
+ {
+ // CHECK-NEXT: acc.serial {
+#pragma acc loop gang
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop gang {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
+ }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
}
More information about the cfe-commits
mailing list