[clang] 23d4756 - [OpenACC][CIR] Add lowering for 'copy' array indexes (#140971)
via cfe-commits
cfe-commits at lists.llvm.org
Fri May 23 07:04:49 PDT 2025
Author: Erich Keane
Date: 2025-05-23T07:04:46-07:00
New Revision: 23d4756c4bfce06a98c9c03b24752d32760ac22b
URL: https://github.com/llvm/llvm-project/commit/23d4756c4bfce06a98c9c03b24752d32760ac22b
DIFF: https://github.com/llvm/llvm-project/commit/23d4756c4bfce06a98c9c03b24752d32760ac22b.diff
LOG: [OpenACC][CIR] Add lowering for 'copy' array indexes (#140971)
The array indexes(and sections) are represented by the acc.bounds
operation, which this ensures we fill in properly. The lowerbound is
required, so we always get that.
The upperbound or extent is required. We typically do extent, since that
is the 'length' as specified by ACC, but in cases where we have implicit
length, we use the extent instead.
It isn't clear when 'stride' should be anything besides 1, though by my
reading, since we have full-types in the emitted code, we should never
have it be anything but 1.
This patch enables these for copy on compute and combined constructs,
and makes sure to test everything I could think of for
combinations/permutations.
Added:
Modified:
clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
clang/test/CIR/CodeGenOpenACC/combined-copy.c
clang/test/CIR/CodeGenOpenACC/compute-copy.c
Removed:
################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 91aa358638d5d..8b61d3fae3ad0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -121,6 +121,11 @@ class OpenACCClauseCIREmitter final
return constOp.getResult();
}
+ mlir::Value createConstantInt(SourceLocation loc, unsigned width,
+ int64_t value) {
+ return createConstantInt(cgf.cgm.getLoc(loc), width, value);
+ }
+
mlir::acc::DeviceType decodeDeviceType(const IdentifierInfo *ii) {
// '*' case leaves no identifier-info, just a nullptr.
if (!ii)
@@ -184,37 +189,89 @@ class OpenACCClauseCIREmitter final
mlir::Location beginLoc;
mlir::Value varValue;
llvm::StringRef name;
+ llvm::SmallVector<mlir::Value> bounds;
};
+ mlir::Value createBound(mlir::Location boundLoc, mlir::Value lowerBound,
+ mlir::Value upperBound, mlir::Value extent) {
+ // Arrays always have a start-idx of 0.
+ mlir::Value startIdx = createConstantInt(boundLoc, 64, 0);
+ // Stride is always 1 in C/C++.
+ mlir::Value stride = createConstantInt(boundLoc, 64, 1);
+
+ auto bound = builder.create<mlir::acc::DataBoundsOp>(boundLoc, lowerBound,
+ upperBound);
+ bound.getStartIdxMutable().assign(startIdx);
+ if (extent)
+ bound.getExtentMutable().assign(extent);
+ bound.getStrideMutable().assign(stride);
+
+ return bound;
+ }
+
// A helper function that gets the information from an operand to a data
// clause, so that it can be used to emit the data operations.
- inline DataOperandInfo getDataOperandInfo(OpenACCDirectiveKind dk,
- const Expr *e) {
+ DataOperandInfo getDataOperandInfo(OpenACCDirectiveKind dk, const Expr *e) {
// TODO: OpenACC: Cache was
diff erent enough as to need a separate
// `ActOnCacheVar`, so we are going to need to do some investigations here
// when it comes to implement this for cache.
if (dk == OpenACCDirectiveKind::Cache) {
cgf.cgm.errorNYI(e->getSourceRange(),
"OpenACC data operand for 'cache' directive");
- return {cgf.cgm.getLoc(e->getBeginLoc()), {}, {}};
+ return {cgf.cgm.getLoc(e->getBeginLoc()), {}, {}, {}};
}
const Expr *curVarExpr = e->IgnoreParenImpCasts();
mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
+ llvm::SmallVector<mlir::Value> bounds;
+
+ // Assemble the list of bounds.
+ while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) {
+ mlir::Location boundLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
+ mlir::Value lowerBound;
+ mlir::Value upperBound;
+ mlir::Value extent;
+
+ if (const auto *section = dyn_cast<ArraySectionExpr>(curVarExpr)) {
+ if (const Expr *lb = section->getLowerBound())
+ lowerBound = emitIntExpr(lb);
+ else
+ lowerBound = createConstantInt(boundLoc, 64, 0);
+
+ if (const Expr *len = section->getLength()) {
+ extent = emitIntExpr(len);
+ } else {
+ QualType baseTy = ArraySectionExpr::getBaseOriginalType(
+ section->getBase()->IgnoreParenImpCasts());
+ // We know this is the case as implicit lengths are only allowed for
+ // array types with a constant size, or a dependent size. AND since
+ // we are codegen we know we're not dependent.
+ auto *arrayTy = cgf.getContext().getAsConstantArrayType(baseTy);
+ // Rather than trying to calculate the extent based on the
+ // lower-bound, we can just emit this as an upper bound.
+ upperBound =
+ createConstantInt(boundLoc, 64, arrayTy->getLimitedSize() - 1);
+ }
- // TODO: OpenACC: Assemble the list of bounds.
- if (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) {
- cgf.cgm.errorNYI(curVarExpr->getSourceRange(),
- "OpenACC data clause array subscript/section");
- return {exprLoc, {}, {}};
+ curVarExpr = section->getBase()->IgnoreParenImpCasts();
+ } else {
+ const auto *subscript = cast<ArraySubscriptExpr>(curVarExpr);
+
+ lowerBound = emitIntExpr(subscript->getIdx());
+ // Length of an array index is always 1.
+ extent = createConstantInt(boundLoc, 64, 1);
+ curVarExpr = subscript->getBase()->IgnoreParenImpCasts();
+ }
+
+ bounds.push_back(createBound(boundLoc, lowerBound, upperBound, extent));
}
// TODO: OpenACC: if this is a member expr, emit the VarPtrPtr correctly.
if (isa<MemberExpr>(curVarExpr)) {
cgf.cgm.errorNYI(curVarExpr->getSourceRange(),
"OpenACC Data clause member expr");
- return {exprLoc, {}, {}};
+ return {exprLoc, {}, {}, std::move(bounds)};
}
// Sema has made sure that only 4 types of things can get here, array
@@ -223,14 +280,14 @@ class OpenACCClauseCIREmitter final
// right.
const auto *dre = cast<DeclRefExpr>(curVarExpr);
const auto *vd = cast<VarDecl>(dre->getFoundDecl()->getCanonicalDecl());
- return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), vd->getName()};
+ return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), vd->getName(),
+ std::move(bounds)};
}
template <typename BeforeOpTy, typename AfterOpTy>
void addDataOperand(const Expr *varOperand, mlir::acc::DataClause dataClause,
bool structured, bool implicit) {
DataOperandInfo opInfo = getDataOperandInfo(dirKind, varOperand);
- mlir::ValueRange bounds;
// TODO: OpenACC: we should comprehend the 'modifier-list' here for the data
// operand. At the moment, we don't have a uniform way to assign these
@@ -239,7 +296,7 @@ class OpenACCClauseCIREmitter final
auto beforeOp =
builder.create<BeforeOpTy>(opInfo.beginLoc, opInfo.varValue, structured,
- implicit, opInfo.name, bounds);
+ implicit, opInfo.name, opInfo.bounds);
operation.getDataClauseOperandsMutable().append(beforeOp.getResult());
AfterOpTy afterOp;
@@ -248,7 +305,7 @@ class OpenACCClauseCIREmitter final
builder.setInsertionPointAfter(operation);
afterOp = builder.create<AfterOpTy>(opInfo.beginLoc, beforeOp.getResult(),
opInfo.varValue, structured, implicit,
- opInfo.name, bounds);
+ opInfo.name, opInfo.bounds);
}
// Set the 'rest' of the info for both operations.
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-copy.c b/clang/test/CIR/CodeGenOpenACC/combined-copy.c
index 8df6b9fc03454..3ebca6578c8a9 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-copy.c
+++ b/clang/test/CIR/CodeGenOpenACC/combined-copy.c
@@ -12,6 +12,8 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[LOCAL3:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["localVar3"]
// CHECK-NEXT: %[[LOCALPTR:.*]] = cir.alloca !cir.ptr<!s16i>, !cir.ptr<!cir.ptr<!s16i>>, ["localPointer"]
// CHECK-NEXT: %[[LOCALARRAY:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["localArray"]
+ // CHECK-NEXT: %[[LOCALARRAYOFPTRS:.*]] = cir.alloca !cir.array<!cir.ptr<!cir.float> x 5>, !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>, ["localArrayOfPtrs"]
+ // CHECK-NEXT: %[[THREEDARRAY:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>, ["threeDArray"]
// CHECK-NEXT: cir.store %[[ARG]], %[[PARM]] : !s32i, !cir.ptr<!s32i>
#pragma acc parallel loop copy(localVar1, parmVar) copy(localVar2) copy(localVar3, parmVar)
@@ -261,4 +263,503 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
// CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) to varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} loc
+
+#pragma acc parallel loop copy(localArray[3])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc serial loop copy(localArray[1:3])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc kernels loop copy(localArray[:3])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc parallel loop copy(localArray[1:])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc serial loop copy(localArray[localVar1:localVar2])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc kernels loop copy(localArray[:localVar2])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc parallel loop copy(localArray[localVar1:])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc serial loop copy(localPointer[3])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+#pragma acc kernels loop copy(localPointer[1:3])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+#pragma acc parallel loop copy(localPointer[:3])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+#pragma acc serial loop copy(localPointer[localVar1:localVar2])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+#pragma acc kernels loop copy(localPointer[:localVar2])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+ float *localArrayOfPtrs[5];
+#pragma acc parallel loop copy(localArrayOfPtrs[3])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc serial loop copy(localArrayOfPtrs[3][2])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc kernels loop copy(localArrayOfPtrs[localVar1:localVar2])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc parallel loop copy(localArrayOfPtrs[localVar1:])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc serial loop copy(localArrayOfPtrs[:localVar2])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc kernels loop copy(localArrayOfPtrs[localVar1])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc parallel loop copy(localArrayOfPtrs[localVar1][localVar2])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc serial loop copy(localArrayOfPtrs[localVar1][localVar2:parmVar])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[PV:.*]] = cir.load %[[PARM]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc kernels loop copy(localArrayOfPtrs[localVar1][:parmVar])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[PV:.*]] = cir.load %[[PARM]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc parallel loop copy(localArrayOfPtrs[localVar1:localVar2][:1])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc serial loop copy(localArrayOfPtrs[localVar1:localVar2][1:1])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(serial) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+ double threeDArray[5][6][7];
+#pragma acc kernels loop copy(threeDArray[1][2][3])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) -> !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc
+ // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(kernels) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) to varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc
+
+#pragma acc parallel loop copy(threeDArray[1:1][2:1][3:1])
+ for(int i = 0; i < 5; ++i);
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) -> !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc
+ // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {
+ // CHECK-NEXT: acc.loop combined(parallel) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) to varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc
}
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-copy.c b/clang/test/CIR/CodeGenOpenACC/compute-copy.c
index a542409f07152..77e8cc8562c73 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-copy.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-copy.c
@@ -12,6 +12,9 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[LOCAL3:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["localVar3"]
// CHECK-NEXT: %[[LOCALPTR:.*]] = cir.alloca !cir.ptr<!s16i>, !cir.ptr<!cir.ptr<!s16i>>, ["localPointer"]
// CHECK-NEXT: %[[LOCALARRAY:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["localArray"]
+ // CHECK-NEXT: %[[LOCALARRAYOFPTRS:.*]] = cir.alloca !cir.array<!cir.ptr<!cir.float> x 5>, !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>, ["localArrayOfPtrs"]
+ // CHECK-NEXT: %[[THREEDARRAY:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>, ["threeDArray"]
+
// CHECK-NEXT: cir.store %[[ARG]], %[[PARM]] : !s32i, !cir.ptr<!s32i>
#pragma acc parallel copy(localVar1, parmVar) copy(localVar2) copy(localVar3, parmVar)
@@ -210,4 +213,426 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
// CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type<nvidia>], %[[ONE_CAST]] : si32 [#acc.device_type<radeon>]) to varPtr(%[[LOCAL1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copy>, name = "localVar1"} loc
+
+#pragma acc parallel copy(localArray[3])
+ ;
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc serial copy(localArray[1:3])
+ ;
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc kernels copy(localArray[:3])
+ ;
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc parallel copy(localArray[1:])
+ ;
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+#pragma acc serial copy(localArray[localVar1:localVar2])
+ ;
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc kernels copy(localArray[:localVar2])
+ ;
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc parallel copy(localArray[localVar1:])
+ ;
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+
+#pragma acc serial copy(localPointer[3])
+ ;
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+#pragma acc kernels copy(localPointer[1:3])
+ ;
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+#pragma acc parallel copy(localPointer[:3])
+ ;
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+#pragma acc serial copy(localPointer[localVar1:localVar2])
+ ;
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+#pragma acc kernels copy(localPointer[:localVar2])
+ ;
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+
+ float *localArrayOfPtrs[5];
+#pragma acc parallel copy(localArrayOfPtrs[3])
+ ;
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+#pragma acc serial copy(localArrayOfPtrs[3][2])
+ ;
+ // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc kernels copy(localArrayOfPtrs[localVar1:localVar2])
+ ;
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc parallel copy(localArrayOfPtrs[localVar1:])
+ ;
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc serial copy(localArrayOfPtrs[:localVar2])
+ ;
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc kernels copy(localArrayOfPtrs[localVar1])
+ ;
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc parallel copy(localArrayOfPtrs[localVar1][localVar2])
+ ;
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc serial copy(localArrayOfPtrs[localVar1][localVar2:parmVar])
+ ;
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[PV:.*]] = cir.load %[[PARM]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc kernels copy(localArrayOfPtrs[localVar1][:parmVar])
+ ;
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[PV:.*]] = cir.load %[[PARM]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc parallel copy(localArrayOfPtrs[localVar1:localVar2][:1])
+ ;
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+#pragma acc serial copy(localArrayOfPtrs[localVar1:localVar2][1:1])
+ ;
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
+ // CHECK-NEXT: %[[LV1:.*]] = cir.load %[[LOCAL1]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32
+ // CHECK-NEXT: %[[LV2:.*]] = cir.load %[[LOCAL2]] : !cir.ptr<!s16i>, !s16i
+ // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) -> !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+ // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]]) to varPtr(%[[LOCALARRAYOFPTRS]] : !cir.ptr<!cir.array<!cir.ptr<!cir.float> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArrayOfPtrs"} loc
+
+ double threeDArray[5][6][7];
+#pragma acc kernels copy(threeDArray[1][2][3])
+ ;
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) -> !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc
+ // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {
+ // CHECK-NEXT: acc.terminator
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) to varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc
+
+#pragma acc parallel copy(threeDArray[1:1][2:1][3:1])
+ ;
+ // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32
+ // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
+ // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
+ // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) -> !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>> {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc
+ // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {
+ // CHECK-NEXT: acc.yield
+ // CHECK-NEXT: } loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) bounds(%[[BOUNDS]], %[[BOUNDS2]], %[[BOUNDS3]]) to varPtr(%[[THREEDARRAY]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!cir.double x 7> x 6> x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "threeDArray"} loc
}
More information about the cfe-commits
mailing list