[clang] [llvm] [Clang][OpenMP] Non-contiguous strided update (PR #144635)
Amit Tiwari via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 26 05:06:45 PDT 2025
https://github.com/amitamd7 updated https://github.com/llvm/llvm-project/pull/144635
>From 1383c0e58feff9aabbffab23dc705c497baa0f2d Mon Sep 17 00:00:00 2001
From: amtiwari <amtiwari at amd.com>
Date: Mon, 16 Jun 2025 01:07:01 -0400
Subject: [PATCH] strided_update_offloading with lit-tests added
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 189 ++++++++++--------
.../test/offloading/strided_multiple_update.c | 61 ++++++
.../test/offloading/strided_partial_update.c | 63 ++++++
offload/test/offloading/strided_update.c | 54 +++++
4 files changed, 282 insertions(+), 85 deletions(-)
create mode 100644 offload/test/offloading/strided_multiple_update.c
create mode 100644 offload/test/offloading/strided_partial_update.c
create mode 100644 offload/test/offloading/strided_update.c
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 8ccc37ef98a74..785eb5f6a869d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -490,11 +490,11 @@ enum OpenMPLocationFlags : unsigned {
/// member */
/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
/// see above */
-///#if USE_ITT_BUILD
+/// #if USE_ITT_BUILD
/// /* but currently used for storing
/// region-specific ITT */
/// /* contextual information. */
-///#endif /* USE_ITT_BUILD */
+/// #endif /* USE_ITT_BUILD */
/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
/// C++ */
/// char const *psource; /**< String describing the source location.
@@ -714,16 +714,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
if (DRD) {
// Shift the address forward by one element.
- llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
- SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
- "omp.arraycpy.dest.element");
+ llvm::Value *SrcElementNext =
+ CGF.Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
+ /*Idx0=*/1, "omp.arraycpy.dest.element");
SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
}
// Shift the address forward by one element.
- llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
- DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
- "omp.arraycpy.dest.element");
+ llvm::Value *DestElementNext =
+ CGF.Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
+ /*Idx0=*/1, "omp.arraycpy.dest.element");
// Check whether we've reached the end.
llvm::Value *Done =
CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
@@ -973,8 +973,8 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
llvm::Value *PrivatePointer =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
- llvm::Value *Ptr = CGF.Builder.CreateGEP(
- SharedAddr.getElementType(), PrivatePointer, Adjustment);
+ llvm::Value *Ptr = CGF.Builder.CreateGEP(SharedAddr.getElementType(),
+ PrivatePointer, Adjustment);
return castToBase(CGF, OrigVD->getType(),
SharedAddresses[N].first.getType(),
OriginalBaseLValue.getAddress(), Ptr);
@@ -1599,12 +1599,11 @@ Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
getOrCreateThreadPrivateCache(VD)};
- return Address(
- CGF.EmitRuntimeCall(
- OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
- Args),
- CGF.Int8Ty, VDAddr.getAlignment());
+ return Address(CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
+ Args),
+ CGF.Int8Ty, VDAddr.getAlignment());
}
void CGOpenMPRuntime::emitThreadPrivateVarInit(
@@ -1629,8 +1628,8 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit(
}
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
- const VarDecl *VD, Address VDAddr, SourceLocation Loc,
- bool PerformInit, CodeGenFunction *CGF) {
+ const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
+ CodeGenFunction *CGF) {
if (CGM.getLangOpts().OpenMPUseTLS &&
CGM.getContext().getTargetInfo().isTLSSupported())
return nullptr;
@@ -1692,7 +1691,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
Loc, Loc);
- // Create a scope with an artificial location for the body of this function.
+ // Create a scope with an artificial location for the body of this
+ // function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
DtorCGF.GetAddrOfLocalVar(&Dst),
@@ -1933,8 +1933,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
QualType Int32Ty =
CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
- CGF.EmitStoreOfScalar(ThreadID,
- CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
+ CGF.EmitStoreOfScalar(ThreadID, CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
return ThreadIDTemp;
}
@@ -2435,8 +2434,8 @@ bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
return Schedule == OMP_sch_static_chunked;
}
-bool CGOpenMPRuntime::isStaticChunked(
- OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+bool CGOpenMPRuntime::isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+ bool Chunked) const {
OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
return Schedule == OMP_dist_sch_static_chunked;
}
@@ -2600,10 +2599,10 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
"Expected loop-based or sections-based directive.");
- llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
- isOpenMPLoopDirective(DKind)
- ? OMP_IDENT_WORK_LOOP
- : OMP_IDENT_WORK_SECTIONS);
+ llvm::Value *UpdatedLocation = emitUpdateLocation(
+ CGF, Loc,
+ isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP
+ : OMP_IDENT_WORK_SECTIONS);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::FunctionCallee StaticInitFunction =
OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
@@ -2678,9 +2677,8 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
SourceLocation Loc, unsigned IVSize,
- bool IVSigned, Address IL,
- Address LB, Address UB,
- Address ST) {
+ bool IVSigned, Address IL, Address LB,
+ Address UB, Address ST) {
// Call __kmpc_dispatch_next(
// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
@@ -2858,8 +2856,8 @@ static bool isAllocatableDecl(const VarDecl *VD) {
!AA->getAllocator());
}
-static RecordDecl *
-createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
+static RecordDecl *createPrivatesRecordDecl(CodeGenModule &CGM,
+ ArrayRef<PrivateDataTy> Privates) {
if (!Privates.empty()) {
ASTContext &C = CGM.getContext();
// Build struct .kmp_privates_t. {
@@ -3364,7 +3362,6 @@ static bool checkInitIsRequired(CodeGenFunction &CGF,
return InitRequired;
}
-
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
@@ -3721,10 +3718,14 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
: CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
- SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
- getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
- SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- TaskEntry, KmpRoutineEntryPtrTy)};
+ SmallVector<llvm::Value *, 8> AllocArgs = {
+ emitUpdateLocation(CGF, Loc),
+ getThreadID(CGF, Loc),
+ TaskFlags,
+ KmpTaskTWithPrivatesTySize,
+ SharedsSize,
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
+ KmpRoutineEntryPtrTy)};
llvm::Value *NewTask;
if (D.hasClausesOfKind<OMPNowaitClause>()) {
// Check if we have any device clause associated with the directive.
@@ -3915,13 +3916,13 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
// Copy shareds if there are any.
Address KmpTaskSharedsPtr = Address::invalid();
if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
- KmpTaskSharedsPtr = Address(
- CGF.EmitLoadOfScalar(
- CGF.EmitLValueForField(
- TDBase,
- *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
- Loc),
- CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
+ KmpTaskSharedsPtr =
+ Address(CGF.EmitLoadOfScalar(
+ CGF.EmitLValueForField(
+ TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
+ KmpTaskTShareds)),
+ Loc),
+ CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
@@ -4508,7 +4509,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
// list is not empty
llvm::Value *ThreadID = getThreadID(CGF, Loc);
llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
- llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
+ llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
llvm::Value *DepTaskArgs[7];
if (!Data.Dependences.empty()) {
DepTaskArgs[0] = UpLoc;
@@ -4753,12 +4754,12 @@ static void EmitOMPAggregateReduction(
Scope.ForceCleanup();
// Shift the address forward by one element.
- llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
- LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
- "omp.arraycpy.dest.element");
- llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
- RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
- "omp.arraycpy.src.element");
+ llvm::Value *LHSElementNext =
+ CGF.Builder.CreateConstGEP1_32(LHSAddr.getElementType(), LHSElementPHI,
+ /*Idx0=*/1, "omp.arraycpy.dest.element");
+ llvm::Value *RHSElementNext =
+ CGF.Builder.CreateConstGEP1_32(RHSAddr.getElementType(), RHSElementPHI,
+ /*Idx0=*/1, "omp.arraycpy.src.element");
// Check whether we've reached the end.
llvm::Value *Done =
CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
@@ -5708,7 +5709,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
- const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
const FieldDecl *FlagsFD = addFieldToRecordDecl(
@@ -6218,7 +6219,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
/// Checks if the expression is constant or does not have non-trivial function
/// calls.
-static bool isTrivial(ASTContext &Ctx, const Expr * E) {
+static bool isTrivial(ASTContext &Ctx, const Expr *E) {
// We can skip constant expressions.
// We can skip expressions with trivial calls or simple expressions.
return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
@@ -6413,10 +6414,11 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
const auto *CS = D.getInnermostCapturedStmt();
CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
- /*IgnoreResultAssign*/ true);
+ llvm::Value *NumTeamsVal =
+ CGF.EmitScalarExpr(NumTeams,
+ /*IgnoreResultAssign*/ true);
return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
- /*isSigned=*/true);
+ /*isSigned=*/true);
}
case OMPD_target_teams:
case OMPD_target_teams_distribute:
@@ -6424,10 +6426,11 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd: {
CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
- llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
- /*IgnoreResultAssign*/ true);
+ llvm::Value *NumTeamsVal =
+ CGF.EmitScalarExpr(NumTeams,
+ /*IgnoreResultAssign*/ true);
return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
- /*isSigned=*/true);
+ /*isSigned=*/true);
}
default:
break;
@@ -7378,7 +7381,31 @@ class MappableExprsHandler {
// dimension.
uint64_t DimSize = 1;
- bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
+ // Detects non-contiguous updates due to strided accesses.
+ // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
+ // correctly when generating information to be passed to the runtime. The
+ // flag is set to true if any array section has a stride not equal to 1, or
+ // if the stride is not a constant expression (conservatively assumed
+ // non-contiguous).
+ bool IsNonContiguous = [&]() -> bool {
+ for (const auto &Component : Components) {
+ const auto *OASE =
+ dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
+ if (OASE) {
+ const Expr *StrideExpr = OASE->getStride();
+ if (StrideExpr) {
+ if (const auto Constant =
+ StrideExpr->getIntegerConstantExpr(CGF.getContext())) {
+ if (!Constant->isOne()) {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+ }();
+
bool IsPrevMemberReference = false;
bool IsPartialMapped =
@@ -7550,8 +7577,8 @@ class MappableExprsHandler {
LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
TypeSize.getQuantity() - 1);
PartialStruct.HighestElem = {
- std::numeric_limits<decltype(
- PartialStruct.HighestElem.first)>::max(),
+ std::numeric_limits<
+ decltype(PartialStruct.HighestElem.first)>::max(),
HB};
PartialStruct.Base = BP;
PartialStruct.LB = LB;
@@ -8966,7 +8993,8 @@ class MappableExprsHandler {
generateInfoForComponentList(
MapType, MapModifiers, {}, Components, CombinedInfo,
StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
- IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
+ IsImplicit,
+ /*GenerateAllInfoForClauses*/ false, Mapper,
/*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
IsFirstComponentList = false;
}
@@ -10133,7 +10161,7 @@ bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
return false;
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
- switch(A->getAllocatorType()) {
+ switch (A->getAllocatorType()) {
case OMPAllocateDeclAttr::OMPNullMemAlloc:
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
// Not supported, fallback to the default mem space.
@@ -10237,7 +10265,8 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
CGF.CGM.Int32Ty, /* isSigned = */ true)
: CGF.Builder.getInt32(0);
- // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
+ // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams,
+ // thread_limit)
llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
ThreadLimitVal};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
@@ -10546,7 +10575,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
}
namespace {
- /// Kind of parameter in a function with 'declare simd' directive.
+/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
Linear,
LinearRef,
@@ -10672,18 +10701,10 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
unsigned VecRegSize;
};
ISADataTy ISAData[] = {
- {
- 'b', 128
- }, // SSE
- {
- 'c', 256
- }, // AVX
- {
- 'd', 256
- }, // AVX2
- {
- 'e', 512
- }, // AVX512
+ {'b', 128}, // SSE
+ {'c', 256}, // AVX
+ {'d', 256}, // AVX2
+ {'e', 512}, // AVX512
};
llvm::SmallVector<char, 2> Masked;
switch (State) {
@@ -11675,7 +11696,8 @@ Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
RD->completeDefinition();
NewType = C.getRecordType(RD);
- Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
+ Address Addr =
+ CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
} else {
@@ -11684,8 +11706,7 @@ Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
FiredField = std::get<2>(VI->getSecond());
BaseLVal = std::get<3>(VI->getSecond());
}
- LValue FiredLVal =
- CGF.EmitLValueForField(BaseLVal, FiredField);
+ LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
CGF.EmitStoreOfScalar(
llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
FiredLVal);
@@ -11872,7 +11893,7 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
"Lastprivate conditional is not found in outer region.");
QualType StructTy = std::get<0>(It->getSecond());
- const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
+ const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
LValue PrivLVal = CGF.EmitLValue(FoundE);
Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
PrivLVal.getAddress(),
@@ -12225,9 +12246,7 @@ bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
llvm_unreachable("Not supported in SIMD-only mode");
}
-bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
- return false;
-}
+bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { return false; }
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
diff --git a/offload/test/offloading/strided_multiple_update.c b/offload/test/offloading/strided_multiple_update.c
new file mode 100644
index 0000000000000..b089746d56e84
--- /dev/null
+++ b/offload/test/offloading/strided_multiple_update.c
@@ -0,0 +1,61 @@
+// This test checks that #pragma omp target update from(data1[0:3:4],
+// data2[0:2:5]) correctly updates disjoint strided sections of multiple arrays
+// from the device to the host.
+
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+ int len = 12;
+ double data1[len], data2[len];
+
+// Initial values
+#pragma omp target map(tofrom : data1[0 : len], data2[0 : len])
+ {
+ for (int i = 0; i < len; i++) {
+ data1[i] = i;
+ data2[i] = i * 10;
+ }
+ }
+
+ printf("original host array values:\n");
+ printf("data1: ");
+ for (int i = 0; i < len; i++)
+ printf("%.1f ", data1[i]);
+ printf("\ndata2: ");
+ for (int i = 0; i < len; i++)
+ printf("%.1f ", data2[i]);
+ printf("\n\n");
+
+#pragma omp target data map(to : data1[0 : len], data2[0 : len])
+ {
+// Modify arrays on device
+#pragma omp target
+ {
+ for (int i = 0; i < len; i++)
+ data1[i] += i;
+ for (int i = 0; i < len; i++)
+ data2[i] += 100;
+ }
+
+// data1[0:3:4] // indices 0,4,8
+// data2[0:2:5] // indices 0,5
+#pragma omp target update from(data1[0 : 3 : 4], data2[0 : 2 : 5])
+ }
+
+ printf("device array values after update from:\n");
+ printf("data1: ");
+ for (int i = 0; i < len; i++)
+ printf("%.1f ", data1[i]);
+ printf("\ndata2: ");
+ for (int i = 0; i < len; i++)
+ printf("%.1f ", data2[i]);
+ printf("\n\n");
+
+ // CHECK: data1: 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0
+ // CHECK: data2: 0.0 10.0 20.0 30.0 40.0 50.0 60.0 70.0 80.0 90.0 100.0 110.0
+
+ // CHECK: data1: 0.0 1.0 2.0 3.0 8.0 5.0 6.0 7.0 16.0 9.0 10.0 11.0
+ // CHECK: data2: 100.0 10.0 20.0 30.0 40.0 150.0 60.0 70.0 80.0 90.0 100.0 110.0
+}
diff --git a/offload/test/offloading/strided_partial_update.c b/offload/test/offloading/strided_partial_update.c
new file mode 100644
index 0000000000000..0a28caf4cb401
--- /dev/null
+++ b/offload/test/offloading/strided_partial_update.c
@@ -0,0 +1,63 @@
+// This test checks that #pragma omp target update from(data[0:4:3]) correctly updates
+// every third element (stride 3) from the device to the host, partially across
+// the array
+
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+ int len = 11;
+ double data[len];
+
+#pragma omp target map(tofrom : data[0 : len])
+ {
+ for (int i = 0; i < len; i++)
+ data[i] = i;
+ }
+
+ // initial values
+ printf("original host array values:\n");
+ for (int i = 0; i < len; i++)
+ printf("%f\n", data[i]);
+ printf("\n");
+
+#pragma omp target data map(to : data[0 : len])
+ {
+// Modify arrays on device
+#pragma omp target
+ for (int i = 0; i < len; i++)
+ data[i] += i;
+
+#pragma omp target update from(data[0 : 4 : 3]) // indices 0,3,6,9
+ }
+
+ printf("device array values after update from:\n");
+ for (int i = 0; i < len; i++)
+ printf("%f\n", data[i]);
+ printf("\n");
+
+ // CHECK: 0.000000
+ // CHECK: 1.000000
+ // CHECK: 2.000000
+ // CHECK: 3.000000
+ // CHECK: 4.000000
+ // CHECK: 5.000000
+ // CHECK: 6.000000
+ // CHECK: 7.000000
+ // CHECK: 8.000000
+ // CHECK: 9.000000
+ // CHECK: 10.000000
+
+ // CHECK: 0.000000
+ // CHECK: 1.000000
+ // CHECK: 2.000000
+ // CHECK: 6.000000
+ // CHECK: 4.000000
+ // CHECK: 5.000000
+ // CHECK: 12.000000
+ // CHECK: 7.000000
+ // CHECK: 8.000000
+ // CHECK: 18.000000
+ // CHECK: 10.000000
+}
diff --git a/offload/test/offloading/strided_update.c b/offload/test/offloading/strided_update.c
new file mode 100644
index 0000000000000..6626a3286063d
--- /dev/null
+++ b/offload/test/offloading/strided_update.c
@@ -0,0 +1,54 @@
+// This test checks that "update from" clause in OpenMP is supported when the
+// elements are updated in a non-contiguous manner. This test checks that
+// #pragma omp target update from(data[0:4:2]) correctly updates only every
+// other element (stride 2) from the device to the host
+
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+ int len = 8;
+ double data[len];
+#pragma omp target map(tofrom : len, data[0 : len])
+ {
+ for (int i = 0; i < len; i++) {
+ data[i] = i;
+ }
+ }
+ // Initial values
+ printf("original host array values:\n");
+ for (int i = 0; i < len; i++)
+ printf("%f\n", data[i]);
+ printf("\n");
+
+#pragma omp target data map(to : len, data[0 : len])
+ {
+// Modify arrays on device
+#pragma omp target
+ for (int i = 0; i < len; i++) {
+ data[i] += i;
+ }
+
+#pragma omp target update from(data[0 : 4 : 2])
+ }
+ // CHECK: 0.000000
+ // CHECK: 1.000000
+ // CHECK: 4.000000
+ // CHECK: 3.000000
+ // CHECK: 8.000000
+ // CHECK: 5.000000
+ // CHECK: 12.000000
+ // CHECK: 7.000000
+ // CHECK-NOT: 2.000000
+ // CHECK-NOT: 6.000000
+ // CHECK-NOT: 10.000000
+ // CHECK-NOT: 14.000000
+
+ printf("from target array results:\n");
+ for (int i = 0; i < len; i++)
+ printf("%f\n", data[i]);
+ printf("\n");
+
+ return 0;
+}
More information about the cfe-commits
mailing list