r301233 - Revert r301223
Carlo Bertolli via cfe-commits
cfe-commits at lists.llvm.org
Mon Apr 24 12:50:35 PDT 2017
Author: cbertol
Date: Mon Apr 24 14:50:35 2017
New Revision: 301233
URL: http://llvm.org/viewvc/llvm-project?rev=301233&view=rev
Log:
Revert r301223
Removed:
cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp
cfe/trunk/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
cfe/trunk/test/OpenMP/distribute_parallel_for_if_codegen.cpp
cfe/trunk/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
cfe/trunk/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
cfe/trunk/test/OpenMP/distribute_parallel_for_private_codegen.cpp
cfe/trunk/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=301233&r1=301232&r2=301233&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Mon Apr 24 14:50:35 2017
@@ -2466,14 +2466,16 @@ static int addMonoNonMonoModifier(OpenMP
return Schedule | Modifier;
}
-void CGOpenMPRuntime::emitForDispatchInit(
- CodeGenFunction &CGF, SourceLocation Loc,
- const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
- bool Ordered, const DispatchRTInput &DispatchValues) {
+void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ const OpenMPScheduleTy &ScheduleKind,
+ unsigned IVSize, bool IVSigned,
+ bool Ordered, llvm::Value *UB,
+ llvm::Value *Chunk) {
if (!CGF.HaveInsertPoint())
return;
- OpenMPSchedType Schedule = getRuntimeSchedule(
- ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
+ OpenMPSchedType Schedule =
+ getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
assert(Ordered ||
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
@@ -2484,14 +2486,14 @@ void CGOpenMPRuntime::emitForDispatchIni
// kmp_int[32|64] stride, kmp_int[32|64] chunk);
// If the Chunk was not specified in the clause - use default value 1.
- llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
- : CGF.Builder.getIntN(IVSize, 1);
+ if (Chunk == nullptr)
+ Chunk = CGF.Builder.getIntN(IVSize, 1);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
CGF.Builder.getInt32(addMonoNonMonoModifier(
Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
- DispatchValues.LB, // Lower
- DispatchValues.UB, // Upper
+ CGF.Builder.getIntN(IVSize, 0), // Lower
+ UB, // Upper
CGF.Builder.getIntN(IVSize, 1), // Stride
Chunk // Chunk
};
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=301233&r1=301232&r2=301233&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Mon Apr 24 14:50:35 2017
@@ -672,50 +672,16 @@ public:
///
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const;
- /// struct with the values to be passed to the dispatch runtime function
- struct DispatchRTInput {
- /// Loop lower bound
- llvm::Value *LB = nullptr;
- /// Loop upper bound
- llvm::Value *UB = nullptr;
- /// Chunk size specified using 'schedule' clause (nullptr if chunk
- /// was not specified)
- llvm::Value *Chunk = nullptr;
- DispatchRTInput() = default;
- DispatchRTInput(llvm::Value *LB, llvm::Value *UB, llvm::Value *Chunk)
- : LB(LB), UB(UB), Chunk(Chunk) {}
- };
-
- /// Call the appropriate runtime routine to initialize it before start
- /// of loop.
-
- /// This is used for non static scheduled types and when the ordered
- /// clause is present on the loop construct.
- /// Depending on the loop schedule, it is necessary to call some runtime
- /// routine before start of the OpenMP loop to get the loop upper / lower
- /// bounds \a LB and \a UB and stride \a ST.
- ///
- /// \param CGF Reference to current CodeGenFunction.
- /// \param Loc Clang source location.
- /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
- /// \param IVSize Size of the iteration variable in bits.
- /// \param IVSigned Sign of the interation variable.
- /// \param Ordered true if loop is ordered, false otherwise.
- /// \param DispatchValues struct containing llvm values for lower bound, upper
- /// bound, and chunk expression.
- /// For the default (nullptr) value, the chunk 1 will be used.
- ///
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc,
const OpenMPScheduleTy &ScheduleKind,
unsigned IVSize, bool IVSigned, bool Ordered,
- const DispatchRTInput &DispatchValues);
+ llvm::Value *UB,
+ llvm::Value *Chunk = nullptr);
/// \brief Call the appropriate runtime routine to initialize it before start
/// of loop.
///
- /// This is used only in case of static schedule, when the user did not
- /// specify a ordered clause on the loop construct.
- /// Depending on the loop schedule, it is necessary to call some runtime
+ /// Depending on the loop schedule, it is necessary to call some runtime
/// routine before start of the OpenMP loop to get the loop upper / lower
/// bounds \a LB and \a UB and stride \a ST.
///
Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=301233&r1=301232&r2=301233&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Mon Apr 24 14:50:35 2017
@@ -87,8 +87,7 @@ public:
class OMPParallelScope final : public OMPLexicalScope {
bool EmitPreInitStmt(const OMPExecutableDirective &S) {
OpenMPDirectiveKind Kind = S.getDirectiveKind();
- return !(isOpenMPTargetExecutionDirective(Kind) ||
- isOpenMPLoopBoundSharingDirective(Kind)) &&
+ return !isOpenMPTargetExecutionDirective(Kind) &&
isOpenMPParallelDirective(Kind);
}
@@ -1250,20 +1249,10 @@ static void emitPostUpdateForReductionCl
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
-namespace {
-/// Codegen lambda for appending distribute lower and upper bounds to outlined
-/// parallel function. This is necessary for combined constructs such as
-/// 'distribute parallel for'
-typedef llvm::function_ref<void(CodeGenFunction &,
- const OMPExecutableDirective &,
- llvm::SmallVectorImpl<llvm::Value *> &)>
- CodeGenBoundParametersTy;
-} // anonymous namespace
-
-static void emitCommonOMPParallelDirective(
- CodeGenFunction &CGF, const OMPExecutableDirective &S,
- OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
- const CodeGenBoundParametersTy &CodeGenBoundParameters) {
+static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) {
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
@@ -1290,20 +1279,11 @@ static void emitCommonOMPParallelDirecti
OMPParallelScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
- // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
- // lower and upper bounds with the pragma 'for' chunking mechanism.
- // The following lambda takes care of appending the lower and upper bound
- // parameters when necessary
- CodeGenBoundParameters(CGF, S, CapturedVars);
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
CapturedVars, IfCond);
}
-static void emitEmptyBoundParameters(CodeGenFunction &,
- const OMPExecutableDirective &,
- llvm::SmallVectorImpl<llvm::Value *> &) {}
-
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
@@ -1324,8 +1304,7 @@ void CodeGenFunction::EmitOMPParallelDir
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
- emitEmptyBoundParameters);
+ emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
emitPostUpdateForReductionClause(
*this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
@@ -1670,13 +1649,6 @@ void CodeGenFunction::EmitOMPSimdFinal(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
-static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
- const OMPLoopDirective &S,
- CodeGenFunction::JumpDest LoopExit) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
-};
-
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
@@ -1759,12 +1731,9 @@ void CodeGenFunction::EmitOMPSimdDirecti
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
-void CodeGenFunction::EmitOMPOuterLoop(
- bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
- CodeGenFunction::OMPPrivateScope &LoopScope,
- const CodeGenFunction::OMPLoopArguments &LoopArgs,
- const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
- const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
+void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
+ const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
+ Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
auto &RT = CGM.getOpenMPRuntime();
const Expr *IVExpr = S.getIterationVariable();
@@ -1782,18 +1751,15 @@ void CodeGenFunction::EmitOMPOuterLoop(
llvm::Value *BoolCondVal = nullptr;
if (!DynamicOrOrdered) {
- // UB = min(UB, GlobalUB) or
- // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
- // 'distribute parallel for')
- EmitIgnoredExpr(LoopArgs.EUB);
+ // UB = min(UB, GlobalUB)
+ EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB
- EmitIgnoredExpr(LoopArgs.Init);
+ EmitIgnoredExpr(S.getInit());
// IV < UB
- BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
+ BoolCondVal = EvaluateExprAsBool(S.getCond());
} else {
- BoolCondVal =
- RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
- LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
+ BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
+ LB, UB, ST);
}
// If there are any cleanups between here and the loop-exit scope,
@@ -1813,7 +1779,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
// Emit "IV = LB" (in case of static schedule, we have already calculated new
// LB for loop condition and emitted it above).
if (DynamicOrOrdered)
- EmitIgnoredExpr(LoopArgs.Init);
+ EmitIgnoredExpr(S.getInit());
// Create a block for the increment.
auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
@@ -1827,27 +1793,24 @@ void CodeGenFunction::EmitOMPOuterLoop(
EmitOMPSimdInit(S, IsMonotonic);
SourceLocation Loc = S.getLocStart();
-
- // when 'distribute' is not combined with a 'for':
- // while (idx <= UB) { BODY; ++idx; }
- // when 'distribute' is combined with a 'for'
- // (e.g. 'distribute parallel for')
- // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
- EmitOMPInnerLoop(
- S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
- [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
- CodeGenLoop(CGF, S, LoopExit);
- },
- [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
- CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
- });
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
+ [&S, LoopExit](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+ },
+ [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
+ if (Ordered) {
+ CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
+ CGF, Loc, IVSize, IVSigned);
+ }
+ });
EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
if (!DynamicOrOrdered) {
// Emit "LB = LB + Stride", "UB = UB + Stride".
- EmitIgnoredExpr(LoopArgs.NextLB);
- EmitIgnoredExpr(LoopArgs.NextUB);
+ EmitIgnoredExpr(S.getNextLowerBound());
+ EmitIgnoredExpr(S.getNextUpperBound());
}
EmitBranch(CondBlock);
@@ -1866,8 +1829,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
void CodeGenFunction::EmitOMPForOuterLoop(
const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
- const OMPLoopArguments &LoopArgs,
- const CodeGenDispatchBoundsTy &CGDispatchBounds) {
+ Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
auto &RT = CGM.getOpenMPRuntime();
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
@@ -1876,7 +1838,7 @@ void CodeGenFunction::EmitOMPForOuterLoo
assert((Ordered ||
!RT.isStaticNonchunked(ScheduleKind.Schedule,
- LoopArgs.Chunk != nullptr)) &&
+ /*Chunked=*/Chunk != nullptr)) &&
"static non-chunked schedule does not need outer loop");
// Emit outer loop.
@@ -1934,46 +1896,22 @@ void CodeGenFunction::EmitOMPForOuterLoo
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
if (DynamicOrOrdered) {
- auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
- llvm::Value *LBVal = DispatchBounds.first;
- llvm::Value *UBVal = DispatchBounds.second;
- CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
- LoopArgs.Chunk};
+ llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
- IVSigned, Ordered, DipatchRTInputValues);
+ IVSigned, Ordered, UBVal, Chunk);
} else {
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
- Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
- LoopArgs.ST, LoopArgs.Chunk);
+ Ordered, IL, LB, UB, ST, Chunk);
}
- auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
- const unsigned IVSize,
- const bool IVSigned) {
- if (Ordered) {
- CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
- IVSigned);
- }
- };
-
- OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
- LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
- OuterLoopArgs.IncExpr = S.getInc();
- OuterLoopArgs.Init = S.getInit();
- OuterLoopArgs.Cond = S.getCond();
- OuterLoopArgs.NextLB = S.getNextLowerBound();
- OuterLoopArgs.NextUB = S.getNextUpperBound();
- EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
- emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
+ EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
+ ST, IL, Chunk);
}
-static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
- const unsigned IVSize, const bool IVSigned) {}
-
void CodeGenFunction::EmitOMPDistributeOuterLoop(
- OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
- OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
- const CodeGenLoopTy &CodeGenLoopContent) {
+ OpenMPDistScheduleClauseKind ScheduleKind,
+ const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
+ Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
auto &RT = CGM.getOpenMPRuntime();
@@ -1986,159 +1924,26 @@ void CodeGenFunction::EmitOMPDistributeO
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
- RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
- IVSigned, /* Ordered = */ false, LoopArgs.IL,
- LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
- LoopArgs.Chunk);
-
- // for combined 'distribute' and 'for' the increment expression of distribute
- // is store in DistInc. For 'distribute' alone, it is in Inc.
- Expr *IncExpr;
- if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
- IncExpr = S.getDistInc();
- else
- IncExpr = S.getInc();
-
- // this routine is shared by 'omp distribute parallel for' and
- // 'omp distribute': select the right EUB expression depending on the
- // directive
- OMPLoopArguments OuterLoopArgs;
- OuterLoopArgs.LB = LoopArgs.LB;
- OuterLoopArgs.UB = LoopArgs.UB;
- OuterLoopArgs.ST = LoopArgs.ST;
- OuterLoopArgs.IL = LoopArgs.IL;
- OuterLoopArgs.Chunk = LoopArgs.Chunk;
- OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedEnsureUpperBound()
- : S.getEnsureUpperBound();
- OuterLoopArgs.IncExpr = IncExpr;
- OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedInit()
- : S.getInit();
- OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedCond()
- : S.getCond();
- OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedNextLowerBound()
- : S.getNextLowerBound();
- OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedNextUpperBound()
- : S.getNextUpperBound();
-
- EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
- LoopScope, OuterLoopArgs, CodeGenLoopContent,
- emitEmptyOrdered);
-}
-
-/// Emit a helper variable and return corresponding lvalue.
-static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
- const DeclRefExpr *Helper) {
- auto VDecl = cast<VarDecl>(Helper->getDecl());
- CGF.EmitVarDecl(*VDecl);
- return CGF.EmitLValue(Helper);
-}
+ RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
+ IVSize, IVSigned, /* Ordered = */ false,
+ IL, LB, UB, ST, Chunk);
-static std::pair<LValue, LValue>
-emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
- const OMPExecutableDirective &S) {
- const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
- LValue LB =
- EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
- LValue UB =
- EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
-
- // When composing 'distribute' with 'for' (e.g. as in 'distribute
- // parallel for') we need to use the 'distribute'
- // chunk lower and upper bounds rather than the whole loop iteration
- // space. These are parameters to the outlined function for 'parallel'
- // and we copy the bounds of the previous schedule into the
- // the current ones.
- LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
- LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
- llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
- PrevLBVal = CGF.EmitScalarConversion(
- PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
- LS.getIterationVariable()->getType(), SourceLocation());
- llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
- PrevUBVal = CGF.EmitScalarConversion(
- PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
- LS.getIterationVariable()->getType(), SourceLocation());
-
- CGF.EmitStoreOfScalar(PrevLBVal, LB);
- CGF.EmitStoreOfScalar(PrevUBVal, UB);
-
- return {LB, UB};
-}
-
-/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
-/// we need to use the LB and UB expressions generated by the worksharing
-/// code generation support, whereas in non combined situations we would
-/// just emit 0 and the LastIteration expression
-/// This function is necessary due to the difference of the LB and UB
-/// types for the RT emission routines for 'for_static_init' and
-/// 'for_dispatch_init'
-static std::pair<llvm::Value *, llvm::Value *>
-emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
- const OMPExecutableDirective &S,
- Address LB, Address UB) {
- const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
- const Expr *IVExpr = LS.getIterationVariable();
- // when implementing a dynamic schedule for a 'for' combined with a
- // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
- // is not normalized as each team only executes its own assigned
- // distribute chunk
- QualType IteratorTy = IVExpr->getType();
- llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
- SourceLocation());
- llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
- SourceLocation());
- return {LBVal, UBVal};
-};
-
-static void emitDistributeParallelForDistributeInnerBoundParams(
- CodeGenFunction &CGF, const OMPExecutableDirective &S,
- llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
- const auto &Dir = cast<OMPLoopDirective>(S);
- LValue LB =
- CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
- auto LBCast = CGF.Builder.CreateIntCast(
- CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
- CapturedVars.push_back(LBCast);
- LValue UB =
- CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
-
- auto UBCast = CGF.Builder.CreateIntCast(
- CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
- CapturedVars.push_back(UBCast);
-};
-
-static void
-emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
- const OMPLoopDirective &S,
- CodeGenFunction::JumpDest LoopExit) {
- auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
- PrePostActionTy &) {
- CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
- emitDistributeParallelForInnerBounds,
- emitDistributeParallelForDispatchBounds);
- };
-
- emitCommonOMPParallelDirective(
- CGF, S, OMPD_for, CGInlinedWorksharingLoop,
- emitDistributeParallelForDistributeInnerBoundParams);
+ EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
+ S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
}
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S) {
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
- S.getDistInc());
- };
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
- OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
- /*HasCancel=*/false);
- CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
- /*HasCancel=*/false);
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ *this, OMPD_distribute_parallel_for,
+ [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPLoopScope PreInitScope(CGF, S);
+ OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for,
+ /*HasCancel=*/false);
+ CGF.EmitStmt(
+ cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ });
}
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
@@ -2276,6 +2081,14 @@ void CodeGenFunction::EmitOMPTargetTeams
});
}
+/// \brief Emit a helper variable and return corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
+ const DeclRefExpr *Helper) {
+ auto VDecl = cast<VarDecl>(Helper->getDecl());
+ CGF.EmitVarDecl(*VDecl);
+ return CGF.EmitLValue(Helper);
+}
+
namespace {
struct ScheduleKindModifiersTy {
OpenMPScheduleClauseKind Kind;
@@ -2288,10 +2101,7 @@ namespace {
};
} // namespace
-bool CodeGenFunction::EmitOMPWorksharingLoop(
- const OMPLoopDirective &S, Expr *EUB,
- const CodeGenLoopBoundsTy &CodeGenLoopBounds,
- const CodeGenDispatchBoundsTy &CGDispatchBounds) {
+bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -2341,10 +2151,10 @@ bool CodeGenFunction::EmitOMPWorksharing
emitAlignedClause(*this, S);
EmitOMPLinearClauseInit(S);
// Emit helper vars inits.
-
- std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
- LValue LB = Bounds.first;
- LValue UB = Bounds.second;
+ LValue LB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
@@ -2430,11 +2240,9 @@ bool CodeGenFunction::EmitOMPWorksharing
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
- const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
- ST.getAddress(), IL.getAddress(),
- Chunk, EUB);
EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
- LoopArguments, CGDispatchBounds);
+ LB.getAddress(), UB.getAddress(), ST.getAddress(),
+ IL.getAddress(), Chunk);
}
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
EmitOMPSimdFinal(S,
@@ -2472,42 +2280,12 @@ bool CodeGenFunction::EmitOMPWorksharing
return HasLastprivateClause;
}
-/// The following two functions generate expressions for the loop lower
-/// and upper bounds in case of static and dynamic (dispatch) schedule
-/// of the associated 'for' or 'distribute' loop.
-static std::pair<LValue, LValue>
-emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
- const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
- LValue LB =
- EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
- LValue UB =
- EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
- return {LB, UB};
-}
-
-/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
-/// consider the lower and upper bound expressions generated by the
-/// worksharing loop support, but we use 0 and the iteration space size as
-/// constants
-static std::pair<llvm::Value *, llvm::Value *>
-emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
- Address LB, Address UB) {
- const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
- const Expr *IVExpr = LS.getIterationVariable();
- const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
- llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
- llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
- return {LBVal, UBVal};
-}
-
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
};
{
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
@@ -2525,9 +2303,7 @@ void CodeGenFunction::EmitOMPForSimdDire
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
};
{
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
@@ -2778,11 +2554,9 @@ void CodeGenFunction::EmitOMPParallelFor
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
- CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
- emitDispatchForLoopBounds);
+ CGF.EmitOMPWorksharingLoop(S);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
- emitEmptyBoundParameters);
+ emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
}
void CodeGenFunction::EmitOMPParallelForSimdDirective(
@@ -2790,11 +2564,9 @@ void CodeGenFunction::EmitOMPParallelFor
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
- emitDispatchForLoopBounds);
+ CGF.EmitOMPWorksharingLoop(S);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
- emitEmptyBoundParameters);
+ emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
}
void CodeGenFunction::EmitOMPParallelSectionsDirective(
@@ -2804,8 +2576,7 @@ void CodeGenFunction::EmitOMPParallelSec
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitSections(S);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
- emitEmptyBoundParameters);
+ emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
@@ -3023,9 +2794,7 @@ void CodeGenFunction::EmitOMPFlushDirect
}(), S.getLocStart());
}
-void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
- const CodeGenLoopTy &CodeGenLoop,
- Expr *IncExpr) {
+void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -3066,17 +2835,10 @@ void CodeGenFunction::EmitOMPDistributeL
// Emit 'then' code.
{
// Emit helper vars inits.
-
- LValue LB = EmitOMPHelperVar(
- *this, cast<DeclRefExpr>(
- (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedLowerBoundVariable()
- : S.getLowerBoundVariable())));
- LValue UB = EmitOMPHelperVar(
- *this, cast<DeclRefExpr>(
- (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedUpperBoundVariable()
- : S.getUpperBoundVariable())));
+ LValue LB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
@@ -3128,25 +2890,15 @@ void CodeGenFunction::EmitOMPDistributeL
auto LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
- EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedEnsureUpperBound()
- : S.getEnsureUpperBound());
+ EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB;
- EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedInit()
- : S.getInit());
-
- Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
- ? S.getCombinedCond()
- : S.getCond();
-
- // for distribute alone, codegen
+ EmitIgnoredExpr(S.getInit());
// while (idx <= UB) { BODY; ++idx; }
- // when combined with 'for' (e.g. as in 'distribute parallel for')
- // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
- [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
- CodeGenLoop(CGF, S, LoopExit);
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
+ S.getInc(),
+ [&S, LoopExit](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
},
[](CodeGenFunction &) {});
EmitBlock(LoopExit.getBlock());
@@ -3155,11 +2907,9 @@ void CodeGenFunction::EmitOMPDistributeL
} else {
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
- const OMPLoopArguments LoopArguments = {
- LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
- Chunk};
- EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
- CodeGenLoop);
+ EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
+ LB.getAddress(), UB.getAddress(), ST.getAddress(),
+ IL.getAddress(), Chunk);
}
// Emit final copy of the lastprivate variables if IsLastIter != 0.
@@ -3181,8 +2931,7 @@ void CodeGenFunction::EmitOMPDistributeL
void CodeGenFunction::EmitOMPDistributeDirective(
const OMPDistributeDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-
- CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
+ CGF.EmitOMPDistributeLoop(S);
};
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
@@ -4091,8 +3840,7 @@ static void emitTargetParallelRegion(Cod
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
- emitEmptyBoundParameters);
+ emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen);
emitPostUpdateForReductionClause(
CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=301233&r1=301232&r2=301233&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Mon Apr 24 14:50:35 2017
@@ -175,25 +175,6 @@ public:
// because of jumps.
VarBypassDetector Bypasses;
- // CodeGen lambda for loops and support for ordered clause
- typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &,
- JumpDest)>
- CodeGenLoopTy;
- typedef llvm::function_ref<void(CodeGenFunction &, SourceLocation,
- const unsigned, const bool)>
- CodeGenOrderedTy;
-
- // Codegen lambda for loop bounds in worksharing loop constructs
- typedef llvm::function_ref<std::pair<LValue, LValue>(
- CodeGenFunction &, const OMPExecutableDirective &S)>
- CodeGenLoopBoundsTy;
-
- // Codegen lambda for loop bounds in dispatch-based loop implementation
- typedef llvm::function_ref<std::pair<llvm::Value *, llvm::Value *>(
- CodeGenFunction &, const OMPExecutableDirective &S, Address LB,
- Address UB)>
- CodeGenDispatchBoundsTy;
-
/// \brief CGBuilder insert helper. This function is called after an
/// instruction is created using Builder.
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
@@ -2775,6 +2756,7 @@ public:
void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
+ void EmitOMPDistributeLoop(const OMPDistributeDirective &S);
void EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S);
void EmitOMPDistributeParallelForSimdDirective(
@@ -2831,78 +2813,32 @@ public:
void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S,
OMPPrivateScope &LoopScope);
- /// Helper for the OpenMP loop directives.
- void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
-
- /// \brief Emit code for the worksharing loop-based directive.
- /// \return true, if this construct has any lastprivate clause, false -
- /// otherwise.
- bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB,
- const CodeGenLoopBoundsTy &CodeGenLoopBounds,
- const CodeGenDispatchBoundsTy &CGDispatchBounds);
-
private:
/// Helpers for blocks
llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
/// Helpers for the OpenMP loop directives.
+ void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false);
void EmitOMPSimdFinal(
const OMPLoopDirective &D,
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen);
-
- void EmitOMPDistributeLoop(const OMPLoopDirective &S,
- const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr);
-
- /// struct with the values to be passed to the OpenMP loop-related functions
- struct OMPLoopArguments {
- /// loop lower bound
- Address LB = Address::invalid();
- /// loop upper bound
- Address UB = Address::invalid();
- /// loop stride
- Address ST = Address::invalid();
- /// isLastIteration argument for runtime functions
- Address IL = Address::invalid();
- /// Chunk value generated by sema
- llvm::Value *Chunk = nullptr;
- /// EnsureUpperBound
- Expr *EUB = nullptr;
- /// IncrementExpression
- Expr *IncExpr = nullptr;
- /// Loop initialization
- Expr *Init = nullptr;
- /// Loop exit condition
- Expr *Cond = nullptr;
- /// Update of LB after a whole chunk has been executed
- Expr *NextLB = nullptr;
- /// Update of UB after a whole chunk has been executed
- Expr *NextUB = nullptr;
- OMPLoopArguments() = default;
- OMPLoopArguments(Address LB, Address UB, Address ST, Address IL,
- llvm::Value *Chunk = nullptr, Expr *EUB = nullptr,
- Expr *IncExpr = nullptr, Expr *Init = nullptr,
- Expr *Cond = nullptr, Expr *NextLB = nullptr,
- Expr *NextUB = nullptr)
- : LB(LB), UB(UB), ST(ST), IL(IL), Chunk(Chunk), EUB(EUB),
- IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB),
- NextUB(NextUB) {}
- };
- void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
- const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
- const OMPLoopArguments &LoopArgs,
- const CodeGenLoopTy &CodeGenLoop,
- const CodeGenOrderedTy &CodeGenOrdered);
+ /// \brief Emit code for the worksharing loop-based directive.
+ /// \return true, if this construct has any lastprivate clause, false -
+ /// otherwise.
+ bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
+ void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered,
+ const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
+ Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk);
void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind,
bool IsMonotonic, const OMPLoopDirective &S,
- OMPPrivateScope &LoopScope, bool Ordered,
- const OMPLoopArguments &LoopArgs,
- const CodeGenDispatchBoundsTy &CGDispatchBounds);
- void EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind,
- const OMPLoopDirective &S,
- OMPPrivateScope &LoopScope,
- const OMPLoopArguments &LoopArgs,
- const CodeGenLoopTy &CodeGenLoopContent);
+ OMPPrivateScope &LoopScope, bool Ordered, Address LB,
+ Address UB, Address ST, Address IL,
+ llvm::Value *Chunk);
+ void EmitOMPDistributeOuterLoop(
+ OpenMPDistScheduleClauseKind ScheduleKind,
+ const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
+ Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk);
/// \brief Emit code for sections directive.
void EmitSections(const OMPExecutableDirective &S);
Removed: cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp?rev=301232&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp (removed)
@@ -1,2260 +0,0 @@
-// Test host code gen
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// expected-no-diagnostics
-#ifndef HEADER
-#define HEADER
-
-
-template <typename T>
-T tmain() {
- T *a, *b, *c;
- int n = 10000;
- int ch = 100;
-
- // no schedule clauses
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- }
-
- // dist_schedule: static no chunk
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for dist_schedule(static)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- }
-
- // dist_schedule: static chunk
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for dist_schedule(static, ch)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- }
-
- // schedule: static no chunk
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for schedule(static)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- }
-
- // schedule: static chunk
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for schedule(static, ch)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- }
-
- // schedule: dynamic no chunk
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for schedule(dynamic)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- }
-
- // schedule: dynamic chunk
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for schedule(dynamic, ch)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- }
-
- return T();
-}
-
-int main() {
- double *a, *b, *c;
- int n = 10000;
- int ch = 100;
-
-#ifdef LAMBDA
- // LAMBDA-LABEL: @main
- // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
- [&]() {
- // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](
-
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](
-
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](
-
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](
-
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](
-
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](
-
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](
-
- // no schedule clauses
- #pragma omp target
- #pragma omp teams
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]](
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]](
- // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-
- // check EUB for distribute
- // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
- // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
- // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
- // LAMBDA-DAG: [[EUB_TRUE]]:
- // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
- // LAMBDA: br label %[[EUB_END:.+]]
- // LAMBDA-DAG: [[EUB_FALSE]]:
- // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
- // LAMBDA: br label %[[EUB_END]]
- // LAMBDA-DAG: [[EUB_END]]:
- // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
- // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
-
- // initialize omp.iv
- // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
- // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
-
- // check exit condition
- // LAMBDA: [[OMP_JUMP_BACK]]:
- // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // LAMBDA: [[DIST_BODY]]:
- // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
- // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
- // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
- // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
- // check that distlb and distub are properly passed to fork_call
- // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
- // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
- // LAMBDA: br label %[[DIST_INC:.+]]
-
- // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
- // LAMBDA: [[DIST_INC]]:
- // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
- // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
- // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[OMP_JUMP_BACK]]
-
- // LAMBDA-DAG: call void @__kmpc_for_static_fini(
- // LAMBDA: ret
-
- // implementation of 'parallel for'
- // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-
- // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-
- // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
- // In this case we use EUB
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
- // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
- // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
- // LAMBDA: [[PF_EUB_TRUE]]:
- // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
- // LAMBDA: br label %[[PF_EUB_END:.+]]
- // LAMBDA-DAG: [[PF_EUB_FALSE]]:
- // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // LAMBDA: br label %[[PF_EUB_END]]
- // LAMBDA-DAG: [[PF_EUB_END]]:
- // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
- // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
-
- // initialize omp.iv
- // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
-
- // check exit condition
- // LAMBDA: [[OMP_PF_JUMP_BACK]]:
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
- // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
- // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // LAMBDA: [[PF_BODY]]:
- // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label {{.+}}
-
- // check stride 1 for 'for' in 'distribute parallel for'
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
- // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
-
- // LAMBDA-DAG: call void @__kmpc_for_static_fini(
- // LAMBDA: ret
-
- [&]() {
- a[i] = b[i] + c[i];
- }();
- }
-
- // dist_schedule: static no chunk (same as default - no dist_schedule)
- #pragma omp target
- #pragma omp teams
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]](
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for dist_schedule(static)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]](
- // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-
- // check EUB for distribute
- // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
- // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
- // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
- // LAMBDA-DAG: [[EUB_TRUE]]:
- // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
- // LAMBDA: br label %[[EUB_END:.+]]
- // LAMBDA-DAG: [[EUB_FALSE]]:
- // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
- // LAMBDA: br label %[[EUB_END]]
- // LAMBDA-DAG: [[EUB_END]]:
- // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
- // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
-
- // initialize omp.iv
- // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
- // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
-
- // check exit condition
- // LAMBDA: [[OMP_JUMP_BACK]]:
- // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // LAMBDA: [[DIST_BODY]]:
- // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
- // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
- // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
- // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
- // check that distlb and distub are properly passed to fork_call
- // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
- // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
- // LAMBDA: br label %[[DIST_INC:.+]]
-
- // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
- // LAMBDA: [[DIST_INC]]:
- // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
- // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
- // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[OMP_JUMP_BACK]]
-
- // LAMBDA-DAG: call void @__kmpc_for_static_fini(
- // LAMBDA: ret
-
- // implementation of 'parallel for'
- // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-
- // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-
- // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
- // In this case we use EUB
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
- // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
- // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
- // LAMBDA: [[PF_EUB_TRUE]]:
- // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
- // LAMBDA: br label %[[PF_EUB_END:.+]]
- // LAMBDA-DAG: [[PF_EUB_FALSE]]:
- // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // LAMBDA: br label %[[PF_EUB_END]]
- // LAMBDA-DAG: [[PF_EUB_END]]:
- // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
- // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
-
- // initialize omp.iv
- // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
-
- // check exit condition
- // LAMBDA: [[OMP_PF_JUMP_BACK]]:
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
- // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
- // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // LAMBDA: [[PF_BODY]]:
- // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label {{.+}}
-
- // check stride 1 for 'for' in 'distribute parallel for'
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
- // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
-
- // LAMBDA-DAG: call void @__kmpc_for_static_fini(
- // LAMBDA: ret
- [&]() {
- a[i] = b[i] + c[i];
- }();
- }
-
- // dist_schedule: static chunk
- #pragma omp target
- #pragma omp teams
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]](
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for dist_schedule(static, ch)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
- // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
- // unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
- // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-
- // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
- // check EUB for distribute
- // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
- // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
- // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
- // LAMBDA-DAG: [[EUB_TRUE]]:
- // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
- // LAMBDA: br label %[[EUB_END:.+]]
- // LAMBDA-DAG: [[EUB_FALSE]]:
- // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
- // LAMBDA: br label %[[EUB_END]]
- // LAMBDA-DAG: [[EUB_END]]:
- // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
- // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
-
- // initialize omp.iv
- // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
- // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
-
- // check exit condition
- // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
- // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
- // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
- // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
- // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
- // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
- // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
- // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
- // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
- // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
- // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
- // check that distlb and distub are properly passed to fork_call
- // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
- // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
- // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]]
-
- // check DistInc
- // LAMBDA: [[DIST_INNER_LOOP_INC]]:
- // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
- // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
- // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
- // LAMBDA: [[DIST_INNER_LOOP_END]]:
- // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
- // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
- // check NextLB and NextUB
- // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
- // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
- // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
- // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
- // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
- // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
- // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
- // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
- // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
-
- // outer loop exit
- // LAMBDA: [[DIST_OUTER_LOOP_END]]:
- // LAMBDA-DAG: call void @__kmpc_for_static_fini(
- // LAMBDA: ret
-
- // skip implementation of 'parallel for': using default scheduling and was tested above
- [&]() {
- a[i] = b[i] + c[i];
- }();
- }
-
- // schedule: static no chunk
- #pragma omp target
- #pragma omp teams
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]](
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for schedule(static)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]](
- // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
- // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
- // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
- // LAMBDA: ret
-
- // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
- // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-
- // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-
- // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
- // In this case we use EUB
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
- // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
- // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
- // LAMBDA: [[PF_EUB_TRUE]]:
- // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
- // LAMBDA: br label %[[PF_EUB_END:.+]]
- // LAMBDA-DAG: [[PF_EUB_FALSE]]:
- // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // LAMBDA: br label %[[PF_EUB_END]]
- // LAMBDA-DAG: [[PF_EUB_END]]:
- // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
- // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
-
- // initialize omp.iv
- // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
-
- // check exit condition
- // LAMBDA: [[OMP_PF_JUMP_BACK]]:
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
- // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
- // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // LAMBDA: [[PF_BODY]]:
- // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label {{.+}}
-
- // check stride 1 for 'for' in 'distribute parallel for'
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
- // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
-
- // LAMBDA-DAG: call void @__kmpc_for_static_fini(
- // LAMBDA: ret
-
- [&]() {
- a[i] = b[i] + c[i];
- }();
- }
-
- // schedule: static chunk
- #pragma omp target
- #pragma omp teams
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]](
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for schedule(static, ch)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]](
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
- // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
- // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
- // LAMBDA: ret
-
- // 'parallel for' implementation using outer and inner loops and PrevEUB
- // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
- // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
- // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
-
- // check PrevEUB (using PrevUB instead of NumIt as upper bound)
- // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
- // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
- // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
- // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
- // LAMBDA: [[PF_EUB_TRUE]]:
- // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // LAMBDA: br label %[[PF_EUB_END:.+]]
- // LAMBDA-DAG: [[PF_EUB_FALSE]]:
- // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
- // LAMBDA: br label %[[PF_EUB_END]]
- // LAMBDA-DAG: [[PF_EUB_END]]:
- // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
- // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
- // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
- // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
- // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
-
- // initialize omp.iv (IV = LB)
- // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
-
- // outer loop: while (IV < UB) {
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
- // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
- // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
-
- // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
- // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
-
- // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]:
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
- // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
- // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
-
- // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
- // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // skip body branch
- // LAMBDA: br{{.+}}
- // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
-
- // IV = IV + 1 and inner loop latch
- // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
- // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]]
-
- // check NextLB and NextUB
- // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
- // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
-
- // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
- // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
- // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
- // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
- // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
- // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
- // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
-
- // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
- // LAMBDA-DAG: call void @__kmpc_for_static_fini(
- // LAMBDA: ret
- [&]() {
- a[i] = b[i] + c[i];
- }();
- }
-
- // schedule: dynamic no chunk
- #pragma omp target
- #pragma omp teams
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]](
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for schedule(dynamic)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]](
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
- // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
- // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
- // LAMBDA: ret
-
- // 'parallel for' implementation using outer and inner loops; chunks are obtained via __kmpc_dispatch_next
- // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
- // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
- // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
- // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
-
- // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
- // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
- // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
- // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
-
- // initialize omp.iv (IV = LB)
- // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
- // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
-
- // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
- // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
- // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
-
- // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
- // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // skip body branch
- // LAMBDA: br{{.+}}
- // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
-
- // IV = IV + 1 and inner loop latch
- // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
- // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]]
-
- // check NextLB and NextUB
- // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
- // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
-
- // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
- // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
-
- // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
- // LAMBDA: ret
- [&]() {
- a[i] = b[i] + c[i];
- }();
- }
-
- // schedule: dynamic chunk
- #pragma omp target
- #pragma omp teams
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]](
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for schedule(dynamic, ch)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]](
- // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
- // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
- // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
- // LAMBDA: ret
-
- // 'parallel for' implementation using outer and inner loops; chunks are obtained via __kmpc_dispatch_next
- // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
- // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
- // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
- // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
- // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
-
- // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
- // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
- // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
- // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
-
- // initialize omp.iv (IV = LB)
- // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
- // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
-
- // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
- // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
- // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
-
- // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
- // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // skip body branch
- // LAMBDA: br{{.+}}
- // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
-
- // IV = IV + 1 and inner loop latch
- // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
- // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
- // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]]
-
- // check NextLB and NextUB
- // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
- // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
-
- // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
- // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
-
- // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
- // LAMBDA: ret
- [&]() {
- a[i] = b[i] + c[i];
- }();
- }
- }();
- return 0;
-#else
- // CHECK-LABEL: @main
-
- // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
- // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-
- // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
- // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
-
- // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
- // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
-
- // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
- // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
-
- // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
- // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
-
- // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
- // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
-
- // CHECK: call i{{[0-9]+}} @__tgt_target_teams(
- // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
-
- // CHECK: call{{.+}} [[TMAIN:@.+]]()
-
- // no schedule clauses
- #pragma omp target
- #pragma omp teams
- // CHECK: define internal void [[OFFLOADING_FUN_1]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
- // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-
- // check EUB for distribute
- // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
- // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
- // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
- // CHECK-DAG: [[EUB_TRUE]]:
- // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
- // CHECK: br label %[[EUB_END:.+]]
- // CHECK-DAG: [[EUB_FALSE]]:
- // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
- // CHECK: br label %[[EUB_END]]
- // CHECK-DAG: [[EUB_END]]:
- // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
- // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
-
- // initialize omp.iv
- // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
- // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[OMP_JUMP_BACK:.+]]
-
- // check exit condition
- // CHECK: [[OMP_JUMP_BACK]]:
- // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // CHECK: [[DIST_BODY]]:
- // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
- // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
- // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
- // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
- // check that distlb and distub are properly passed to fork_call
- // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
- // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
- // CHECK: br label %[[DIST_INC:.+]]
-
- // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
- // CHECK: [[DIST_INC]]:
- // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
- // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
- // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
- // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[OMP_JUMP_BACK]]
-
- // CHECK-DAG: call void @__kmpc_for_static_fini(
- // CHECK: ret
-
- // implementation of 'parallel for'
- // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-
- // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-
- // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
- // In this case we use EUB
- // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
- // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
- // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
- // CHECK: [[PF_EUB_TRUE]]:
- // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
- // CHECK: br label %[[PF_EUB_END:.+]]
- // CHECK-DAG: [[PF_EUB_FALSE]]:
- // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // CHECK: br label %[[PF_EUB_END]]
- // CHECK-DAG: [[PF_EUB_END]]:
- // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
- // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
-
- // initialize omp.iv
- // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
-
- // check exit condition
- // CHECK: [[OMP_PF_JUMP_BACK]]:
- // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
- // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
- // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
- // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // CHECK: [[PF_BODY]]:
- // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label {{.+}}
-
- // check stride 1 for 'for' in 'distribute parallel for'
- // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
- // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label %[[OMP_PF_JUMP_BACK]]
-
- // CHECK-DAG: call void @__kmpc_for_static_fini(
- // CHECK: ret
- }
-
- // dist_schedule: static no chunk
- #pragma omp target
- #pragma omp teams
- // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for dist_schedule(static)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
- // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-
- // check EUB for distribute
- // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
- // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
- // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
- // CHECK-DAG: [[EUB_TRUE]]:
- // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
- // CHECK: br label %[[EUB_END:.+]]
- // CHECK-DAG: [[EUB_FALSE]]:
- // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
- // CHECK: br label %[[EUB_END]]
- // CHECK-DAG: [[EUB_END]]:
- // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
- // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
-
- // initialize omp.iv
- // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
- // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[OMP_JUMP_BACK:.+]]
-
- // check exit condition
- // CHECK: [[OMP_JUMP_BACK]]:
- // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // CHECK: [[DIST_BODY]]:
- // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
- // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
- // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
- // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
- // check that distlb and distub are properly passed to fork_call
- // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
- // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
- // CHECK: br label %[[DIST_INC:.+]]
-
- // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
- // CHECK: [[DIST_INC]]:
- // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
- // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
- // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
- // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[OMP_JUMP_BACK]]
-
- // CHECK-DAG: call void @__kmpc_for_static_fini(
- // CHECK: ret
-
- // implementation of 'parallel for'
- // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-
- // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-
- // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
- // In this case we use EUB
- // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
- // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
- // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
- // CHECK: [[PF_EUB_TRUE]]:
- // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
- // CHECK: br label %[[PF_EUB_END:.+]]
- // CHECK-DAG: [[PF_EUB_FALSE]]:
- // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // CHECK: br label %[[PF_EUB_END]]
- // CHECK-DAG: [[PF_EUB_END]]:
- // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
- // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
-
- // initialize omp.iv
- // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
-
- // check exit condition
- // CHECK: [[OMP_PF_JUMP_BACK]]:
- // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
- // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
- // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
- // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // CHECK: [[PF_BODY]]:
- // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label {{.+}}
-
- // check stride 1 for 'for' in 'distribute parallel for'
- // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
- // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label %[[OMP_PF_JUMP_BACK]]
-
- // CHECK-DAG: call void @__kmpc_for_static_fini(
- // CHECK: ret
- }
-
- // dist_schedule: static chunk
- #pragma omp target
- #pragma omp teams
- // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for dist_schedule(static, ch)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
- // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
- // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
- // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-
- // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
- // check EUB for distribute
- // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
- // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
- // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
- // CHECK-DAG: [[EUB_TRUE]]:
- // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
- // CHECK: br label %[[EUB_END:.+]]
- // CHECK-DAG: [[EUB_FALSE]]:
- // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
- // CHECK: br label %[[EUB_END]]
- // CHECK-DAG: [[EUB_END]]:
- // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
- // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
-
- // initialize omp.iv
- // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
- // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
-
- // check exit condition
- // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
- // CHECK: [[DIST_OUTER_LOOP_BODY]]:
- // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
- // CHECK: [[DIST_INNER_LOOP_HEADER]]:
- // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
- // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
- // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
- // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // CHECK: [[DIST_INNER_LOOP_BODY]]:
- // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
- // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
- // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
- // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
- // check that distlb and distub are properly passed to fork_call
- // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
- // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
- // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
-
- // check DistInc
- // CHECK: [[DIST_INNER_LOOP_INC]]:
- // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
- // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
- // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
- // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
- // CHECK: [[DIST_INNER_LOOP_END]]:
- // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
- // CHECK: [[DIST_OUTER_LOOP_INC]]:
- // check NextLB and NextUB
- // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
- // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
- // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
- // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
- // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
- // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
- // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
- // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
- // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
-
- // outer loop exit
- // CHECK: [[DIST_OUTER_LOOP_END]]:
- // CHECK-DAG: call void @__kmpc_for_static_fini(
- // CHECK: ret
-
- // skip implementation of 'parallel for': using default scheduling and was tested above
- }
-
- // schedule: static no chunk
- #pragma omp target
- #pragma omp teams
- // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for schedule(static)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
- // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
- // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
- // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
- // CHECK: ret
-
- // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default)
- // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-
- // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-
- // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
- // In this case we use EUB
- // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
- // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
- // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
- // CHECK: [[PF_EUB_TRUE]]:
- // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
- // CHECK: br label %[[PF_EUB_END:.+]]
- // CHECK-DAG: [[PF_EUB_FALSE]]:
- // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // CHECK: br label %[[PF_EUB_END]]
- // CHECK-DAG: [[PF_EUB_END]]:
- // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
- // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
-
- // initialize omp.iv
- // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
-
- // check exit condition
- // CHECK: [[OMP_PF_JUMP_BACK]]:
- // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
- // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
- // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
- // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
-
- // check that PrevLB and PrevUB are passed to the 'for'
- // CHECK: [[PF_BODY]]:
- // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label {{.+}}
-
- // check stride 1 for 'for' in 'distribute parallel for'
- // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
- // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label %[[OMP_PF_JUMP_BACK]]
-
- // CHECK-DAG: call void @__kmpc_for_static_fini(
- // CHECK: ret
- }
-
- // schedule: static chunk
- #pragma omp target
- #pragma omp teams
- // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for schedule(static, ch)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
- // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
- // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
- // CHECK: ret
-
- // 'parallel for' implementation using outer and inner loops and PrevEUB
- // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
- // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
- // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
-
- // check PrevEUB (using PrevUB instead of NumIt as upper bound)
- // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
- // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
- // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
- // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
- // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
- // CHECK: [[PF_EUB_TRUE]]:
- // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // CHECK: br label %[[PF_EUB_END:.+]]
- // CHECK-DAG: [[PF_EUB_FALSE]]:
- // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
- // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
- // CHECK: br label %[[PF_EUB_END]]
- // CHECK-DAG: [[PF_EUB_END]]:
- // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
- // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
- // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
- // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
- // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
-
- // initialize omp.iv (IV = LB)
- // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
-
- // outer loop: while (IV < UB) {
- // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
- // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
- // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
-
- // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
- // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
-
- // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
- // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
- // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
- // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
-
- // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
- // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // skip body branch
- // CHECK: br{{.+}}
- // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
-
- // IV = IV + 1 and inner loop latch
- // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
- // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
- // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
- // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
-
- // check NextLB and NextUB
- // CHECK: [[OMP_PF_INNER_LOOP_END]]:
- // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
-
- // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
- // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
- // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
- // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
- // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
- // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
- // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
- // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
-
- // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
- // CHECK-DAG: call void @__kmpc_for_static_fini(
- // CHECK: ret
- }
-
- // schedule: dynamic no chunk
- #pragma omp target
- #pragma omp teams
- // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for schedule(dynamic)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
- // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
- // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
- // CHECK: ret
-
- // 'parallel for' implementation using outer and inner loops and PrevEUB
- // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
- // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
- // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
- // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
-
- // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
- // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
- // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
- // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
-
- // initialize omp.iv (IV = LB)
- // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
- // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
-
- // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
- // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
- // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
- // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
-
- // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
- // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // skip body branch
- // CHECK: br{{.+}}
- // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
-
- // IV = IV + 1 and inner loop latch
- // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
- // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
- // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
- // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
-
- // check NextLB and NextUB
- // CHECK: [[OMP_PF_INNER_LOOP_END]]:
- // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
-
- // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
- // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
-
- // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
- // CHECK: ret
- }
-
- // schedule: dynamic chunk
- #pragma omp target
- #pragma omp teams
- // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
-
- #pragma omp distribute parallel for schedule(dynamic, ch)
- for (int i = 0; i < n; ++i) {
- a[i] = b[i] + c[i];
- // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
- // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
- // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
- // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
- // CHECK: ret
-
- // 'parallel for' implementation using outer and inner loops and PrevEUB
- // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
- // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
- // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
-
- // initialize lb and ub to PrevLB and PrevUB
- // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
- // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
- // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
- // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
- // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
- // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
- // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
- // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
- // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
-
- // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
- // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
- // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
- // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
-
- // initialize omp.iv (IV = LB)
- // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
- // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
- // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
- // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
-
- // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
- // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
- // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
- // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
-
- // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
- // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
- // skip body branch
- // CHECK: br{{.+}}
- // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
-
- // IV = IV + 1 and inner loop latch
- // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
- // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
- // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
- // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
-
- // check NextLB and NextUB
- // CHECK: [[OMP_PF_INNER_LOOP_END]]:
- // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
-
- // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
- // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
-
- // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
- // CHECK: ret
- }
-
- return tmain<int>();
-#endif
-}
-
-// check code
-// CHECK: define{{.+}} [[TMAIN]]()
-
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
-
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_3:@.+]](
-
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_4:@.+]](
-
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_5:@.+]](
-
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_6:@.+]](
-
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_7:@.+]](
-
-// CHECK: define{{.+}} void [[OFFLOADING_FUN_1]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
-// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-
-// check EUB for distribute
-// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
-// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
-// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
-// CHECK-DAG: [[EUB_TRUE]]:
-// CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
-// CHECK: br label %[[EUB_END:.+]]
-// CHECK-DAG: [[EUB_FALSE]]:
-// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: br label %[[EUB_END]]
-// CHECK-DAG: [[EUB_END]]:
-// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
-// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
-
-// initialize omp.iv
-// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
-// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[OMP_JUMP_BACK:.+]]
-
-// check exit condition
-// CHECK: [[OMP_JUMP_BACK]]:
-// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
-
-// check that PrevLB and PrevUB are passed to the 'for'
-// CHECK: [[DIST_BODY]]:
-// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
-// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
-// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
-// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
-// check that distlb and distub are properly passed to fork_call
-// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
-// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
-// CHECK: br label %[[DIST_INC:.+]]
-
-// increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
-// CHECK: [[DIST_INC]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
-// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
-// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
-// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[OMP_JUMP_BACK]]
-
-// CHECK-DAG: call void @__kmpc_for_static_fini(
-// CHECK: ret
-
-// implementation of 'parallel for'
-// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-
-// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-
-// initialize lb and ub to PrevLB and PrevUB
-// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
-// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
-// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
-// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-
-// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
-// In this case we use EUB
-// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
-// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
-// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
-// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
-// CHECK: [[PF_EUB_TRUE]]:
-// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
-// CHECK: br label %[[PF_EUB_END:.+]]
-// CHECK-DAG: [[PF_EUB_FALSE]]:
-// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
-// CHECK: br label %[[PF_EUB_END]]
-// CHECK-DAG: [[PF_EUB_END]]:
-// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
-// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
-
-// initialize omp.iv
-// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
-// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
-
-// check exit condition
-// CHECK: [[OMP_PF_JUMP_BACK]]:
-// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
-// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
-// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
-// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
-
-// check that PrevLB and PrevUB are passed to the 'for'
-// CHECK: [[PF_BODY]]:
-// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label {{.+}}
-
-// check stride 1 for 'for' in 'distribute parallel for'
-// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
-// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label %[[OMP_PF_JUMP_BACK]]
-
-// CHECK-DAG: call void @__kmpc_for_static_fini(
-// CHECK: ret
-
-// CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
-// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-
-// check EUB for distribute
-// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
-// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
-// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
-// CHECK-DAG: [[EUB_TRUE]]:
-// CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
-// CHECK: br label %[[EUB_END:.+]]
-// CHECK-DAG: [[EUB_FALSE]]:
-// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: br label %[[EUB_END]]
-// CHECK-DAG: [[EUB_END]]:
-// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
-// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
-
-// initialize omp.iv
-// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
-// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[OMP_JUMP_BACK:.+]]
-
-// check exit condition
-// CHECK: [[OMP_JUMP_BACK]]:
-// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
-
-// check that PrevLB and PrevUB are passed to the 'for'
-// CHECK: [[DIST_BODY]]:
-// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
-// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
-// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
-// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
-// check that distlb and distub are properly passed to fork_call
-// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
-// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
-// CHECK: br label %[[DIST_INC:.+]]
-
-// increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
-// CHECK: [[DIST_INC]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
-// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
-// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
-// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[OMP_JUMP_BACK]]
-
-// CHECK-DAG: call void @__kmpc_for_static_fini(
-// CHECK: ret
-
-// implementation of 'parallel for'
-// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-
-// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-
-// initialize lb and ub to PrevLB and PrevUB
-// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
-// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
-// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
-// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-
-// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
-// In this case we use EUB
-// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
-// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
-// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
-// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
-// CHECK: [[PF_EUB_TRUE]]:
-// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
-// CHECK: br label %[[PF_EUB_END:.+]]
-// CHECK-DAG: [[PF_EUB_FALSE]]:
-// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
-// CHECK: br label %[[PF_EUB_END]]
-// CHECK-DAG: [[PF_EUB_END]]:
-// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
-// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
-
-// initialize omp.iv
-// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
-// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
-
-// check exit condition
-// CHECK: [[OMP_PF_JUMP_BACK]]:
-// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
-// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
-// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
-// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
-
-// check that PrevLB and PrevUB are passed to the 'for'
-// CHECK: [[PF_BODY]]:
-// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label {{.+}}
-
-// check stride 1 for 'for' in 'distribute parallel for'
-// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
-// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label %[[OMP_PF_JUMP_BACK]]
-
-// CHECK-DAG: call void @__kmpc_for_static_fini(
-// CHECK: ret
-
-// CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
-// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
-// unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
-// check EUB for distribute
-// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
-// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
-// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
-// CHECK-DAG: [[EUB_TRUE]]:
-// CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
-// CHECK: br label %[[EUB_END:.+]]
-// CHECK-DAG: [[EUB_FALSE]]:
-// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: br label %[[EUB_END]]
-// CHECK-DAG: [[EUB_END]]:
-// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
-// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
-
-// initialize omp.iv
-// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
-// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
-
-// check exit condition
-// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
-
-// check that PrevLB and PrevUB are passed to the 'for'
-// CHECK: [[DIST_INNER_LOOP_BODY]]:
-// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
-// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
-// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
-// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
-// check that distlb and distub are properly passed to fork_call
-// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
-// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
-// CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
-
-// check DistInc
-// CHECK: [[DIST_INNER_LOOP_INC]]:
-// CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
-// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
-// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
-// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
-// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
-// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
-// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
-// CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
-// CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
-// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
-// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
-// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
-
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
-// CHECK-DAG: call void @__kmpc_for_static_fini(
-// CHECK: ret
-
-// skip implementation of 'parallel for': using default scheduling and was tested above
-
-// CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
-// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
-
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
-// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
-// CHECK: ret
-
-// 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
-// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-
-// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-
-// initialize lb and ub to PrevLB and PrevUB
-// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
-// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
-// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
-// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-
-// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
-// In this case we use EUB
-// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
-// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
-// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
-// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
-// CHECK: [[PF_EUB_TRUE]]:
-// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
-// CHECK: br label %[[PF_EUB_END:.+]]
-// CHECK-DAG: [[PF_EUB_FALSE]]:
-// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
-// CHECK: br label %[[PF_EUB_END]]
-// CHECK-DAG: [[PF_EUB_END]]:
-// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
-// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
-
-// initialize omp.iv
-// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
-// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
-
-// check exit condition
-// CHECK: [[OMP_PF_JUMP_BACK]]:
-// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
-// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
-// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
-// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
-
-// check that PrevLB and PrevUB are passed to the 'for'
-// CHECK: [[PF_BODY]]:
-// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label {{.+}}
-
-// check stride 1 for 'for' in 'distribute parallel for'
-// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
-// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label %[[OMP_PF_JUMP_BACK]]
-
-// CHECK-DAG: call void @__kmpc_for_static_fini(
-// CHECK: ret
-
-// CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
-// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
-// CHECK: ret
-
-// 'parallel for' implementation using outer and inner loops and PrevEUB
-// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
-// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
-
-// initialize lb and ub to PrevLB and PrevUB
-// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
-// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
-// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
-// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
-// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
-
-// check PrevEUB (using PrevUB instead of NumIt as upper bound)
-// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
-// CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
-// CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
-// CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
-// CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
-// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
-// CHECK: [[PF_EUB_TRUE]]:
-// CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
-// CHECK: br label %[[PF_EUB_END:.+]]
-// CHECK-DAG: [[PF_EUB_FALSE]]:
-// CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
-// CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
-// CHECK: br label %[[PF_EUB_END]]
-// CHECK-DAG: [[PF_EUB_END]]:
-// CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
-// CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
-// CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
-// CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
-// CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
-
-// initialize omp.iv (IV = LB)
-// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
-// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
-
-// outer loop: while (IV < UB) {
-// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
-// CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
-// CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
-
-// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
-
-// CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
-// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
-// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
-// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
-
-// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
-// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// skip body branch
-// CHECK: br{{.+}}
-// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
-
-// IV = IV + 1 and inner loop latch
-// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
-// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
-// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
-// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
-
-// check NextLB and NextUB
-// CHECK: [[OMP_PF_INNER_LOOP_END]]:
-// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
-// CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
-// CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
-// CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
-// CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
-// CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
-// CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
-// CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
-// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
-
-// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
-// CHECK-DAG: call void @__kmpc_for_static_fini(
-// CHECK: ret
-
-// CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
-// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
-// CHECK: ret
-
-// 'parallel for' implementation using outer and inner loops driven by __kmpc_dispatch_init/__kmpc_dispatch_next
-// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
-// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
-
-// initialize lb and ub to PrevLB and PrevUB
-// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
-// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
-// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
-// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
-// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
-// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
-// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
-// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
-
-// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
-// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
-// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
-// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
-
-// initialize omp.iv (IV = LB)
-// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
-// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
-// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
-// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
-// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
-
-// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
-// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// skip body branch
-// CHECK: br{{.+}}
-// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
-
-// IV = IV + 1 and inner loop latch
-// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
-// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
-// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
-// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
-
-// check NextLB and NextUB
-// CHECK: [[OMP_PF_INNER_LOOP_END]]:
-// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
-// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
-
-// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
-// CHECK: ret
-
-// CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
-// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
-// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
-// CHECK: ret
-
-// 'parallel for' implementation using outer and inner loops driven by __kmpc_dispatch_init/__kmpc_dispatch_next
-// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
-// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
-// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
-
-// initialize lb and ub to PrevLB and PrevUB
-// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
-// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
-// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
-// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
-// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
-// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
-// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
-// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
-// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
-// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
-
-// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
-// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
-// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
-// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
-
-// initialize omp.iv (IV = LB)
-// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
-// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
-// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
-// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
-// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
-// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
-
-// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
-// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
-// skip body branch
-// CHECK: br{{.+}}
-// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
-
-// IV = IV + 1 and inner loop latch
-// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
-// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
-// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
-// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
-
-// check NextLB and NextUB
-// CHECK: [[OMP_PF_INNER_LOOP_END]]:
-// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
-// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
-
-// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
-// CHECK: ret
-#endif
Removed: cfe/trunk/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp?rev=301232&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp (removed)
@@ -1,619 +0,0 @@
-// RxUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// expected-no-diagnostics
-#ifndef HEADER
-#define HEADER
-
-template <class T>
-struct S {
- T f;
- S(T a) : f(a) {}
- S() : f() {}
- operator T() { return T(); }
- ~S() {}
-};
-
-// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
-// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
-template <typename T>
-T tmain() {
- S<T> test;
- T t_var = T();
- T vec[] = {1, 2};
- S<T> s_arr[] = {1, 2};
- S<T> &var = test;
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for firstprivate(t_var, vec, s_arr, s_arr, var, var)
- for (int i = 0; i < 2; ++i) {
- vec[i] = t_var;
- s_arr[i] = var;
- }
- return T();
-}
-
-int main() {
- static int svar;
- volatile double g;
- volatile double &g1 = g;
-
- #ifdef LAMBDA
- // LAMBDA-LABEL: @main
- // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
- [&]() {
- static float sfvar;
- // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
-
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for firstprivate(g, g1, svar, sfvar)
- for (int i = 0; i < 2; ++i) {
- // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
- // LAMBDA-32: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
-
- // addr alloca's
- // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*,
- // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[G1_REF:%.+]] = alloca double*,
- // LAMBDA: [[TMP:%.+]] = alloca double*,
-
- // private alloca's
- // LAMBDA: [[G_PRIV:%.+]] = alloca double,
- // LAMBDA: [[G1_PRIV:%.+]] = alloca double,
- // LAMBDA: [[TMP_PRIV:%.+]] = alloca double*,
- // LAMBDA: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[SFVAR_PRIV:%.+]] = alloca float,
-
- // transfer input parameters into addr alloca's
- // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
- // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
- // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
- // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
-
- // init private alloca's with addr alloca's
- // g
- // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to
- // LAMBDA-32-DAG: [[G_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]]
- // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_CONV]],
- // LAMBDA-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]],
-
- // g1
- // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to
- // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}** [[G1_REF]],
- // LAMBDA-DAG: [[G1_REF_VAL:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
- // LAMBDA-DAG: store {{.+}}* [[G1_REF_VAL]], {{.+}}** [[TMP]],
- // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP]],
- // LAMBDA-DAG: [[TMP_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_REF]],
- // LAMBDA-DAG: store {{.+}} [[TMP_VAL]], {{.+}}* [[G1_PRIV]]
- // LAMBDA-DAG: store {{.+}}* [[G1_PRIV]], {{.+}}** [[TMP_PRIV]],
-
- // svar
- // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to
- // LAMBDA-64-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]],
- // LAMBDA-32-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]],
- // LAMBDA-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_PRIV]],
-
- // sfvar
- // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to
- // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CONV]],
- // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_PRIV]],
-
- // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
- // pass firstprivate parameters to parallel outlined function
- // g
- // LAMBDA-64-DAG: [[G_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_PRIV]],
- // LAMBDA-64: [[G_CAST_CONV:%.+]] = bitcast {{.+}}* [[G_CAST:%.+]] to
- // LAMBDA-64-DAG: store {{.+}} [[G_PRIV_VAL]], {{.+}}* [[G_CAST_CONV]],
- // LAMBDA-64-DAG: [[G_PAR:%.+]] = load {{.+}}, {{.+}}* [[G_CAST]],
-
- // g1
- // LAMBDA-DAG: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV]],
- // LAMBDA-DAG: [[G1_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV_VAL]],
- // LAMBDA: [[G1_CAST_CONV:%.+]] = bitcast {{.+}}* [[G1_CAST:%.+]] to
- // LAMBDA-DAG: store {{.+}} [[G1_PRIV_VAL]], {{.+}}* [[G1_CAST_CONV]],
- // LAMBDA-DAG: [[G1_PAR:%.+]] = load {{.+}}, {{.+}}* [[G1_CAST]],
-
- // svar
- // LAMBDA: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]],
- // LAMBDA-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to
- // LAMBDA-64-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST_CONV]],
- // LAMBDA-32-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST:%.+]],
- // LAMBDA-DAG: [[SVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]],
-
- // sfvar
- // LAMBDA: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_PRIV]],
- // LAMBDA-DAG: [[SFVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_CAST:%.+]] to
- // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_CAST_CONV]],
- // LAMBDA-DAG: [[SFVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CAST]],
-
- // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PAR]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]])
- // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIV]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]])
- // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
- // LAMBDA: ret void
-
-
- // LAMBDA-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
- // LAMBDA-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]])
- // skip initial params
- // LAMBDA: {{.+}} = alloca{{.+}},
- // LAMBDA: {{.+}} = alloca{{.+}},
- // LAMBDA: {{.+}} = alloca{{.+}},
- // LAMBDA: {{.+}} = alloca{{.+}},
-
- // addr alloca's
- // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*,
- // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[G1_REF:%.+]] = alloca double*,
-
- // private alloca's (only for 32-bit)
- // LAMBDA-32: [[G_PRIV:%.+]] = alloca double,
-
- // transfer input parameters into addr alloca's
- // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
- // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
- // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
- // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
-
- // prepare parameters for lambda
- // g
- // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to
- // LAMBDA-32-DAG: [[G_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]]
- // LAMBDA-32-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_ADDR_REF]],
- // LAMBDA-32-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]],
-
- // g1
- // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to
- // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}* [[G1_REF]],
-
- // svar
- // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to
-
- // sfvar
- // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to
-
- // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
- g = 1;
- g1 = 1;
- svar = 3;
- sfvar = 4.0;
- // LAMBDA-64: store double 1.0{{.+}}, double* [[G_CONV]],
- // LAMBDA-32: store double 1.0{{.+}}, double* [[G_PRIV]],
- // LAMBDA: [[G1_REF_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
- // LAMBDA: store {{.+}} 1.0{{.+}}, {{.+}}* [[G1_REF_REF]],
- // LAMBDA-64: store {{.+}} 3, {{.+}}* [[SVAR_CONV]],
- // LAMBDA-32: store {{.+}} 3, {{.+}}* [[SVAR_ADDR]],
- // LAMBDA: store {{.+}} 4.0{{.+}}, {{.+}}* [[SFVAR_CONV]],
-
- // pass params to inner lambda
- // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // LAMBDA-64: store double* [[G_CONV]], double** [[G_PRIVATE_ADDR_REF]],
- // LAMBDA-32: store double* [[G_PRIV]], double** [[G_PRIVATE_ADDR_REF]],
- // LAMBDA: [[G1_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // LAMBDA: [[G1_REF_REF:%.+]] = load double*, double** [[G1_REF]],
- // LAMBDA: store double* [[G1_REF_REF]], double** [[G1_PRIVATE_ADDR_REF]],
- // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // LAMBDA-64: store i{{[0-9]+}}* [[SVAR_CONV]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
- // LAMBDA-32: store i{{[0-9]+}}* [[SVAR_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
- // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // LAMBDA: store float* [[SFVAR_CONV]], float** [[SFVAR_PRIVATE_ADDR_REF]]
- // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
- // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
- // LAMBDA: ret void
- [&]() {
- // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
- // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
- g = 2;
- g1 = 2;
- svar = 4;
- sfvar = 8.0;
- // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
- // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
- // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
-
- // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
- // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
- // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
- // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
- // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
- // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
- }();
- }
- }();
- return 0;
- #else
- S<float> test;
- int t_var = 0;
- int vec[] = {1, 2};
- S<float> s_arr[] = {1, 2};
- S<float> &var = test;
-
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for firstprivate(t_var, vec, s_arr, s_arr, var, var, svar)
- for (int i = 0; i < 2; ++i) {
- vec[i] = t_var;
- s_arr[i] = var;
- }
- return tmain<int>();
- #endif
-}
-
-// CHECK-LABEL: define{{.*}} i{{[0-9]+}} @main()
-// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOAD_FUN_0:@.+]](
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-
-// CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[OMP_OUTLINED_0:@.+]] to void
-// CHECK: ret
-
-// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
-
-// addr alloca's
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
-// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
-// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
-// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*,
-
-// skip loop alloca's
-// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
-
-// private alloca's
-// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
-// CHECK: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
-
-// init addr alloca's with input values
-// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
-// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
-// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
-
-// init private alloca's with addr alloca's
-// t-var
-// CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
-// CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]],
-// CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]],
-
-// vec
-// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
-// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
-// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
-// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
-
-// s_arr
-// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
-// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
-// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
-// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
-// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
-// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
-// CHECK-DAG: [[CPY_BODY]]:
-// CHECK-DAG: call void @llvm.memcpy{{.+}}(
-// CHECK-DAG: [[CPY_DONE]]:
-
-// var
-// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
-// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
-// CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]],
-// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
-
-// svar
-// CHECK-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to
-// CHECK-64-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]],
-// CHECK-32-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[SVAR_CONV_VAL]], {{.+}}* [[SVAR_PRIV]],
-
-// CHECK: call void @__kmpc_for_static_init_4(
-// pass private alloca's to fork
-// CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]],
-// not dag to distinguish with S_VAR_CAST
-// CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to
-// CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]],
-// CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]],
-// CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]],
-// CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
-// CHECK-DAG: [[SVAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]],
-// CHECK-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to
-// CHECK-64-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST_CONV]],
-// CHECK-32-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST:%.+]],
-// CHECK-DAG: [[SVAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]],
-// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], [[S_FLOAT_TY]]* [[TMP_PRIV_VAL]], i{{[0-9]+}} [[SVAR_CAST_VAL]])
-// CHECK: call void @__kmpc_for_static_fini(
-
-// call destructors: var..
-// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-
-// ..and s_arr
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
-
-// CHECK: ret void
-
-// By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for.
-// However, the support for 'firstprivate' of 'parallel' is only used when 'parallel'
-// is found alone. Therefore we only have one 'firstprivate' support for 'parallel for'
-// in combination
-// CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]])
-
-// addr alloca's
-// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
-// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
-// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
-
-// skip loop alloca's
-// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
-
-// private alloca's
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
-
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
-
-// init addr alloca's with input values
-// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
-// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
-// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
-
-// init private alloca's with addr alloca's
-// vec
-// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
-// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
-// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
-// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
-
-// s_arr
-// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
-// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
-// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
-// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
-// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
-// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
-// CHECK-DAG: [[CPY_BODY]]:
-// CHECK-DAG: call void @llvm.memcpy{{.+}}(
-// CHECK-DAG: [[CPY_DONE]]:
-
-// var
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
-// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
-// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]],
-// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
-
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call void @__kmpc_for_static_fini(
-
-// call destructors: var..
-// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-
-// ..and s_arr
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
-
-// CHECK: ret void
-
-// template tmain with S_INT_TY
-// CHECK-LABEL: define{{.*}} i{{[0-9]+}} @{{.+}}tmain{{.+}}()
-// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
-// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOAD_FUN_0:@.+]](
-// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
-
-// CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_0:@.+]] to void
-// CHECK: ret
-
-// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
-
-// addr alloca's
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
-// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
-// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
-// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
-
-// skip loop alloca's
-// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
-
-// private alloca's
-// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
-// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
-
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
-
-// init addr alloca's with input values
-// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
-// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
-// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
-
-// init private alloca's with addr alloca's
-// t-var
-// CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
-// CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]],
-// CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]],
-
-// vec
-// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
-// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
-// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
-// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
-
-// s_arr
-// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
-// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
-// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
-// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
-// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
-// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
-// CHECK-DAG: [[CPY_BODY]]:
-// CHECK-DAG: call void @llvm.memcpy{{.+}}(
-// CHECK-DAG: [[CPY_DONE]]:
-
-// var
-// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
-// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
-// CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]],
-// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
-
-// CHECK: call void @__kmpc_for_static_init_4(
-// pass private alloca's to fork
-// CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]],
-// not dag to distinguish with S_VAR_CAST
-// CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to
-// CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]],
-// CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]],
-// CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]],
-// CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
-// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], [[S_INT_TY]]* [[TMP_PRIV_VAL]])
-// CHECK: call void @__kmpc_for_static_fini(
-
-// call destructors: var..
-// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
-
-// ..and s_arr
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]*
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
-// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
-
-// CHECK: ret void
-
-// By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for.
-// However, the support for 'firstprivate' of 'parallel' is only used when 'parallel'
-// is found alone. Therefore we only have one 'firstprivate' support for 'parallel for'
-// in combination
-// CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]])
-
-// addr alloca's
-// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
-// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
-
-// skip loop alloca's
-// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}},
-// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}},
-
-// private alloca's
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
-// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
-
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
-
-// init addr alloca's with input values
-// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
-// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]],
-// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
-// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
-
-// init private alloca's with addr alloca's
-// vec
-// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]],
-// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to
-// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to
-// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]],
-
-// s_arr
-// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]],
-// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]],
-// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to
-// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]],
-// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]]
-// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]]
-// CHECK-DAG: [[CPY_BODY]]:
-// CHECK-DAG: call void @llvm.memcpy{{.+}}(
-// CHECK-DAG: [[CPY_DONE]]:
-
-// var
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
-// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
-// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]],
-// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]],
-
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call void @__kmpc_for_static_fini(
-
-// call destructors: var..
-// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
-
-// ..and s_arr
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]*
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
-// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
-
-// CHECK: ret void
-
-#endif
Removed: cfe/trunk/test/OpenMP/distribute_parallel_for_if_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_parallel_for_if_codegen.cpp?rev=301232&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/distribute_parallel_for_if_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/distribute_parallel_for_if_codegen.cpp (removed)
@@ -1,192 +0,0 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK %s
-// expected-no-diagnostics
-#ifndef HEADER
-#define HEADER
-
-void fn1();
-void fn2();
-void fn3();
-void fn4();
-void fn5();
-void fn6();
-
-int Arg;
-
-// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
-void gtid_test() {
-#pragma omp target
-#pragma omp teams
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-#pragma omp distribute parallel for
- for(int i = 0 ; i < 100; i++) {}
- // CHECK: define internal void [[OFFLOADING_FUN_0]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
- // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
- // CHECK: call void @__kmpc_for_static_fini(
-
- // CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call void @__kmpc_for_static_fini(
- // CHECK: ret
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for if (parallel: false)
- for(int i = 0 ; i < 100; i++) {
- // CHECK: define internal void [[OFFLOADING_FUN_1]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
- // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call void @__kmpc_serialized_parallel(
- // CHECK: call void [[OMP_OUTLINED_1:@.+]](
- // CHECK: call void @__kmpc_end_serialized_parallel(
- // CHECK: call void @__kmpc_for_static_fini(
- // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call void @{{.+}}gtid_test
- // CHECK: call void @__kmpc_for_static_fini(
- // CHECK: ret
- gtid_test();
- }
-}
-
-
-template <typename T>
-int tmain(T Arg) {
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for if (true)
- for(int i = 0 ; i < 100; i++) {
- fn1();
- }
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for if (false)
- for(int i = 0 ; i < 100; i++) {
- fn2();
- }
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for if (parallel: Arg)
- for(int i = 0 ; i < 100; i++) {
- fn3();
- }
- return 0;
-}
-
-// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
-int main() {
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
-// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for if (true)
- for(int i = 0 ; i < 100; i++) {
- // CHECK: define internal void [[OFFLOADING_FUN_0]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
- // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
-
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void
- // CHECK: call void @__kmpc_for_static_fini(
- // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call {{.*}}void @{{.+}}fn4
- // CHECK: call void @__kmpc_for_static_fini(
-
- fn4();
- }
-
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for if (false)
- for(int i = 0 ; i < 100; i++) {
- // CHECK: define internal void [[OFFLOADING_FUN_1]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
- // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
-
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call void @__kmpc_serialized_parallel(
- // CHECK: call void [[OMP_OUTLINED_3:@.+]](
- // CHECK: call void @__kmpc_end_serialized_parallel(
- // CHECK: call void @__kmpc_for_static_fini(
-
- // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call {{.*}}void @{{.+}}fn5
- // CHECK: call void @__kmpc_for_static_fini(
- fn5();
- }
-
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for if (Arg)
- for(int i = 0 ; i < 100; i++) {
- // CHECK: define internal void [[OFFLOADING_FUN_2]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
- // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]](
-
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void
- // CHECK: call void @__kmpc_serialized_parallel(
- // CHECK: call void [[OMP_OUTLINED_4:@.+]](
- // CHECK: call void @__kmpc_end_serialized_parallel(
- // CHECK: call void @__kmpc_for_static_fini(
-
- // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
- // CHECK: call void @__kmpc_for_static_init_4(
- // CHECK: call {{.*}}void @{{.+}}fn6
- // CHECK: call void @__kmpc_for_static_fini(
- fn6();
- }
-
- return tmain(Arg);
-}
-
-// CHECK-LABEL: define {{.+}} @{{.+}}tmain
-
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void
-// CHECK: call void @__kmpc_for_static_fini(
-
-// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]]
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call {{.*}}void @{{.+}}fn1
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
-// CHECK: call void [[T_OUTLINE_FUN_2:@.+]](
-// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
-// CHECK: call void @__kmpc_for_static_fini(
-
-// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]]
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call {{.*}}void @{{.+}}fn2
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void
-// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
-// call void [[T_OUTLINE_FUN_3:@.+]](
-// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
-
-// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]]
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call {{.*}}void @{{.+}}fn3
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-#endif
Removed: cfe/trunk/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp?rev=301232&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp (removed)
@@ -1,653 +0,0 @@
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// expected-no-diagnostics
-#ifndef HEADER
-#define HEADER
-
-template <class T>
-struct S {
- T f;
- S(T a) : f(a) {}
- S() : f() {}
- operator T() { return T(); }
- ~S() {}
-};
-
-// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
-// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
-template <typename T>
-T tmain() {
- S<T> test;
- T t_var = T();
- T vec[] = {1, 2};
- S<T> s_arr[] = {1, 2};
- S<T> &var = test;
- #pragma omp target
- #pragma omp teams
-#pragma omp distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var)
- for (int i = 0; i < 2; ++i) {
- vec[i] = t_var;
- s_arr[i] = var;
- }
- return T();
-}
-
-int main() {
- static int svar;
- volatile double g;
- volatile double &g1 = g;
-
- #ifdef LAMBDA
- // LAMBDA-LABEL: @main
- // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
- [&]() {
- static float sfvar;
- // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
-
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
- #pragma omp target
- #pragma omp teams
-#pragma omp distribute parallel for lastprivate(g, g1, svar, sfvar)
- for (int i = 0; i < 2; ++i) {
- // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
- // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
- // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
- // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
- // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
- // LAMBDA: [[TMP_G1:%.+]] = alloca double*,
- // loop variables
- // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
- // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
- // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
- // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
- // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
- // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
- // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
- // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
-
- // init addr alloca's
- // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
- // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
- // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
- // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
-
- // init private variables
- // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
- // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
- // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
- // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
- // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]],
- // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]],
- // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
-
- // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
- // LAMBDA: [[G1_PAR:%.+]] = load{{.+}}, {{.+}} [[TMP_G1_PRIVATE]],
- // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIVATE]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PRIVATE]], {{.+}} [[SFVAR_PRIVATE]])
- // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIVATE]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PRIVATE]], {{.+}} [[SFVAR_PRIVATE]])
- // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
-
- // lastprivate
- // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
- // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
- // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
-
- // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
- // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
- // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
- // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
- // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
- // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]],
-
- // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
- // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
- // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
- // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
- // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
- // LAMBDA: [[OMP_LASTPRIV_DONE]]:
- // LAMBDA: ret
-
- g = 1;
- g1 = 1;
- svar = 3;
- sfvar = 4.0;
- // outlined function for 'parallel for'
- // LAMBDA-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G_IN:%.+]], {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]])
- // LAMBDA-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G_IN:%.+]], {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]])
-
- // addr alloca's
- // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
- // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
- // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
- // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
-
- // loop variables
- // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
- // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
- // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
- // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
-
- // private alloca's
- // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
- // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
- // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
- // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
-
- // init addr alloca's
- // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
- // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
- // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
- // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
-
- // init private variables
- // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
- // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
- // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-
- // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
- // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1]],
-
- // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
-
- // loop body
- // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
- // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
- // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
- // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
- // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
- // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
- // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
- // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
- // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
- // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
- // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
-
- // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
-
- // lastprivate
- // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
- // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
- // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
- // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
- // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
- // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
- // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
- // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
- // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
- // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
- // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
- // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
- // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
- // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
- // LAMBDA: [[OMP_LASTPRIV_DONE]]:
- // LAMBDA: ret
-
- [&]() {
- // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
- // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
- g = 2;
- g1 = 2;
- svar = 4;
- sfvar = 8.0;
- // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
- // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
- // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
-
- // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
- // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
- // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
- // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
- // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
- // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
- }();
- }
- }();
- return 0;
- #else
- S<float> test;
- int t_var = 0;
- int vec[] = {1, 2};
- S<float> s_arr[] = {1, 2};
- S<float> &var = test;
-
- #pragma omp target
- #pragma omp teams
-#pragma omp distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
- for (int i = 0; i < 2; ++i) {
- vec[i] = t_var;
- s_arr[i] = var;
- }
- int i;
-
- return tmain<int>();
- #endif
-}
-
-// CHECK: define{{.*}} i{{[0-9]+}} @main()
-// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
-// CHECK: ret
-
-// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
-// CHECK: ret
-//
-// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
-// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
-// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
-// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
-// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
-// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
-// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
-// CHECK: [[TMP:%.*]] = alloca [[S_FLOAT_TY]]*,
-// skip loop variables
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
-// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-
-// copy from parameters to local address variables
-// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
-// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
-// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
-
-// load content of local address variables
-// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
-// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
-// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
-// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
-// CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]],
-// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-
-// call constructor for s_arr
-// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
-// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BGN]],
-// CHECK: br label %[[S_ARR_CST_LOOP:.+]]
-// CHECK: [[S_ARR_CST_LOOP]]:
-// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}}
-// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_CTOR]])
-// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]],
-// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
-// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
-// CHECK: [[S_ARR_CST_END]]:
-// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
-// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-// CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
-
-// the distribute loop
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}} [[TMP_PRIV]],
-// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]], {{.+}} [[S_VAR_PRIV]])
-// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]], {{.+}} [[S_VAR_PRIV]])
-
-// CHECK: call void @__kmpc_for_static_fini(
-
-// lastprivates
-// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
-// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
-
-// CHECK: [[OMP_LASTPRIV_BLOCK]]:
-// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
-// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
-// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
-// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
-// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
-// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
-// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
-// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
-// CHECK: [[S_ARR_COPY_BLOCK]]:
-// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
-// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
-// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
-// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
-// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
-// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
-// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
-// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
-// CHECK: [[S_ARR_COPY_DONE]]:
-// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8*
-// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
-// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
-// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
-// CHECK: ret void
-
-// outlined function for 'parallel for'
-// CHECK-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]], {{.+}} [[SVAR_IN:%.+]])
-// CHECK-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]], {{.+}} [[SVAR_IN:%.+]])
-
-// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
-// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
-// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
-// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
-// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
-// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
-// skip loop variables
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
-// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-
-// copy from parameters to local address variables
-// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
-// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
-// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
-// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
-
-// load content of local address variables
-// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
-// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
-// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
-// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-
-// call constructor for s_arr
-// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
-// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BGN]],
-// CHECK: br label %[[S_ARR_CST_LOOP:.+]]
-// CHECK: [[S_ARR_CST_LOOP]]:
-// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}}
-// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_CTOR]])
-// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]],
-// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
-// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
-// CHECK: [[S_ARR_CST_END]]:
-// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
-// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-// CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
-
-// CHECK: call void @__kmpc_for_static_init_4(
-
-// loop body
-// assignment: vec[i] = t_var;
-// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
-// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
-// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
-
-// assignment: s_arr[i] = var;
-// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
-// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
-// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
-// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]],
-
-// CHECK: call void @__kmpc_for_static_fini(
-
-// lastprivates
-// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
-// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
-
-// CHECK: [[OMP_LASTPRIV_BLOCK]]:
-// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
-// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
-// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
-// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
-// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
-// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
-// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
-// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
-// CHECK: [[S_ARR_COPY_BLOCK]]:
-// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
-// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
-// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
-// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
-// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
-// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
-// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
-// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
-// CHECK: [[S_ARR_COPY_DONE]]:
-// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
-// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
-// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
-// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
-// CHECK: ret void
-
-// template tmain
-// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
-// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
-// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
-// CHECK: ret
-
-// CHECK: define internal void [[OFFLOAD_FUN_1]](
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
-// CHECK: ret
-
-// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN:%.+]])
-// skip alloca of global_tid and bound_tid
-// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
-// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
-// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
-// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
-// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
-// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
-// skip loop variables
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
-// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
-
-// skip init of bound and global tid
-// CHECK: store i{{[0-9]+}}* {{.*}},
-// CHECK: store i{{[0-9]+}}* {{.*}},
-// copy from parameters to local address variables
-// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
-// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
-// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]],
-
-// load content of local address variables
-// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
-// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
-// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]],
-// CHECK-DAG: store [[S_INT_TY]]* [[VAR_ADDR_REF]], [[S_INT_TY]]** [[TMP]],
-// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK-DAG: [[TMP_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
-
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}} [[TMP_PRIV]],
-// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]])
-// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]])
-
-// CHECK: call void @__kmpc_for_static_fini(
-
-// lastprivates
-// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
-// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
-
-// CHECK: [[OMP_LASTPRIV_BLOCK]]:
-// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
-// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
-// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
-// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
-// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]*
-// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
-// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
-// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
-// CHECK: [[S_ARR_COPY_BLOCK]]:
-// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
-// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
-// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
-// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
-// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
-// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
-// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
-// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
-// CHECK: [[S_ARR_COPY_DONE]]:
-// CHECK: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8*
-// CHECK: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL_BCAST]],{{.+}})
-// CHECK: ret void
-
-// outlined function for 'parallel for'
-// CHECK-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]])
-// CHECK-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]])
-
-// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
-// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
-// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
-// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
-// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
-// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
-// skip loop variables
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: {{.+}} = alloca i{{[0-9]+}},
-// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
-// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*,
-
-// copy from parameters to local address variables
-// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
-// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
-// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]],
-
-// load content of local address variables
-// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
-// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
-// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-
-// call constructor for s_arr
-// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
-// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BGN]],
-// CHECK: br label %[[S_ARR_CST_LOOP:.+]]
-// CHECK: [[S_ARR_CST_LOOP]]:
-// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}}
-// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_CTOR]])
-// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]],
-// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
-// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
-// CHECK: [[S_ARR_CST_END]]:
-// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]],
-// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
-// CHECK: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]],
-
-// CHECK: call void @__kmpc_for_static_init_4(
-
-// assignment: vec[i] = t_var;
-// CHECK: [[IV_VAL:%.+]] =
-// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
-// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
-// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
-
-// assignment: s_arr[i] = var;
-// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
-// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
-// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR]] to i8*
-// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8*
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]],
-
-// CHECK: call void @__kmpc_for_static_fini(
-
-// lastprivates
-// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
-// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
-
-// CHECK: [[OMP_LASTPRIV_BLOCK]]:
-// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
-// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
-// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
-// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
-// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]*
-// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
-// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
-// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
-// CHECK: [[S_ARR_COPY_BLOCK]]:
-// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
-// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
-// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
-// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
-// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
-// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
-// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
-// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
-// CHECK: [[S_ARR_COPY_DONE]]:
-// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR_REF]] to i8*
-// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
-// CHECK: ret void
-
-#endif
Removed: cfe/trunk/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp?rev=301232&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp (removed)
@@ -1,121 +0,0 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// expected-no-diagnostics
-#ifndef HEADER
-#define HEADER
-
-typedef __INTPTR_TYPE__ intptr_t;
-
-// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
-// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
-// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
-// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
-
-void foo();
-
-struct S {
- intptr_t a, b, c;
- S(intptr_t a) : a(a) {}
- operator char() { return a; }
- ~S() {}
-};
-
-template <typename T, int C>
-int tmain() {
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for num_threads(C)
- for (int i = 0; i < 100; i++)
- foo();
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for num_threads(T(23))
- for (int i = 0; i < 100; i++)
- foo();
- return 0;
-}
-
-int main() {
- S s(0);
- char a = s;
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: invoke{{.+}} [[TMAIN_5:@.+]]()
-// CHECK: invoke{{.+}} [[TMAIN_1:@.+]]()
-#pragma omp target
-#pragma omp teams
- // CHECK: define internal void [[OFFLOADING_FUN_0]](
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
-#pragma omp distribute parallel for num_threads(2)
- for (int i = 0; i < 100; i++) {
- // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
- // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 2)
- // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
- foo();
- }
-#pragma omp target
-#pragma omp teams
- // CHECK: define internal void [[OFFLOADING_FUN_1]](
-
- // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
-#pragma omp distribute parallel for num_threads(a)
- for (int i = 0; i < 100; i++) {
- // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
- // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*,
- // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]],
- // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]],
- // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}}
- // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]])
- // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
- foo();
- }
- return a + tmain<char, 5>() + tmain<S, 1>();
-}
-
-// tmain 5
-// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[T_OFFLOADING_FUN_0:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[T_OFFLOADING_FUN_1:@.+]](
-
-// tmain 1
-// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]()
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[T_OFFLOADING_FUN_2:@.+]](
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[T_OFFLOADING_FUN_3:@.+]](
-
-// CHECK: define internal void [[T_OFFLOADING_FUN_0]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_0]](
-// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 5)
-// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
-
-// CHECK: define internal void [[T_OFFLOADING_FUN_1]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_1]](
-// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23)
-// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
-
-// CHECK: define internal void [[T_OFFLOADING_FUN_2]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]](
-// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1)
-// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
-
-// CHECK: define internal void [[T_OFFLOADING_FUN_3]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]](
-// CHECK-DAG: [[CALL_RES:%.+]] = invoke{{.+}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}})
-// CHECK-DAG: [[CALL_RES_SEXT:%.+]] = sext i8 [[CALL_RES]] to {{.+}}
-// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]])
-// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
-#endif
Removed: cfe/trunk/test/OpenMP/distribute_parallel_for_private_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_parallel_for_private_codegen.cpp?rev=301232&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/distribute_parallel_for_private_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/distribute_parallel_for_private_codegen.cpp (removed)
@@ -1,297 +0,0 @@
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
-// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
-
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// expected-no-diagnostics
-#ifndef HEADER
-#define HEADER
-
-template <class T>
-struct S {
- T f;
- S(T a) : f(a) {}
- S() : f() {}
- operator T() { return T(); }
- ~S() {}
-};
-
-// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
-// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
-template <typename T>
-T tmain() {
- S<T> test;
- T t_var = T();
- T vec[] = {1, 2};
- S<T> s_arr[] = {1, 2};
- S<T> &var = test;
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for private(t_var, vec, s_arr, s_arr, var, var)
- for (int i = 0; i < 2; ++i) {
- vec[i] = t_var;
- s_arr[i] = var;
- }
- return T();
-}
-
-int main() {
- static int svar;
- volatile double g;
- volatile double &g1 = g;
-
- #ifdef LAMBDA
- // LAMBDA-LABEL: @main
- // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
- [&]() {
- static float sfvar;
- // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
- // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
- // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
-
- // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]()
- // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for private(g, g1, svar, sfvar)
- for (int i = 0; i < 2; ++i) {
- // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
- // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double,
- // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double,
- // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*,
- // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float,
- // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]],
- // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
- // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to {{.+}},
- // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
- // LAMBDA: ret void
-
- // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED]](
- // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double,
- // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double,
- // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*,
- // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
- // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float,
-
- g = 1;
- g1 = 1;
- svar = 3;
- sfvar = 4.0;
- // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]],
- // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
- // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]],
- // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE_ADDR]],
- // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]],
- // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_PRIVATE_ADDR]],
- // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
- // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
- // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // LAMBDA: store float* [[SFVAR_PRIVATE_ADDR]], float** [[SFVAR_PRIVATE_ADDR_REF]]
- // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
- // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
- // LAMBDA: ret void
- [&]() {
- // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
- // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
- g = 2;
- g1 = 2;
- svar = 4;
- sfvar = 8.0;
- // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
- // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
- // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
-
- // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
- // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
- // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
- // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
- // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
- // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
- }();
- }
- }();
- return 0;
- #else
- S<float> test;
- int t_var = 0;
- int vec[] = {1, 2};
- S<float> s_arr[] = {1, 2};
- S<float> &var = test;
-
- #pragma omp target
- #pragma omp teams
- #pragma omp distribute parallel for private(t_var, vec, s_arr, s_arr, var, var, svar)
- for (int i = 0; i < 2; ++i) {
- vec[i] = t_var;
- s_arr[i] = var;
- }
- return tmain<int>();
- #endif
-}
-
-// CHECK: define{{.*}} i{{[0-9]+}} @main()
-// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOAD_FUN_0:@.+]](
-
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: ret
-
-// CHECK: define{{.+}} [[OFFLOAD_FUN_0]]()
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_0:@.+]] to void
-// CHECK: ret
-//
-// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
-// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
-// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK-NOT: alloca [[S_FLOAT_TY]],
-// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
-// CHECK-NOT: [[T_VAR_PRIV]]
-// CHECK-NOT: [[VEC_PRIV]]
-// this is the ctor loop
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]*
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
-// CHECK-NOT: [[T_VAR_PRIV]]
-// CHECK-NOT: [[VEC_PRIV]]
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to {{.+}},
-// CHECK: call void @__kmpc_for_static_fini(
-
-// call destructors: var..
-// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-
-// ..and s_arr
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
-
-// CHECK: ret void
-
-// By OpenMP specifications, private applies to both distribute and parallel for.
-// However, the support for 'private' of 'parallel' is only used when 'parallel'
-// is found alone. Therefore we only have one 'private' support for 'parallel for'
-// in combination
-// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_0]](
-// CHECK: [[T_VAR_PRIV:%t_var+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%vec+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%s_arr+]] = alloca [2 x [[S_FLOAT_TY]]],
-// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]],
-// CHECK: [[VAR_PRIV:%var+]] = alloca [[S_FLOAT_TY]],
-// CHECK-NOT: alloca [[S_FLOAT_TY]],
-// CHECK: [[S_VAR_PRIV:%svar+]] = alloca i{{[0-9]+}},
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
-// CHECK-NOT: [[T_VAR_PRIV]]
-// CHECK-NOT: [[VEC_PRIV]]
-// this is the ctor loop
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]*
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
-// CHECK-NOT: [[T_VAR_PRIV]]
-// CHECK-NOT: [[VEC_PRIV]]
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call void @__kmpc_for_static_fini(
-
-// call destructors: var..
-// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-
-// ..and s_arr
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]*
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
-
-// CHECK: ret void
-
-// template tmain with S_INT_TY
-// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
-// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
-// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
-// CHECK: call void [[OFFLOAD_FUN_1:@.+]](
-// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: ret
-
-// CHECK: ret
-
-// CHECK: define internal void [[OFFLOAD_FUN_1]]()
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_1:@.+]] to void
-// CHECK: ret
-//
-// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
-// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
-// CHECK-NOT: alloca [2 x [[S_INT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
-// CHECK-NOT: alloca [[S_INT_TY]],
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
-// CHECK-NOT: [[T_VAR_PRIV]]
-// CHECK-NOT: [[VEC_PRIV]]
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]*
-// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
-// CHECK-NOT: [[T_VAR_PRIV]]
-// CHECK-NOT: [[VEC_PRIV]]
-// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}},
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-
-// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]](
-// CHECK: [[T_VAR_PRIV:%t_var+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%vec+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%s_arr+]] = alloca [2 x [[S_INT_TY]]],
-// CHECK-NOT: alloca [2 x [[S_INT_TY]]],
-// CHECK: [[VAR_PRIV:%var+]] = alloca [[S_INT_TY]],
-// CHECK-NOT: alloca [[S_INT_TY]],
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
-// CHECK-NOT: [[T_VAR_PRIV]]
-// CHECK-NOT: [[VEC_PRIV]]
-// this is the ctor loop
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]*
-// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
-// CHECK-NOT: [[T_VAR_PRIV]]
-// CHECK-NOT: [[VEC_PRIV]]
-// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call void @__kmpc_for_static_fini(
-
-// call destructors: var..
-// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
-
-// ..and s_arr
-// CHECK: {{.+}}:
-// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]*
-// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]],
-// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
-
-// CHECK: ret void
-
-#endif
Removed: cfe/trunk/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp?rev=301232&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp (removed)
@@ -1,93 +0,0 @@
-// add -fopenmp-targets
-
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// expected-no-diagnostics
-#ifndef HEADER
-#define HEADER
-
-typedef __INTPTR_TYPE__ intptr_t;
-
-// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
-// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
-// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
-
-void foo();
-
-struct S {
- intptr_t a, b, c;
- S(intptr_t a) : a(a) {}
- operator char() { return a; }
- ~S() {}
-};
-
-template <typename T>
-T tmain() {
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for proc_bind(master)
- for(int i = 0; i < 1000; i++) {}
- return T();
-}
-
-int main() {
- // CHECK-LABEL: @main
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for proc_bind(spread)
- for(int i = 0; i < 1000; i++) {}
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for proc_bind(close)
- for(int i = 0; i < 1000; i++) {}
- return tmain<int>();
-}
-
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
-// CHECK: call void [[OFFL1:@.+]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
-// CHECK: call void [[OFFL2:@.+]]()
-// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
-// CHECK: ret i32 [[CALL_RET]]
-
-// CHECK: define{{.+}} void [[OFFL1]](
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
-// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
-// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
-// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
-// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4)
-// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
-// CHECK: ret void
-
-// CHECK: define{{.+}} [[OFFL2]]()
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
-// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
-// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
-// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
-// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3)
-// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
-// CHECK: ret void
-
-// CHECK: define{{.+}} [[TMAIN]]()
-// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
-// CHECK: call void [[OFFL3:@.+]]()
-
-// CHECK: define{{.+}} [[OFFL3]]()
-// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
-
-// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]],
-// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
-// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
-// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
-// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2)
-// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
-// CHECK: ret void
-#endif
More information about the cfe-commits mailing list