r340953 - [OPENMP][NVPTX] Add support for lightweight runtime.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 29 11:32:21 PDT 2018
Author: abataev
Date: Wed Aug 29 11:32:21 2018
New Revision: 340953
URL: http://llvm.org/viewvc/llvm-project?rev=340953&view=rev
Log:
[OPENMP][NVPTX] Add support for lightweight runtime.
If the target construct can be executed in SPMD mode + it is a loop
based directive with static scheduling, we can use lightweight runtime
support.
Added:
cfe/trunk/test/OpenMP/nvptx_SPMD_codegen.cpp
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp
cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Wed Aug 29 11:32:21 2018
@@ -672,11 +672,19 @@ static bool hasParallelIfNumThreadsClaus
return false;
}
+/// Checks if the directive is the distribute clause with the lastprivate
+/// clauses. This construct does not support SPMD execution mode.
+static bool hasDistributeWithLastprivateClauses(const OMPExecutableDirective &D) {
+ return isOpenMPDistributeDirective(D.getDirectiveKind()) &&
+ D.hasClausesOfKind<OMPLastprivateClause>();
+}
+
/// Check for inner (nested) SPMD construct, if any
static bool hasNestedSPMDDirective(ASTContext &Ctx,
const OMPExecutableDirective &D) {
const auto *CS = D.getInnermostCapturedStmt();
- const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
const Stmt *ChildStmt = getSingleCompoundChild(Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
@@ -684,29 +692,221 @@ static bool hasNestedSPMDDirective(ASTCo
switch (D.getDirectiveKind()) {
case OMPD_target:
if (isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
+ !hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
+ !hasDistributeWithLastprivateClauses(*NestedDir))
return true;
- if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NND))
+ !hasParallelIfNumThreadsClause(Ctx, *NND) &&
+ !hasDistributeWithLastprivateClauses(*NND))
return true;
- if (DKind == OMPD_distribute) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ }
+ }
+ return false;
+ case OMPD_target_teams:
+ return isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
+ !hasDistributeWithLastprivateClauses(*NestedDir);
+ case OMPD_target_simd:
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_unknown:
+ llvm_unreachable("Unexpected directive.");
+ }
+ }
+
+ return false;
+}
+
+static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
+ switch (DirectiveKind) {
+ case OMPD_target:
+ case OMPD_target_teams:
+ return hasNestedSPMDDirective(Ctx, D);
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ return !hasParallelIfNumThreadsClause(Ctx, D);
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ // Distribute with lastprivates requires non-SPMD execution mode.
+ return !hasParallelIfNumThreadsClause(Ctx, D) &&
+ !hasDistributeWithLastprivateClauses(D);
+ case OMPD_target_simd:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ return false;
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_unknown:
+ break;
+ }
+ llvm_unreachable(
+ "Unknown programming model for OpenMP directive on NVPTX target.");
+}
+
+/// Check if the directive is loops based and has schedule clause at all or has
+/// static scheduling.
+static bool hasStaticScheduling(const OMPExecutableDirective &D) {
+ assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
+ isOpenMPLoopDirective(D.getDirectiveKind()) &&
+ "Expected loop-based directive.");
+ return !D.hasClausesOfKind<OMPOrderedClause>() &&
+ (!D.hasClausesOfKind<OMPScheduleClause>() ||
+ llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
+ [](const OMPScheduleClause *C) {
+ return C->getScheduleKind() == OMPC_SCHEDULE_static;
+ }));
+}
+
+/// Check for inner (nested) lightweight runtime construct, if any
+static bool hasNestedLightweightDirective(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt = getSingleCompoundChild(Body);
+
+ if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+ switch (D.getDirectiveKind()) {
+ case OMPD_target:
+ if (isOpenMPParallelDirective(DKind) &&
+ isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
+ hasStaticScheduling(*NestedDir))
+ return true;
+ if (DKind == OMPD_parallel) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
+ }
+ } else if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPParallelDirective(DKind) &&
+ isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
+ if (DKind == OMPD_parallel) {
+ Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
- if (!ChildStmt)
- return false;
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
- return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NND);
+ if (isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
}
}
}
@@ -714,25 +914,28 @@ static bool hasNestedSPMDDirective(ASTCo
return false;
case OMPD_target_teams:
if (isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
+ isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
+ hasStaticScheduling(*NestedDir))
return true;
- if (DKind == OMPD_distribute) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (DKind == OMPD_parallel) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
if (!Body)
return false;
ChildStmt = getSingleCompoundChild(Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
- return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NND);
+ if (isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
}
}
return false;
+ case OMPD_target_parallel:
+ return isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
case OMPD_target_teams_distribute:
- return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
case OMPD_target_simd:
- case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute_simd:
@@ -788,21 +991,26 @@ static bool hasNestedSPMDDirective(ASTCo
return false;
}
-static bool supportsSPMDExecutionMode(ASTContext &Ctx,
- const OMPExecutableDirective &D) {
+/// Checks if the construct supports lightweight runtime. It must be SPMD
+/// construct + inner loop-based construct with static scheduling.
+static bool supportsLightweightRuntime(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ if (!supportsSPMDExecutionMode(Ctx, D))
+ return false;
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
switch (DirectiveKind) {
case OMPD_target:
case OMPD_target_teams:
- case OMPD_target_teams_distribute:
- return hasNestedSPMDDirective(Ctx, D);
case OMPD_target_parallel:
+ return hasNestedLightweightDirective(Ctx, D);
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd:
- return !hasParallelIfNumThreadsClause(Ctx, D);
+ // (Last|First)-privates must be shared in parallel region.
+ return hasStaticScheduling(D);
case OMPD_target_simd:
+ case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
return false;
case OMPD_parallel:
@@ -1010,18 +1218,20 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntry
EST.ExitBB = CGF.createBasicBlock(".exit");
// Initialize the OMP state in the runtime; called by all active threads.
- // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
- // based on code analysis of the target region.
- llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
- /*RequiresOMPRuntime=*/Bld.getInt16(1),
- /*RequiresDataSharing=*/Bld.getInt16(1)};
+ bool RequiresFullRuntime = !supportsLightweightRuntime(CGF.getContext(), D);
+ llvm::Value *Args[] = {
+ getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
+ /*RequiresOMPRuntime=*/
+ Bld.getInt16(RequiresFullRuntime ? 1 : 0),
+ /*RequiresDataSharing=*/Bld.getInt16(RequiresFullRuntime ? 1 : 0)};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
- // For data sharing, we need to initialize the stack.
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
- OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
+ if (RequiresFullRuntime) {
+ // For data sharing, we need to initialize the stack.
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
+ }
CGF.EmitBranch(ExecuteBB);
@@ -1414,7 +1624,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
/// Build void __kmpc_data_sharing_init_stack_spmd();
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
break;
}
case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
@@ -1607,7 +1818,8 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitT
.emitGenericVarsEpilog(CGF);
}
} Action(Loc);
- CodeGen.setAction(Action);
+ if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
+ CodeGen.setAction(Action);
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
@@ -1640,19 +1852,61 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
unsigned GlobalRecordSize =
CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
- // TODO: allow the usage of shared memory to be controlled by
- // the user, for now, default to global.
- llvm::Value *GlobalRecordSizeArg[] = {
- llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
- llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
- GlobalRecordSizeArg);
- llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+
+ llvm::Value *GlobalRecCastAddr;
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+ llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
+ llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+ llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+ Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(SPMDBB);
+ Address RecPtr = CGF.CreateMemTemp(RecTy, "_local_stack");
+ CGF.EmitBranch(ExitBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(NonSPMDBB);
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
+ llvm::Value *GlobalRecordSizeArg[] = {
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
+ CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue =
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+ GlobalRecordSizeArg);
+ GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+ CGF.EmitBlock(ExitBB);
+ auto *Phi = Bld.CreatePHI(GlobalRecCastAddr->getType(),
+ /*NumReservedValues=*/2, "_select_stack");
+ Phi->addIncoming(RecPtr.getPointer(), SPMDBB);
+ Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
+ GlobalRecCastAddr = Phi;
+ I->getSecond().GlobalRecordAddr = Phi;
+ I->getSecond().IsInSPMDModeFlag = IsSPMD;
+ } else {
+ assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
+ "Expected Non-SPMD construct.");
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
+ llvm::Value *GlobalRecordSizeArg[] = {
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
+ CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue =
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+ GlobalRecordSizeArg);
+ GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+ I->getSecond().GlobalRecordAddr = GlobalRecValue;
+ I->getSecond().IsInSPMDModeFlag = nullptr;
+ }
LValue Base =
CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
- I->getSecond().GlobalRecordAddr = GlobalRecValue;
// Emit the "global alloca" which is a GEP from the global declaration
// record using the pointer returned by the runtime.
@@ -1724,9 +1978,26 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
Addr);
}
if (I->getSecond().GlobalRecordAddr) {
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
- I->getSecond().GlobalRecordAddr);
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+ llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+ Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(NonSPMDBB);
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
+ CGF.EmitBlock(ExitBB);
+ } else {
+ assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
+ "Expected Non-SPMD mode.");
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ I->getSecond().GlobalRecordAddr);
+ }
}
}
}
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Wed Aug 29 11:32:21 2018
@@ -374,6 +374,7 @@ private:
llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
const RecordDecl *GlobalRecord = nullptr;
llvm::Value *GlobalRecordAddr = nullptr;
+ llvm::Value *IsInSPMDModeFlag = nullptr;
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
};
/// Maps the function to the list of the globalized variables with their
Modified: cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp (original)
+++ cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp Wed Aug 29 11:32:21 2018
@@ -35,9 +35,19 @@ int maini1() {
// CHECK-NOT: @__kmpc_data_sharing_push_stack
// CHECK: define {{.*}}[[BAR]]()
+// CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]],
+// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
+// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
+// CHECK: br i1 [[IS_SPMD]], label
+// CHECK: br label
// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
-// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
-// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST]]*
+// CHECK: br label
+// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[GLOBALS]], {{.+}} ]
+// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]])
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK: br i1 [[IS_SPMD]], label
+// CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8*
+// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[BC]])
+// CHECK: br label
// CHECK: ret i32
Added: cfe/trunk/test/OpenMP/nvptx_SPMD_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_SPMD_codegen.cpp?rev=340953&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_SPMD_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/nvptx_SPMD_codegen.cpp Wed Aug 29 11:32:21 2018
@@ -0,0 +1,328 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
+// CHECK: @__omp_offloading_{{.+}}_l52_exec_mode = weak constant i8 1
+// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
+
+void foo() {
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+#pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for simd schedule(static)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for simd schedule(static, 1)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for simd schedule(auto)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for simd schedule(runtime)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for simd schedule(dynamic)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for simd schedule(guided)
+ for (int i = 0; i < 10; ++i)
+ ;
+int a;
+// CHECK: call void @__kmpc_kernel_init(
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+#pragma omp target teams distribute parallel for lastprivate(a)
+ for (int i = 0; i < 10; ++i)
+ a = i;
+#pragma omp target teams distribute parallel for schedule(static)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for schedule(static, 1)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for schedule(auto)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for schedule(runtime)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for schedule(dynamic)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams distribute parallel for schedule(guided)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+#pragma omp target teams
+#pragma omp distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for simd schedule(static)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for simd schedule(static, 1)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for simd schedule(auto)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for simd schedule(runtime)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for simd schedule(dynamic)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for simd schedule(guided)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+#pragma omp target teams
+#pragma omp distribute parallel for
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for schedule(static)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for schedule(static, 1)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for schedule(auto)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for schedule(runtime)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for schedule(dynamic)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target teams
+#pragma omp distribute parallel for schedule(guided)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for schedule(static)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for schedule(static, 1)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for schedule(auto)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for schedule(runtime)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for schedule(dynamic)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for schedule(guided)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+#pragma omp target parallel for
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel for schedule(static)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel for schedule(static, 1)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel for schedule(auto)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel for schedule(runtime)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel for schedule(dynamic)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel for schedule(guided)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+#pragma omp target parallel
+#pragma omp for simd
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel
+#pragma omp for simd schedule(static)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel
+#pragma omp for simd schedule(static, 1)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel
+#pragma omp for simd schedule(auto)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel
+#pragma omp for simd schedule(runtime)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel
+#pragma omp for simd schedule(dynamic)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target parallel
+#pragma omp for simd schedule(guided)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+#pragma omp target
+#pragma omp parallel
+#pragma omp for simd ordered
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel
+#pragma omp for simd schedule(static)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel
+#pragma omp for simd schedule(static, 1)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel
+#pragma omp for simd schedule(auto)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel
+#pragma omp for simd schedule(runtime)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel
+#pragma omp for simd schedule(dynamic)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel
+#pragma omp for simd schedule(guided)
+ for (int i = 0; i < 10; ++i)
+ ;
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
+#pragma omp target
+#pragma omp parallel for
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel for schedule(static)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel for schedule(static, 1)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel for schedule(auto)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel for schedule(runtime)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel for schedule(dynamic)
+ for (int i = 0; i < 10; ++i)
+ ;
+#pragma omp target
+#pragma omp parallel for schedule(guided)
+ for (int i = 0; i < 10; ++i)
+ ;
+}
+
+#endif
+
Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Wed Aug 29 11:32:21 2018
@@ -554,13 +554,20 @@ int baz(int f, double &a) {
// CHECK: ret void
// CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}})
+ // CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]],
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: [[GTID_ADDR:%.+]] = alloca i32,
// CHECK: store i32 0, i32* [[ZERO_ADDR]]
+ // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
+ // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
+ // CHECK: br i1 [[IS_SPMD]], label
+ // CHECK: br label
// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
- // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
- // CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0
+ // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST]]*
+ // CHECK: br label
+ // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[REC_ADDR]], {{.+}} ]
+ // CHECK: [[F_PTR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
@@ -595,7 +602,12 @@ int baz(int f, double &a) {
// CHECK: br label
// CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]],
- // CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
+ // CHECK: store i32 [[RES]], i32* [[RET:%.+]],
+ // CHECK: br i1 [[IS_SPMD]], label
+ // CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8*
+ // CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[BC]])
+ // CHECK: br label
+ // CHECK: [[RES:%.+]] = load i32, i32* [[RET]],
// CHECK: ret i32 [[RES]]
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp Wed Aug 29 11:32:21 2018
@@ -59,7 +59,7 @@ int bar(int n){
// CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//
@@ -102,7 +102,7 @@ int bar(int n){
// CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align
// CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align
// CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp Wed Aug 29 11:32:21 2018
@@ -47,7 +47,7 @@ int bar(int n){
}
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}(
- // CHECK: call void @__kmpc_spmd_kernel_init(
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//
@@ -69,7 +69,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
- // CHECK: call void @__kmpc_spmd_kernel_init(
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//
@@ -90,7 +90,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}(
- // CHECK: call void @__kmpc_spmd_kernel_init(
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXEC:.+]]
//
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp Wed Aug 29 11:32:21 2018
@@ -54,7 +54,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}(
//
- // CHECK: call void @__kmpc_spmd_kernel_init(
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXECUTE:.+]]
//
@@ -242,7 +242,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}(
//
- // CHECK: call void @__kmpc_spmd_kernel_init(
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXECUTE:.+]]
//
@@ -520,7 +520,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}(
//
- // CHECK: call void @__kmpc_spmd_kernel_init(
+ // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK: br label {{%?}}[[EXECUTE:.+]]
//
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp Wed Aug 29 11:32:21 2018
@@ -227,13 +227,13 @@ int bar(int n){
// CHECK: ret void
// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
-// CHECK: call void @__kmpc_spmd_kernel_init(
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
-// CHECK: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
// CHECK-NOT: call void @__kmpc_serialized_parallel(
// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
// CHECK-NOT: call void @__kmpc_end_serialized_parallel(
-// CHECK: call void @__kmpc_data_sharing_pop_stack(
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
// CHECK: call void @__kmpc_spmd_kernel_deinit()
// CHECK: ret
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp Wed Aug 29 11:32:21 2018
@@ -8,12 +8,13 @@
#ifndef HEADER
#define HEADER
-// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l56}}_exec_mode = weak constant i8 0
+// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak constant i8 1
+// Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l39}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l49}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l57}}_exec_mode = weak constant i8 0
#define N 1000
#define M 10
@@ -67,14 +68,14 @@ int bar(int n){
return a;
}
-// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
-// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
+// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l33_worker()
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l33(
+// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
-// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1)
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_kernel_deinit(i16 1)
// CHECK: ret void
// CHECK: define internal void [[OUTL1]](
@@ -84,8 +85,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
@@ -99,8 +99,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
@@ -115,8 +114,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
@@ -129,7 +127,7 @@ int bar(int n){
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void
-// CHECK: define weak void @__omp_offloading_{{.*}}_l56(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
+// CHECK: define weak void @__omp_offloading_{{.*}}_l57(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
// CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}})
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}})
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp?rev=340953&r1=340952&r2=340953&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp Wed Aug 29 11:32:21 2018
@@ -8,11 +8,12 @@
#ifndef HEADER
#define HEADER
-// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
+// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1
+// Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l42}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l47}}_exec_mode = weak constant i8 0
#define N 1000
#define M 10
@@ -62,14 +63,14 @@ int bar(int n){
return a;
}
-// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
-// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
+// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l31_worker()
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l31(
+// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
-// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1)
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: call void @__kmpc_kernel_deinit(i16 1)
// CHECK: ret void
// CHECK: define internal void [[OUTL1]](
@@ -79,8 +80,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
@@ -94,8 +94,7 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
@@ -110,8 +109,7 @@ int bar(int n){
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
-// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
More information about the cfe-commits
mailing list