r322515 - [OPENMP] Initial codegen for `target teams distribute parallel for
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 15 12:59:41 PST 2018
Author: abataev
Date: Mon Jan 15 12:59:40 2018
New Revision: 322515
URL: http://llvm.org/viewvc/llvm-project?rev=322515&view=rev
Log:
[OPENMP] Initial codegen for `target teams distribute parallel for
simd`.
Added host codegen + codegen for devices with default codegen for
`#pragma omp target teams distribute parallel for simd` directive.
Added:
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
Modified:
cfe/trunk/lib/Basic/OpenMPKinds.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/lib/Sema/SemaOpenMP.cpp
Modified: cfe/trunk/lib/Basic/OpenMPKinds.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/OpenMPKinds.cpp?rev=322515&r1=322514&r2=322515&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/OpenMPKinds.cpp (original)
+++ cfe/trunk/lib/Basic/OpenMPKinds.cpp Mon Jan 15 12:59:40 2018
@@ -928,6 +928,7 @@ void clang::getOpenMPCaptureRegions(
CaptureRegions.push_back(OMPD_taskloop);
break;
case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
CaptureRegions.push_back(OMPD_task);
CaptureRegions.push_back(OMPD_target);
CaptureRegions.push_back(OMPD_teams);
@@ -947,7 +948,6 @@ void clang::getOpenMPCaptureRegions(
case OMPD_atomic:
case OMPD_target_data:
case OMPD_distribute_simd:
- case OMPD_target_teams_distribute_parallel_for_simd:
CaptureRegions.push_back(OMPD_unknown);
break;
case OMPD_threadprivate:
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=322515&r1=322514&r2=322515&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Mon Jan 15 12:59:40 2018
@@ -7357,6 +7357,12 @@ void CGOpenMPRuntime::scanForTargetRegio
CGM, ParentName,
cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
break;
+ case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
+ CodeGenFunction::
+ EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
+ CGM, ParentName,
+ cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
+ break;
default:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
}
Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=322515&r1=322514&r2=322515&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Mon Jan 15 12:59:40 2018
@@ -2170,16 +2170,6 @@ void CodeGenFunction::EmitOMPTargetSimdD
emitCommonOMPTargetDirective(*this, S, CodeGen);
}
-void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
- const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
- OMPLexicalScope Scope(*this, S, OMPD_unknown);
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_target_teams_distribute_parallel_for_simd,
- [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
- });
-}
-
namespace {
struct ScheduleKindModifiersTy {
OpenMPScheduleClauseKind Kind;
@@ -4303,6 +4293,56 @@ void CodeGenFunction::EmitOMPTargetTeams
};
emitCommonOMPTargetDirective(*this, S, CodeGen);
}
+
+static void emitTargetTeamsDistributeParallelForSimdRegion(
+ CodeGenFunction &CGF,
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S,
+ PrePostActionTy &Action) {
+ auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+ S.getDistInc());
+ };
+
+ // Emit teams region as a standalone region.
+ auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+ CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
+ CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+ };
+
+ emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
+ CodeGenTeams);
+ emitPostUpdateForReductionClause(CGF, S,
+ [](CodeGenFunction &) { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+ // Emit SPMD target teams distribute parallel for simd region as a standalone
+ // region.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
void CodeGenFunction::EmitOMPCancellationPointDirective(
const OMPCancellationPointDirective &S) {
Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=322515&r1=322514&r2=322515&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Mon Jan 15 12:59:40 2018
@@ -2942,6 +2942,11 @@ public:
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM,
StringRef ParentName,
const OMPTargetSimdDirective &S);
+ /// Emit device code for the target teams distribute parallel for simd
+ /// directive.
+ static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDistributeParallelForSimdDirective &S);
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=322515&r1=322514&r2=322515&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Mon Jan 15 12:59:40 2018
@@ -2266,8 +2266,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMP
break;
}
case OMPD_distribute_parallel_for_simd:
- case OMPD_distribute_parallel_for:
- case OMPD_target_teams_distribute_parallel_for_simd: {
+ case OMPD_distribute_parallel_for: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
QualType KmpInt32PtrTy =
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
@@ -2282,7 +2281,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMP
Params);
break;
}
- case OMPD_target_teams_distribute_parallel_for: {
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
QualType KmpInt32PtrTy =
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
@@ -7490,7 +7490,6 @@ StmtResult Sema::ActOnOpenMPTargetTeamsD
// The point of exit cannot be a branch out of the structured block.
// longjmp() and throw() must not violate the entry/exit criteria.
CS->getCapturedDecl()->setNothrow();
-
for (int ThisCaptureLevel =
getOpenMPCaptureLevels(OMPD_target_teams_distribute_parallel_for);
ThisCaptureLevel > 1; --ThisCaptureLevel) {
@@ -7516,6 +7515,17 @@ StmtResult Sema::ActOnOpenMPTargetTeamsD
assert((CurContext->isDependentContext() || B.builtAll()) &&
"omp target teams distribute parallel for loop exprs were not built");
+ if (!CurContext->isDependentContext()) {
+ // Finalize the clauses that need pre-built expressions for CodeGen.
+ for (auto C : Clauses) {
+ if (auto *LC = dyn_cast<OMPLinearClause>(C))
+ if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+ B.NumIterations, *this, CurScope,
+ DSAStack))
+ return StmtError();
+ }
+ }
+
getCurFunction()->setHasBranchProtectedScope();
return OMPTargetTeamsDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
@@ -7536,15 +7546,26 @@ StmtResult Sema::ActOnOpenMPTargetTeamsD
// The point of exit cannot be a branch out of the structured block.
// longjmp() and throw() must not violate the entry/exit criteria.
CS->getCapturedDecl()->setNothrow();
+ for (int ThisCaptureLevel = getOpenMPCaptureLevels(
+ OMPD_target_teams_distribute_parallel_for_simd);
+ ThisCaptureLevel > 1; --ThisCaptureLevel) {
+ CS = cast<CapturedStmt>(CS->getCapturedStmt());
+ // 1.2.2 OpenMP Language Terminology
+ // Structured block - An executable statement with a single entry at the
+ // top and a single exit at the bottom.
+ // The point of exit cannot be a branch out of the structured block.
+ // longjmp() and throw() must not violate the entry/exit criteria.
+ CS->getCapturedDecl()->setNothrow();
+ }
OMPLoopDirective::HelperExprs B;
// In presence of clause 'collapse' with number of loops, it will
// define the nested loops number.
- auto NestedLoopCount = CheckOpenMPLoop(
- OMPD_target_teams_distribute_parallel_for_simd,
- getCollapseNumberExpr(Clauses),
- nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
- VarsWithImplicitDSA, B);
+ auto NestedLoopCount =
+ CheckOpenMPLoop(OMPD_target_teams_distribute_parallel_for_simd,
+ getCollapseNumberExpr(Clauses),
+ nullptr /*ordered not a clause on distribute*/, CS, *this,
+ *DSAStack, VarsWithImplicitDSA, B);
if (NestedLoopCount == 0)
return StmtError();
@@ -9209,7 +9230,7 @@ OMPClause *Sema::ActOnOpenMPPrivateClaus
// A list item cannot appear in both a map clause and a data-sharing
// attribute clause on the same construct
if (CurrDir == OMPD_target || CurrDir == OMPD_target_parallel ||
- CurrDir == OMPD_target_teams ||
+ CurrDir == OMPD_target_teams ||
CurrDir == OMPD_target_teams_distribute ||
CurrDir == OMPD_target_teams_distribute_parallel_for ||
CurrDir == OMPD_target_teams_distribute_parallel_for_simd ||
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,131 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix HCK1 --check-prefix HCK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix HCK1 --check-prefix HCK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix HCK1 --check-prefix HCK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix HCK1 --check-prefix HCK1-64
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix TCK1 --check-prefix TCK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 --check-prefix TCK1 --check-prefix TCK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix TCK1 --check-prefix TCK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 --check-prefix TCK1 --check-prefix TCK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY1
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+#ifdef CK1
+
+// HCK1: define{{.*}} i32 @{{.+}}target_teams_fun{{.*}}(
+int target_teams_fun(int *g){
+ int n = 1000;
+ int a[1000];
+ int te = n / 128;
+ int th = 128;
+// discard n_addr
+// HCK1: alloca i32,
+// HCK1: [[TE:%.+]] alloca i32,
+// HCK1: [[TH:%.+]] = alloca i32,
+// HCK1: [[I:%.+]] = alloca i32,
+// discard capture expressions for te and th
+// HCK1: = alloca i32,
+// HCK1: = alloca i32,
+// HCK1: [[I_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: [[I_PAR:%.+]] = load{{.+}}, {{.+}} [[I_CAST]],
+// HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
+// HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
+// HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
+// HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}},
+
+// HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[I_PAR]], i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
+ int i;
+#pragma omp target teams distribute parallel for simd num_teams(te), thread_limit(th) aligned(a : 8) safelen(16) simdlen(4) linear(i : n)
+ for(i = 0; i < n; i++) {
+ a[i] = 0;
+ }
+
+ // HCK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0),
+ // HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
+ {{{
+ #pragma omp target teams distribute parallel for simd is_device_ptr(g) simdlen(8)
+ for(int i = 0; i < n; i++) {
+ a[i] = g[0];
+ }
+ }}}
+
+ // outlined target regions
+ // HCK1: define internal void @[[OFFL1]](i{{32|64}} [[I_ARG:%.+]], i{{32|64}} [[N_ARG:%.+]], {{.+}}, i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]])
+ // TCK1: define void @{{.+}}target_teams_fun{{.*}}(i{{32|64}} [[I_ARG:%.+]], i{{32|64}} [[N_ARG:%.+]], {{.+}}, i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]])
+ // CK1: [[I_ADDR:%.+]] = alloca i{{32|64}},
+ // CK1: [[N_ADDR:%.+]] = alloca i{{32|64}},
+ // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
+ // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
+ // TCK1: store {{.+}} [[N_ARG]], {{.+}} [[N_ADDR]],
+ // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
+ // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
+ // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
+ // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
+ // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
+ // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
+ // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
+ // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
+ // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL1]]({{.+}})
+ // CK1: [[ARRDECAY:%.+]] = getelementptr inbounds [1000 x i32], [1000 x i32]* %{{.+}}, i32 0, i32 0
+ // CK1: [[ARR_CAST:%.+]] = ptrtoint i32* [[ARRDECAY]] to i{{32|64}}
+ // CK1: [[MASKED_PTR:%.+]] = and i{{32|64}} [[ARR_CAST]], 7
+ // CK1: [[COND:%.+]] = icmp eq i{{32|64}} [[MASKED_PTR]], 0
+ // CK1: call void @llvm.assume(i1 [[COND]])
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void {{.+}} @__kmpc_fork_call(
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // HCK1: define internal void @[[OFFL2]](
+ // TCK1: define void @{{.+}}target_teams_fun{{.+}}(
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL2]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void {{.+}} @__kmpc_fork_call(
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ return a[0];
+}
+
+// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 4}
+// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
+// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 8}
+
+#endif // CK1
+#endif // HEADER
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,472 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target teams distribute parallel for simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// Check that no target code is emmitted if no omptests flag was provided.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s
+// SIMD-ONLY2-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+
+// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat
+
+// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
+// CHECK-DAG: [[A2:@.+]] = global [[SA]]
+// CHECK-DAG: [[B1:@.+]] = global [[SB]]
+// CHECK-DAG: [[B2:@.+]] = global [[SB]]
+// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
+// CHECK-DAG: [[D1:@.+]] = global [[SD]]
+// CHECK-DAG: [[E1:@.+]] = global [[SE]]
+// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
+// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
+
+// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
+// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
+// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
+// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
+// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
+// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
+// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
+// CHECK-NTARGET-NOT: type { i8*, i8*, %
+// CHECK-NTARGET-NOT: type { i32, %
+
+// We have 7 target regions
+
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// TCHECK-NOT: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+// CHECK-DAG: {{@.+}} = private constant i8 0
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
+// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288]
+
+// CHECK-NTARGET-NOT: private constant i8 0
+// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
+
+// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
+// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
+// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
+// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
+// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
+// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
+// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
+// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
+// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
+// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
+// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
+// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
+// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
+
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]])
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
+
+// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function.
+// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
+// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }]
+
+// CHECK-NTARGET: @llvm.global_ctors = appending global [3 x { i32, void ()*, i8* }] [
+
+extern int *R;
+
+struct SA {
+ int arr[4];
+ void foo() {
+ int a = *R;
+ a += 1;
+ *R = a;
+ }
+ SA() {
+ int a = *R;
+ a += 2;
+ *R = a;
+ }
+ ~SA() {
+ int a = *R;
+ a += 3;
+ *R = a;
+ }
+};
+
+struct SB {
+ int arr[8];
+ void foo() {
+ int a = *R;
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ a += 4;
+ *R = a;
+ }
+ SB() {
+ int a = *R;
+ a += 5;
+ *R = a;
+ }
+ ~SB() {
+ int a = *R;
+ a += 6;
+ *R = a;
+ }
+};
+
+struct SC {
+ int arr[16];
+ void foo() {
+ int a = *R;
+ a += 7;
+ *R = a;
+ }
+ SC() {
+ int a = *R;
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ a += 8;
+ *R = a;
+ }
+ ~SC() {
+ int a = *R;
+ a += 9;
+ *R = a;
+ }
+};
+
+struct SD {
+ int arr[32];
+ void foo() {
+ int a = *R;
+ a += 10;
+ *R = a;
+ }
+ SD() {
+ int a = *R;
+ a += 11;
+ *R = a;
+ }
+ ~SD() {
+ int a = *R;
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ a += 12;
+ *R = a;
+ }
+};
+
+struct SE {
+ int arr[64];
+ void foo() {
+ int a = *R;
+ #pragma omp target teams distribute parallel for simd if(target: 0)
+ for (int i = 0; i < 10; ++i)
+ a += 13;
+ *R = a;
+ }
+ SE() {
+ int a = *R;
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ a += 14;
+ *R = a;
+ }
+ ~SE() {
+ int a = *R;
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ a += 15;
+ *R = a;
+ }
+};
+
+template <int x>
+struct ST {
+ int arr[128 + x];
+ void foo() {
+ int a = *R;
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ a += 16 + x;
+ *R = a;
+ }
+ ST() {
+ int a = *R;
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ a += 17 + x;
+ *R = a;
+ }
+ ~ST() {
+ int a = *R;
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ a += 18 + x;
+ *R = a;
+ }
+};
+
+// We have to make sure we us all the target regions:
+//CHECK-DAG: define internal void @[[NAME1]](
+//CHECK-DAG: call void @[[NAME1]](
+//CHECK-DAG: define internal void @[[NAME2]](
+//CHECK-DAG: call void @[[NAME2]](
+//CHECK-DAG: define internal void @[[NAME3]](
+//CHECK-DAG: call void @[[NAME3]](
+//CHECK-DAG: define internal void @[[NAME4]](
+//CHECK-DAG: call void @[[NAME4]](
+//CHECK-DAG: define internal void @[[NAME5]](
+//CHECK-DAG: call void @[[NAME5]](
+//CHECK-DAG: define internal void @[[NAME6]](
+//CHECK-DAG: call void @[[NAME6]](
+//CHECK-DAG: define internal void @[[NAME7]](
+//CHECK-DAG: call void @[[NAME7]](
+//CHECK-DAG: define internal void @[[NAME8]](
+//CHECK-DAG: call void @[[NAME8]](
+//CHECK-DAG: define internal void @[[NAME9]](
+//CHECK-DAG: call void @[[NAME9]](
+//CHECK-DAG: define internal void @[[NAME10]](
+//CHECK-DAG: call void @[[NAME10]](
+//CHECK-DAG: define internal void @[[NAME11]](
+//CHECK-DAG: call void @[[NAME11]](
+//CHECK-DAG: define internal void @[[NAME12]](
+//CHECK-DAG: call void @[[NAME12]](
+
+//TCHECK-DAG: define void @[[NAME1]](
+//TCHECK-DAG: define void @[[NAME2]](
+//TCHECK-DAG: define void @[[NAME3]](
+//TCHECK-DAG: define void @[[NAME4]](
+//TCHECK-DAG: define void @[[NAME5]](
+//TCHECK-DAG: define void @[[NAME6]](
+//TCHECK-DAG: define void @[[NAME7]](
+//TCHECK-DAG: define void @[[NAME8]](
+//TCHECK-DAG: define void @[[NAME9]](
+//TCHECK-DAG: define void @[[NAME10]](
+//TCHECK-DAG: define void @[[NAME11]](
+//TCHECK-DAG: define void @[[NAME12]](
+
+// CHECK-NTARGET-NOT: __tgt_target
+// CHECK-NTARGET-NOT: __tgt_register_lib
+// CHECK-NTARGET-NOT: __tgt_unregister_lib
+
+// TCHECK-NOT: __tgt_target
+// TCHECK-NOT: __tgt_register_lib
+// TCHECK-NOT: __tgt_unregister_lib
+
+// We have 2 initializers with priority 500
+//CHECK: define internal void [[P500]](
+//CHECK: call void @{{.+}}()
+//CHECK: call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK: ret void
+
+// We have 1 initializers with priority 501
+//CHECK: define internal void [[P501]](
+//CHECK: call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK: ret void
+
+// We have 6 initializers with default priority
+//CHECK: define internal void [[PMAX]](
+//CHECK: call void @{{.+}}()
+//CHECK: call void @{{.+}}()
+//CHECK: call void @{{.+}}()
+//CHECK: call void @{{.+}}()
+//CHECK: call void @{{.+}}()
+//CHECK: call void @{{.+}}()
+//CHECK-NOT: call void @{{.+}}()
+//CHECK: ret void
+
+// Check registration and unregistration
+
+//CHECK: define internal void @[[UNREGFN:.+]](i8*)
+//CHECK-SAME: comdat($[[REGFN]]) {
+//CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
+//CHECK: ret void
+//CHECK: declare i32 @__tgt_unregister_lib([[DSCTY]]*)
+
+//CHECK: define linkonce dso_local hidden void @[[REGFN]](i8*)
+//CHECK-SAME: comdat {
+//CHECK: call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
+//CHECK: call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
+//CHECK: ret void
+//CHECK: declare i32 @__tgt_register_lib([[DSCTY]]*)
+
+static __attribute__((init_priority(500))) SA a1;
+SA a2;
+SB __attribute__((init_priority(500))) b1;
+SB __attribute__((init_priority(501))) b2;
+static SC c1;
+SD d1;
+SE e1;
+ST<100> t1;
+ST<1000> t2;
+
+
+int bar(int a){
+ int r = a;
+
+ a1.foo();
+ a2.foo();
+ b1.foo();
+ b2.foo();
+ c1.foo();
+ d1.foo();
+ e1.foo();
+ t1.foo();
+ t2.foo();
+
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ ++r;
+
+ return r + *R;
+}
+
+// Check metadata is properly generated:
+// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 216, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 268, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 286, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 293, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 436, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 312, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 319, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 305, i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 242, i32 {{[0-9]+}}}
+
+#endif
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,86 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// Test target teams distribute parallel for simd codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[CA:%.+]] = type { i32* }
+
+// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
+int nested(int a){
+ // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]](
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ ++a;
+
+ // CHECK: call void @"[[LNAME:.+]]"([[CA]]*
+ auto F = [&](){
+ #pragma omp parallel
+ {
+ #pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < 10; ++i)
+ ++a;
+ }
+ };
+
+ F();
+
+ return a;
+}
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]](
+
+// CHECK: define {{.*}}void @"[[LNAME]]"(
+// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
+
+// CHECK: define {{.*}}void [[PNAME]](
+// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]](
+
+// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]](
+// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]](
+
+
+// Check metadata is properly generated:
+// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+
+// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
+// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
+#endif
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,157 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+ T a[X][Y];
+
+ // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+ int foo(void) {
+
+ // CK1: call i32 @__tgt_target_teams(
+ // CK1: call void @[[OFFL1:.+]](
+ #pragma omp target teams distribute parallel for simd collapse(2)
+ for(int i = 0; i < X; i++) {
+ for(int j = 0; j < Y; j++) {
+ a[i][j] = (T)0;
+ }
+ }
+ // CK1: define internal void @[[OFFL1]](
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL1]]({{.+}})
+ // discard loop variables not needed here
+ // CK1: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+ // CK1: store i32 56087, i32* [[OMP_UB]],
+ // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+ // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ return a[0][0];
+ }
+};
+
+int teams_template_struct(void) {
+ SS<int, 123, 456> V;
+ return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n, int m>
+int tmain(T argc) {
+ T a[n][m];
+ #pragma omp target teams distribute parallel for simd collapse(2)
+ for(int i = 0; i < n; i++) {
+ for(int j = 0; j < m; j++) {
+ a[i][j] = (T)0;
+ }
+ }
+ return 0;
+}
+
+int main (int argc, char **argv) {
+ int n = 100;
+ int m = 2;
+ int a[n][m];
+ #pragma omp target teams distribute parallel for simd collapse(2)
+ for(int i = 0; i < n; i++) {
+ for(int j = 0; j < m; j++) {
+ a[i][j] = 0;
+ }
+ }
+ return tmain<int, 10, 2>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2: define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i64,
+// CK2: store i64 {{.+}}, i64* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_{{[4|8]}}({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: ret
+// CK2-NEXT: }
+
+// CK2: define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// discard loop variables not needed here
+// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
+// CK2: store i32 {{.+}}, i32* [[OMP_UB]],
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[TPAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,270 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+ T a[X];
+ float b;
+ // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+ int foo(void) {
+
+ // CK1: call i32 @__tgt_target_teams(
+ // CK1: call void @[[OFFL1:.+]](
+ #pragma omp target teams distribute parallel for simd
+ for(int i = 0; i < X; i++) {
+ a[i] = (T)0;
+ }
+ // CK1: call i32 @__tgt_target_teams(
+ // CK1: call void @[[OFFL2:.+]](
+ #pragma omp target teams distribute parallel for simd dist_schedule(static)
+ for(int i = 0; i < X; i++) {
+ a[i] = (T)0;
+ }
+ // CK1: call i32 @__tgt_target_teams(
+ // CK1: call void @[[OFFL3:.+]](
+ #pragma omp target teams distribute parallel for simd dist_schedule(static, X/2)
+ for(int i = 0; i < X; i++) {
+ a[i] = (T)0;
+ }
+ // CK1: define internal void @[[OFFL1]](
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL1]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+ // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[OFFL2]](
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL2]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+ // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+
+ // CK1: define internal void @[[OFFL3]](
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL3]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+ // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ return a[0];
+ }
+};
+
+int teams_template_struct(void) {
+ SS<int, 123, 456> V;
+ return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+ T a[n];
+ int m = 10;
+#pragma omp target teams distribute parallel for simd
+ for(int i = 0; i < n; i++) {
+ a[i] = (T)0;
+ }
+#pragma omp target teams distribute parallel for simd dist_schedule(static)
+ for(int i = 0; i < n; i++) {
+ a[i] = (T)0;
+ }
+#pragma omp target teams distribute parallel for simd dist_schedule(static, m)
+ for(int i = 0; i < n; i++) {
+ a[i] = (T)0;
+ }
+ return 0;
+}
+
+int main (int argc, char **argv) {
+ int n = 100;
+ int a[n];
+ int m = 10;
+#pragma omp target teams distribute parallel for simd
+ for(int i = 0; i < n; i++) {
+ a[i] = 0;
+ }
+#pragma omp target teams distribute parallel for simd dist_schedule(static)
+ for(int i = 0; i < n; i++) {
+ a[i] = 0;
+ }
+#pragma omp target teams distribute parallel for simd dist_schedule(static, m)
+ for(int i = 0; i < n; i++) {
+ a[i] = 0;
+ }
+ return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2: define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2: ret
+// CK2-NEXT: }
+
+// CK2: define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,523 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HCHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HCHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK --check-prefix HCHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK --check-prefix HCHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix TLAMBDA --check-prefix TLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+ int a, b;
+ St() : a(0), b(0) {}
+ St(const St &st) : a(st.a + st.b), b(0) {}
+ ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+ T f;
+ S(T a) : f(a + g) {}
+ S() : f(g) {}
+ S(const S &s, St t = St()) : f(s.f + t.a) {}
+ operator T() { return T(); }
+ ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+ S<T> test;
+ T t_var = T();
+ T vec[] = {1, 2};
+ S<T> s_arr[] = {1, 2};
+ S<T> &var = test;
+#pragma omp target teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ return T();
+}
+
+// HCHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// HCHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
+int t_var = 333;
+// HCHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// HCHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// HCHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// HCHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+ static int sivar;
+#ifdef LAMBDA
+ // HLAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+ // HLAMBDA-LABEL: @main
+ // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
+ [&]() {
+ // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+ // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+ // HLAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
+ // HLAMBDA: ret
+#pragma omp target teams distribute parallel for simd firstprivate(g, g1, sivar)
+ for (int i = 0; i < 2; ++i) {
+ // HLAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+ // TLAMBDA: define void @[[LOFFL1:.+]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
+ // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]],
+ // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]],
+ // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]],
+ // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]])
+ // LAMBDA: ret void
+
+ // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+ // Skip global and bound tid vars
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+ // skip loop vars
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+ // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+ // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+ // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+ // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+ g = 1;
+ g1 = 1;
+ sivar = 2;
+ // LAMBDA: call void @__kmpc_for_static_init_4(
+ // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+ // LAMBDA: call void @__kmpc_for_static_fini(
+ // LAMBDA: ret void
+
+ // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+ // Skip global and bound tid vars, and prev lb and ub vars
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
+ // skip loop vars
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
+ // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+ // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
+ // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
+ // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
+
+ // use of private vars
+ // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]],
+ // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]]
+ // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]],
+ // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]],
+ // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]],
+ // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+ // LAMBDA: call void @__kmpc_for_static_fini(
+ // LAMBDA: ret void
+ [&]() {
+ // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+ // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+ g = 2;
+ g1 = 2;
+ sivar = 4;
+ // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+ // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+ // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+ // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+ // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+ // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+ // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+ // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+ // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+ }();
+ }
+ }();
+ return 0;
+#else
+#pragma omp target teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var, sivar)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ sivar += i;
+ }
+ return tmain<int>();
+#endif
+}
+
+// HCHECK: define {{.*}}i{{[0-9]+}} @main()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5,
+// HCHECK: call void @[[OFFL1:.+]](
+// HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// HCHECK: ret
+
+// HCHECK: define{{.*}} void @[[OFFL1]](
+// TCHECK: define{{.*}} void @[[OFFL1:.+]](
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]],
+// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]],
+// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]],
+// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]],
+// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}})
+// Skip global and bound tid vars, and prev lb ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
+
+// T_VAR and SIVAR
+// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
+
+// preparation vars
+// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// HCHECK: call void @[[TOFFL1:.+]](
+// HCHECK: ret
+
+// HCHECK: define {{.*}}void @[[TOFFL1]]({{.+}})
+// TCHECK: define {{.*}}void @[[TOFFL1:.+]]({{.+}})
+// CHECK-DAG: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK-DAG: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK-DAG: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
+
+// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]],
+// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]],
+// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
+// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
+
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}})
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// Skip temp vars for loop
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// param copy
+// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
+// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
+
+// T_VAR and preparation variables
+// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+
+// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
+
+// firstprivate(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
+// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
+// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
+// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
+// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
+// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
+
+// firstprivate(var)
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
+// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
+// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,181 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void fn1();
+void fn2();
+void fn3();
+void fn4();
+void fn5();
+void fn6();
+
+int Arg;
+
+// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
+void gtid_test() {
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+#pragma omp target teams distribute parallel for simd
+ for(int i = 0 ; i < 100; i++) {}
+ // CHECK: define internal void [[OFFLOADING_FUN_0]](
+ // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+ // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
+ // CHECK: call void @__kmpc_for_static_fini(
+
+ // CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call void @__kmpc_for_static_fini(
+ // CHECK: ret
+#pragma omp target teams distribute parallel for simd if (parallel: false)
+ for(int i = 0 ; i < 100; i++) {
+ // CHECK: define internal void [[OFFLOADING_FUN_1]](
+ // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+ // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call void @__kmpc_serialized_parallel(
+ // CHECK: call void [[OMP_OUTLINED_1:@.+]](
+ // CHECK: call void @__kmpc_end_serialized_parallel(
+ // CHECK: call void @__kmpc_for_static_fini(
+ // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call void @{{.+}}gtid_test
+ // CHECK: call void @__kmpc_for_static_fini(
+ // CHECK: ret
+ gtid_test();
+ }
+}
+
+
+template <typename T>
+int tmain(T Arg) {
+#pragma omp target teams distribute parallel for simd if (true)
+ for(int i = 0 ; i < 100; i++) {
+ fn1();
+ }
+#pragma omp target teams distribute parallel for simd if (false)
+ for(int i = 0 ; i < 100; i++) {
+ fn2();
+ }
+#pragma omp target teams distribute parallel for simd if (parallel: Arg)
+ for(int i = 0 ; i < 100; i++) {
+ fn3();
+ }
+ return 0;
+}
+
+// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
+int main() {
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
+// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
+#pragma omp target teams distribute parallel for simd if (true)
+ for(int i = 0 ; i < 100; i++) {
+ // CHECK: define internal void [[OFFLOADING_FUN_0]](
+ // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
+ // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
+
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void
+ // CHECK: call void @__kmpc_for_static_fini(
+ // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call {{.*}}void @{{.+}}fn4
+ // CHECK: call void @__kmpc_for_static_fini(
+
+ fn4();
+ }
+
+#pragma omp target teams distribute parallel for simd if (false)
+ for(int i = 0 ; i < 100; i++) {
+ // CHECK: define internal void [[OFFLOADING_FUN_1]](
+ // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
+ // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
+
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call void @__kmpc_serialized_parallel(
+ // CHECK: call void [[OMP_OUTLINED_3:@.+]](
+ // CHECK: call void @__kmpc_end_serialized_parallel(
+ // CHECK: call void @__kmpc_for_static_fini(
+
+ // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call {{.*}}void @{{.+}}fn5
+ // CHECK: call void @__kmpc_for_static_fini(
+ fn5();
+ }
+
+#pragma omp target teams distribute parallel for simd if (Arg)
+ for(int i = 0 ; i < 100; i++) {
+ // CHECK: define internal void [[OFFLOADING_FUN_2]](
+ // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
+ // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]](
+
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void
+ // CHECK: call void @__kmpc_serialized_parallel(
+ // CHECK: call void [[OMP_OUTLINED_4:@.+]](
+ // CHECK: call void @__kmpc_end_serialized_parallel(
+ // CHECK: call void @__kmpc_for_static_fini(
+
+ // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call {{.*}}void @{{.+}}fn6
+ // CHECK: call void @__kmpc_for_static_fini(
+ fn6();
+ }
+
+ return tmain(Arg);
+}
+
+// CHECK-LABEL: define {{.+}} @{{.+}}tmain
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn1
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// CHECK: call void [[T_OUTLINE_FUN_2:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_for_static_fini(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn2
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
+// call void [[T_OUTLINE_FUN_3:@.+]](
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
+
+// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]]
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call {{.*}}void @{{.+}}fn3
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+#endif
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,456 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+ T f;
+ S(T a) : f(a) {}
+ S() : f() {}
+ operator T() { return T(); }
+ ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+ S<T> test;
+ T t_var = T();
+ T vec[] = {1, 2};
+ S<T> s_arr[] = {1, 2};
+ S<T> &var = test;
+ #pragma omp target teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ return T();
+}
+
+int main() {
+ static int svar;
+ volatile double g;
+ volatile double &g1 = g;
+
+ #ifdef LAMBDA
+ // LAMBDA-LABEL: @main
+ // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+ [&]() {
+ static float sfvar;
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+ // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+ // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+ // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+ // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+ #pragma omp target teams distribute parallel for simd lastprivate(g, g1, svar, sfvar)
+ for (int i = 0; i < 2; ++i) {
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* {{.+}}, i32* {{.+}}, {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]], {{.+}} [[G_IN:%.+]])
+ // skip gbl and bound tid
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: [[G1_ADDR:%.+]] = alloca {{.+}},
+ // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
+ // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
+ // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
+ // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+ // loop variables
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+ // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
+ // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
+ // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+ // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
+
+ // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+ // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
+ // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
+ // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+ // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+ // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
+
+ g1 = 1;
+ svar = 3;
+ sfvar = 4.0;
+ // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+ // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+ // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+ // LAMBDA: store i32 2, i32* %
+ // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+ // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+ // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+ // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_TGT]],
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_TGT]],
+ // LAMBDA-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+ // LAMBDA-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[SFVAR_TGT]],
+ // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+ // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+ // LAMBDA: ret
+
+ // LAMBDA: define{{.*}} internal{{.*}} void @[[LPAR_OUTL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]], {{.+}} [[G_IN:%.+]])
+ // skip tid and prev variables
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: [[G1_ADDR:%.+]] = alloca {{.+}},
+ // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
+ // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
+ // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
+ // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+ // loop variables
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+ // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
+ // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
+ // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
+ // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
+
+ // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
+ // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
+ // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
+ // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+ // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+ // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
+
+
+ // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+ // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](
+ // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+
+ // LAMBDA: store i32 2, i32* %
+ // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+ // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+ // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+ // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_TGT]],
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_TGT]],
+ // LAMBDA-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+ // LAMBDA-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+ // LAMBDA-DAG: store {{.+}}, {{.+}} [[SFVAR_TGT]],
+ // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+ // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+ // LAMBDA: ret
+
+ [&]() {
+ // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+ // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+ g = 2;
+ g1 = 2;
+ svar = 4;
+ sfvar = 8.0;
+ // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+ // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+ // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+ // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+ // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+ // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+ // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+ // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+ // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+ // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+ // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+ // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+ }();
+ }
+ }();
+ return 0;
+ #else
+ S<float> test;
+ int t_var = 0;
+ int vec[] = {1, 2};
+ S<float> s_arr[] = {1, 2};
+ S<float> &var = test;
+
+ #pragma omp target teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ int i;
+
+ return tmain<int>();
+ #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}{{.*}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK: store {{.+}} [[T_VAR_IN]], {{.+}} [[T_VAR_ADDR]],
+// CHECK: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+// CHECK: store {{.+}} [[S_VAR_IN]], {{.+}} [[SVAR_ADDR]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[TVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR]],
+// CHECK-DAG: [[VEC_TGT_REF:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[VEC_TGT_REF]],
+// CHECK-DAG: [[S_ARR_BEGIN:%.+]] = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_TGT_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[VAR_TGT_BCAST]],
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+// CHECK: ret void
+
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}{{.*}} [[S_VAR_IN:%.+]])
+
+// gbl and bound tid vars, prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]],
+// CHECK: store {{.+}} [[T_VAR_IN]], {{.+}} [[T_VAR_ADDR]],
+// CHECK: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]],
+// CHECK: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]],
+// CHECK: store {{.+}} [[S_VAR_IN]], {{.+}} [[SVAR_ADDR]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
+
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// skip body: code generation routine is same as distribute parallel for lastprivate
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[TVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR]],
+// CHECK-DAG: [[VEC_TGT_REF:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[VEC_TGT_REF]],
+// CHECK-DAG: [[S_ARR_BEGIN:%.+]] = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_TGT_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[VAR_TGT_BCAST]],
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[SVAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[SVAR_ADDR]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](
+// CHECK: ret
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN1]], {{.+}} [[VEC_ADDR1]],
+// CHECK: store {{.+}} [[T_VAR_IN1]], {{.+}} [[T_VAR_ADDR1]],
+// CHECK: store {{.+}} [[S_ARR_IN1]], {{.+}} [[S_ARR_ADDR1]],
+// CHECK: store {{.+}} [[VAR_IN1]], {{.+}} [[VAR_ADDR1]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[T_VAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR1]],
+// CHECK-DAG: [[VEC_TGT_BCAST:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[VEC_TGT_BCAST]],
+// CHECK-DAG: {{.+}} = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_BCAST]],
+// CHECK: ret void
+
+// CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store {{.+}} [[VEC_IN1]], {{.+}} [[VEC_ADDR1]],
+// CHECK: store {{.+}} [[T_VAR_IN1]], {{.+}} [[T_VAR_ADDR1]],
+// CHECK: store {{.+}} [[S_ARR_IN1]], {{.+}} [[S_ARR_ADDR1]],
+// CHECK: store {{.+}} [[VAR_IN1]], {{.+}} [[VAR_ADDR1]],
+
+// prepare lastprivate targets
+// CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
+// CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
+// CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// skip body: code generation routine is same as distribute parallel for lastprivate
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK-64-DAG: store {{.+}}, {{.+}} [[T_VAR_TGT]],
+// CHECK-32-DAG: store {{.+}}, {{.+}} [[T_VAR_ADDR1]],
+// CHECK-DAG: [[VEC_TGT_BCAST:%.+]] = bitcast {{.+}} [[VEC_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[VEC_TGT_BCAST]],
+// CHECK-DAG: {{.+}} = getelementptr {{.+}} [[S_ARR_TGT]],
+// CHECK: call void @llvm.memcpy.{{.+}}(
+// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_BCAST]],
+// CHECK: ret void
+
+#endif
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,369 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HHECK --check-prefix HCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HHECK --check-prefix HCHECK-64
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix HLAMBDA --check-prefix HLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+
+// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region)
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix TCHECK --check-prefix TCHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 --check-prefix TLAMBDA --check-prefix TLAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY0
+//
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix SIMD-ONLY0
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct St {
+ int a, b;
+ St() : a(0), b(0) {}
+ St(const St &st) : a(st.a + st.b), b(0) {}
+ ~St() {}
+};
+
+volatile int g = 1212;
+volatile int &g1 = g;
+
+template <class T>
+struct S {
+ T f;
+ S(T a) : f(a + g) {}
+ S() : f(g) {}
+ S(const S &s, St t = St()) : f(s.f + t.a) {}
+ operator T() { return T(); }
+ ~S() {}
+};
+
+// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+
+template <typename T>
+T tmain() {
+ S<T> test;
+ T t_var = T();
+ T vec[] = {1, 2};
+ S<T> s_arr[] = {1, 2};
+ S<T> &var = test;
+#pragma omp target teams distribute parallel for simd private(t_var, vec, s_arr, var)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ return T();
+}
+
+// HCHECK-DAG: [[TEST:@.+]] ={{.*}} global [[S_FLOAT_TY]] zeroinitializer,
+S<float> test;
+// HCHECK-DAG: [[T_VAR:@.+]] ={{.+}} global i{{[0-9]+}} 333,
+int t_var = 333;
+// HCHECK-DAG: [[VEC:@.+]] ={{.+}} global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+int vec[] = {1, 2};
+// HCHECK-DAG: [[S_ARR:@.+]] ={{.+}} global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+S<float> s_arr[] = {1, 2};
+// HCHECK-DAG: [[VAR:@.+]] ={{.+}} global [[S_FLOAT_TY]] zeroinitializer,
+S<float> var(3);
+// HCHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+
+int main() {
+ static int sivar;
+#ifdef LAMBDA
+ // HLAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+ // HLAMBDA-LABEL: @main
+ // HLAMBDA: call void [[OUTER_LAMBDA:@.+]](
+ [&]() {
+ // HLAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+ // HLAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+ // HLAMBDA: call void @[[LOFFL1:.+]](
+ // HLAMBDA: ret
+#pragma omp target teams distribute parallel for simd private(g, g1, sivar)
+ for (int i = 0; i < 2; ++i) {
+ // HLAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]]()
+ // TLAMBDA: define{{.*}} void @[[LOFFL1:.+]]()
+ // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
+ // LAMBDA: ret void
+
+ // LAMBDA: define internal void @[[LOUTL1]]({{.+}})
+ // Skip global, bound tid and loop vars
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+ // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+ // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+
+ g = 1;
+ g1 = 1;
+ sivar = 2;
+ // LAMBDA: call void @__kmpc_for_static_init_4(
+ // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+ // LAMBDA: call void @__kmpc_for_static_fini(
+ // LAMBDA: ret void
+
+ // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
+ // Skip global, bound tid and loop vars
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: alloca i{{[0-9]+}},
+ // LAMBDA: alloca i{{[0-9]+}},
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
+ // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
+ // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
+ // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
+ // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
+ // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]],
+ // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
+ // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+ // LAMBDA: call void @__kmpc_for_static_fini(
+ // LAMBDA: ret void
+ [&]() {
+ // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+ // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+ g = 2;
+ g1 = 2;
+ sivar = 4;
+ // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+ // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
+ // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+ // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+ // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
+ // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
+ // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+ // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+ // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
+ }();
+ }
+ }();
+ return 0;
+#else
+#pragma omp target teams distribute parallel for simd private(t_var, vec, s_arr, var, sivar)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ sivar += i;
+ }
+ return tmain<int>();
+#endif
+}
+
+// HCHECK: define {{.*}}i{{[0-9]+}} @main()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, {{.+}} null, {{.+}} null, i32 0, i32 0)
+// HCHECK: call void @[[OFFL1:.+]]()
+// HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// HCHECK: ret
+
+// HCHECK: define{{.*}} void @[[OFFL1]]()
+// TCHECK: define{{.*}} void @[[OFFL1:.+]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[PAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK: {{.+}} = alloca i32,
+// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// HCHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
+// HCHECK: call void @[[TOFFL1:.+]]()
+// HCHECK: ret
+
+// HCHECK: define {{.*}}void @[[TOFFL1]]()
+// TCHECK: define void @[[TOFFL1:.+]]()
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}})
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}})
+// Skip global, bound tid and loop vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// prev lb and ub
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// iter variables
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i32,
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+
+// private(s_arr)
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
+// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
+// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
+
+// private(var)
+// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
+// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
+// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+
+#endif
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,93 @@
+// add -fopenmp-targets
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+ intptr_t a, b, c;
+ S(intptr_t a) : a(a) {}
+ operator char() { return a; }
+ ~S() {}
+};
+
+template <typename T>
+T tmain() {
+#pragma omp target teams distribute parallel for simd proc_bind(master)
+ for(int i = 0; i < 1000; i++) {}
+ return T();
+}
+
+int main() {
+ // CHECK-LABEL: @main
+#pragma omp target teams distribute parallel for simd proc_bind(spread)
+ for(int i = 0; i < 1000; i++) {}
+#pragma omp target teams distribute parallel for simd proc_bind(close)
+ for(int i = 0; i < 1000; i++) {}
+ return tmain<int>();
+}
+
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL1:@.+]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL2:@.+]]()
+// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
+// CHECK: ret i32 [[CALL_RET]]
+
+// CHECK: define{{.+}} void [[OFFL1]](
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[OFFL2]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+
+// CHECK: define{{.+}} [[TMAIN]]()
+// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
+// CHECK: call void [[OFFL3:@.+]]()
+
+// CHECK: define{{.+}} [[OFFL3]]()
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
+
+// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]],
+// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
+// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
+// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
+// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
+// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2)
+// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK: ret void
+#endif
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,353 @@
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <typename T>
+T tmain() {
+ T t_var = T();
+ T vec[] = {1, 2};
+#pragma omp target teams distribute parallel for simd reduction(+: t_var)
+ for (int i = 0; i < 2; ++i) {
+ t_var += (T) i;
+ }
+ return T();
+}
+
+int main() {
+ static int sivar;
+#ifdef LAMBDA
+ // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+ // LAMBDA-LABEL: @main
+ // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+ [&]() {
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+ // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+ // LAMBDA: call void @[[LOFFL1:.+]](
+ // LAMBDA: ret
+#pragma omp target teams distribute parallel for simd reduction(+: sivar)
+ for (int i = 0; i < 2; ++i) {
+ // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}}*{{.*}} [[SIVAR_ARG:%.+]])
+ // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+ // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+ // LAMBDA: [[SIVAR_PAR:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]],
+ // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_PAR]])
+ // LAMBDA: ret void
+
+ // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+ // Skip global and bound tid vars
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+ // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+ // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+ // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+ // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+ // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+ // LAMBDA: call void @__kmpc_for_static_init_4(
+ // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
+ // LAMBDA: call void @__kmpc_for_static_fini(
+ // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+ // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+ // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+ // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+ // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+ // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+ // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+ // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+ // LAMBDA: ]
+ // LAMBDA: [[CASE1]]:
+ // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+ // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+ // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+ // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+ // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+ // LAMBDA: br
+ // LAMBDA: [[CASE2]]:
+ // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+ // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+ // LAMBDA: br
+
+ // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+
+ // Skip global and bound tid vars, and prev lb and ub vars
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: {{.+}} = alloca i32*,
+ // LAMBDA: alloca i{{[0-9]+}},
+ // LAMBDA: alloca i{{[0-9]+}},
+ // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+ // skip loop vars
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: alloca i32,
+ // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+ // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+ // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+ // LAMBDA: [[SIVAR_REF:%.+]] = load {{.+}} [[SIVAR_ADDR]]
+ // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+ // LAMBDA: call void @__kmpc_for_static_init_4(
+ // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+ // LAMBDA: call void [[INNER_LAMBDA:@.+]](
+ // LAMBDA: call void @__kmpc_for_static_fini(
+ // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+ // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+ // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+ // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+ // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+ // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+ // LAMBDA: {{.+}}, label %[[CASE1:.+]]
+ // LAMBDA: {{.+}}, label %[[CASE2:.+]]
+ // LAMBDA: ]
+ // LAMBDA: [[CASE1]]:
+ // LAMBDA-64-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+ // LAMBDA-32-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]],
+ // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+ // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+ // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+ // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+ // LAMBDA: br
+ // LAMBDA: [[CASE2]]:
+ // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+ // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+ // LAMBDA: br
+
+ sivar += i;
+
+ [&]() {
+ // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+ // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+
+ sivar += 4;
+ // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+
+ // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+ // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
+ // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
+ // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
+ }();
+ }
+ }();
+ return 0;
+#else
+#pragma omp target teams distribute parallel for simd reduction(+: sivar)
+ for (int i = 0; i < 2; ++i) {
+ sivar += i;
+ }
+ return tmain<int>();
+#endif
+}
+
+// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
+
+// CHECK: define {{.*}}i{{[0-9]+}} @main()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+// CHECK: call void @[[OFFL1:.+]](i{{64|32}}* @{{.+}})
+// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
+// CHECK: ret
+
+// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}}*{{.*}} [[SIVAR_ARG:%.+]])
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_VAL]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK: [[SIVAR_REF:%.+]] = load{{.+}} [[SIVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
+// CHECK-64: [[SIVAR_REF:%.+]] = load {{.+}} [[SIVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
+// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
+// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK: call void @[[TOFFL1:.+]]({{.+}})
+// CHECK: ret
+
+// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}}*{{.*}} [[TVAR_ARG:%.+]])
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
+// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_VAL]])
+// CHECK: ret void
+
+// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load {{.+}} [[TVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+
+// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
+// Skip global and bound tid vars, and prev lb and ub vars
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: {{.+}} = alloca i32*,
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
+// skip loop vars
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
+// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
+// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
+// CHECK: [[TVAR_REF:%.+]] = load {{.+}} [[TVAR_ADDR]],
+// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
+
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
+// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
+// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
+// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
+// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
+// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
+// CHECK: {{.+}}, label %[[CASE1:.+]]
+// CHECK: {{.+}}, label %[[CASE2:.+]]
+// CHECK: ]
+// CHECK: [[CASE1]]:
+// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
+// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
+// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
+// CHECK: br
+// CHECK: [[CASE2]]:
+// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK: br
+#endif
Added: cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp?rev=322515&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp Mon Jan 15 12:59:40 2018
@@ -0,0 +1,400 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+#ifdef CK1
+
+template <typename T, int X, long long Y>
+struct SS{
+ T a[X];
+ float b;
+ // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
+ int foo(void) {
+
+ // CK1: call i32 @__tgt_target_teams(
+ // CK1: call void @[[OFFL1:.+]](
+ #pragma omp target teams distribute parallel for simd
+ for(int i = 0; i < X; i++) {
+ a[i] = (T)0;
+ }
+ // CK1: call i32 @__tgt_target_teams(
+ // CK1: call void @[[OFFL2:.+]](
+ #pragma omp target teams distribute parallel for simd schedule(static)
+ for(int i = 0; i < X; i++) {
+ a[i] = (T)0;
+ }
+ // CK1: call i32 @__tgt_target_teams(
+ // CK1: call void @[[OFFL3:.+]](
+ #pragma omp target teams distribute parallel for simd schedule(static, X/2)
+ for(int i = 0; i < X; i++) {
+ a[i] = (T)0;
+ }
+
+ // CK1: call i32 @__tgt_target_teams(
+ // CK1: call void @[[OFFL4:.+]](
+ #pragma omp target teams distribute parallel for simd schedule(dynamic)
+ for(int i = 0; i < X; i++) {
+ a[i] = (T)0;
+ }
+
+ // CK1: call i32 @__tgt_target_teams(
+ // CK1: call void @[[OFFL5:.+]](
+ #pragma omp target teams distribute parallel for simd schedule(dynamic, X/2)
+ for(int i = 0; i < X; i++) {
+ a[i] = (T)0;
+ }
+
+ // CK1: define internal void @[[OFFL1]](
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL1]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[PAR_OUTL1]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[OFFL2]](
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL2]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[PAR_OUTL2]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[OFFL3]](
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL3]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[PAR_OUTL3]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[OFFL4]](
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL4:.+]] to {{.+}},
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL4]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[PAR_OUTL4]]({{.+}})
+ // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+ // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+ // CK1: ret void
+
+ // CK1: define internal void @[[OFFL5]](
+ // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL5:.+]] to {{.+}},
+ // CK1: ret void
+
+ // CK1: define internal void @[[OUTL5]]({{.+}})
+ // CK1: call void @__kmpc_for_static_init_4(
+ // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+ // CK1: call void @__kmpc_for_static_fini(
+ // CK1: ret void
+
+ // CK1: define internal void @[[PAR_OUTL5]]({{.+}})
+ // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+ // CK1: call {{.+}} @__kmpc_dispatch_next_4(
+ // CK1: ret void
+
+ return a[0];
+ }
+};
+
+int teams_template_struct(void) {
+ SS<int, 123, 456> V;
+ return V.foo();
+
+}
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+#ifdef CK2
+
+template <typename T, int n>
+int tmain(T argc) {
+ T a[n];
+ int m = 10;
+#pragma omp target teams distribute parallel for simd
+ for(int i = 0; i < n; i++) {
+ a[i] = (T)0;
+ }
+#pragma omp target teams distribute parallel for simd schedule(static)
+ for(int i = 0; i < n; i++) {
+ a[i] = (T)0;
+ }
+#pragma omp target teams distribute parallel for simd schedule(static, m)
+ for(int i = 0; i < n; i++) {
+ a[i] = (T)0;
+ }
+#pragma omp target teams distribute parallel for simd schedule(dynamic)
+ for(int i = 0; i < n; i++) {
+ a[i] = (T)0;
+ }
+#pragma omp target teams distribute parallel for simd schedule(dynamic, m)
+ for(int i = 0; i < n; i++) {
+ a[i] = (T)0;
+ }
+ return 0;
+}
+
+int main (int argc, char **argv) {
+ int n = 100;
+ int a[n];
+ int m = 10;
+#pragma omp target teams distribute parallel for simd
+ for(int i = 0; i < n; i++) {
+ a[i] = 0;
+ }
+#pragma omp target teams distribute parallel for simd dist_schedule(static)
+ for(int i = 0; i < n; i++) {
+ a[i] = 0;
+ }
+#pragma omp target teams distribute parallel for simd dist_schedule(static, m)
+ for(int i = 0; i < n; i++) {
+ a[i] = 0;
+ }
+#pragma omp target teams distribute parallel for simd schedule(dynamic)
+ for(int i = 0; i < n; i++) {
+ a[i] = 0;
+ }
+#pragma omp target teams distribute parallel for simd schedule(dynamic, m)
+ for(int i = 0; i < n; i++) {
+ a[i] = 0;
+ }
+ return tmain<int, 10>(argc);
+}
+
+// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFL5:.+]]({{.+}})
+// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
+// CK2: ret
+
+// CK2: define {{.*}}void @[[OFFL1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFL4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+
+// CK2: define {{.*}}void @[[OFFL5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTL5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTL5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT1:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT2:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT3:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT4:.+]]({{.+}})
+// CK2: call i32 @__tgt_target_teams(
+// CK2: call void @[[OFFLT5:.+]]({{.+}})
+// CK2: ret
+// CK2-NEXT: }
+
+// CK2: define {{.*}}void @[[OFFLT1]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFLT2]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFLT3]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFLT4]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT4:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT4:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT4]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+// CK2: define {{.*}}void @[[OFFLT5]]({{.+}})
+// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT5:.+]] to {{.+}},
+// CK2: ret void
+
+// CK2: define internal void @[[OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_for_static_init_4(
+// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT5:.+]] to
+// CK2: call void @__kmpc_for_static_fini(
+// CK2: ret void
+
+// CK2: define internal void @[[PAR_OUTLT5]]({{.+}})
+// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
+// CK2: call {{.+}} @__kmpc_dispatch_next_4(
+// CK2: ret void
+
+#endif // CK2
+#endif // #ifndef HEADER
More information about the cfe-commits
mailing list