r326368 - [OpenMP] Extend NVPTX SPMD implementation of combined constructs
Carlo Bertolli via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 28 12:48:35 PST 2018
Author: cbertol
Date: Wed Feb 28 12:48:35 2018
New Revision: 326368
URL: http://llvm.org/viewvc/llvm-project?rev=326368&view=rev
Log:
[OpenMP] Extend NVPTX SPMD implementation of combined constructs
Differential Revision: https://reviews.llvm.org/D43852
This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag.
Added:
cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
Modified:
cfe/trunk/include/clang/Basic/LangOptions.def
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
cfe/trunk/lib/Driver/ToolChains/Clang.cpp
cfe/trunk/lib/Frontend/CompilerInvocation.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp
cfe/trunk/test/OpenMP/target_parallel_for_debug_codegen.cpp
Modified: cfe/trunk/include/clang/Basic/LangOptions.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.def?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/LangOptions.def (original)
+++ cfe/trunk/include/clang/Basic/LangOptions.def Wed Feb 28 12:48:35 2018
@@ -197,6 +197,7 @@ LANGOPT(OpenMP , 32, 0, "Open
LANGOPT(OpenMPSimd , 1, 0, "Use SIMD only OpenMP support.")
LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
+LANGOPT(OpenMPCUDAMode , 1, 0, "Generate code for OpenMP pragmas in SIMT/SPMD mode")
LANGOPT(RenderScript , 1, 0, "RenderScript")
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
Modified: cfe/trunk/include/clang/Driver/Options.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Wed Feb 28 12:48:35 2018
@@ -1424,6 +1424,8 @@ def fnoopenmp_relocatable_target : Flag<
def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
def force__cpusubtype__ALL : Flag<["-"], "force_cpusubtype_ALL">;
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Wed Feb 28 12:48:35 2018
@@ -271,21 +271,10 @@ bool CGOpenMPRuntimeNVPTX::isInSpmdExecu
}
static CGOpenMPRuntimeNVPTX::ExecutionMode
-getExecutionModeForDirective(CodeGenModule &CGM,
- const OMPExecutableDirective &D) {
- OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
- switch (DirectiveKind) {
- case OMPD_target:
- case OMPD_target_teams:
- return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
- case OMPD_target_parallel:
- case OMPD_target_parallel_for:
- case OMPD_target_parallel_for_simd:
- return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
- default:
- llvm_unreachable("Unsupported directive on NVPTX device.");
- }
- llvm_unreachable("Unsupported directive on NVPTX device.");
+getExecutionMode(CodeGenModule &CGM) {
+ return CGM.getLangOpts().OpenMPCUDAMode
+ ? CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd
+ : CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
}
void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D,
@@ -819,8 +808,7 @@ void CGOpenMPRuntimeNVPTX::emitTargetOut
assert(!ParentName.empty() && "Invalid target region parent name!");
- CGOpenMPRuntimeNVPTX::ExecutionMode Mode =
- getExecutionModeForDirective(CGM, D);
+ CGOpenMPRuntimeNVPTX::ExecutionMode Mode = getExecutionMode(CGM);
switch (Mode) {
case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic:
emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
@@ -1051,10 +1039,13 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParal
// TODO: Do something with IfCond when support for the 'if' clause
// is added on Spmd target directives.
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+
+ Address ZeroAddr = CGF.CreateMemTemp(
+ CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
}
Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Wed Feb 28 12:48:35 2018
@@ -4260,6 +4260,7 @@ void CodeGenFunction::EmitOMPTeamsDistri
static void emitTargetTeamsDistributeParallelForRegion(
CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
PrePostActionTy &Action) {
+ Action.Enter(CGF);
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
S.getDistInc());
@@ -4310,6 +4311,7 @@ static void emitTargetTeamsDistributePar
CodeGenFunction &CGF,
const OMPTargetTeamsDistributeParallelForSimdDirective &S,
PrePostActionTy &Action) {
+ Action.Enter(CGF);
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
S.getDistInc());
Modified: cfe/trunk/lib/Driver/ToolChains/Clang.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Clang.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/ToolChains/Clang.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp Wed Feb 28 12:48:35 2018
@@ -3970,6 +3970,11 @@ void Clang::ConstructJob(Compilation &C,
options::OPT_fnoopenmp_use_tls, /*Default=*/true))
CmdArgs.push_back("-fnoopenmp-use-tls");
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
+
+ // When in OpenMP offloading mode with NVPTX target, forward
+ // cuda-mode flag
+ Args.AddLastArg(CmdArgs, options::OPT_fopenmp_cuda_mode,
+ options::OPT_fno_openmp_cuda_mode);
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original)
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Wed Feb 28 12:48:35 2018
@@ -2526,6 +2526,10 @@ static void ParseLangArgs(LangOptions &O
<< Opts.OMPHostIRFile;
}
+ // set CUDA mode for OpenMP target NVPTX if specified in options
+ Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() &&
+ Args.hasArg(options::OPT_fopenmp_cuda_mode);
+
// Record whether the __DEPRECATED define was requested.
Opts.Deprecated = Args.hasFlag(OPT_fdeprecated_macro,
OPT_fno_deprecated_macro,
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -1,9 +1,9 @@
// Test target codegen - host bc file has to be created first.
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
@@ -62,7 +62,7 @@ int bar(int n){
// CHECK: br label {{%?}}[[EXEC:.+]]
//
// CHECK: [[EXEC]]
- // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null, i16* [[AA]])
+ // CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]])
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
@@ -104,7 +104,7 @@ int bar(int n){
// CHECK: br label {{%?}}[[EXEC:.+]]
//
// CHECK: [[EXEC]]
- // CHECK: {{call|invoke}} void [[OP2:@.+]](i32* null, i32* null, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
+ // CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -1,9 +1,9 @@
// Test target codegen - host bc file has to be created first.
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
@@ -51,7 +51,7 @@ int bar(int n){
//
// CHECK: [[EXEC]]
// CHECK-NOT: call void @__kmpc_push_num_threads
- // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null, i16* [[AA]])
+ // CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]])
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
@@ -94,7 +94,7 @@ int bar(int n){
//
// CHECK: [[EXEC]]
// CHECK-NOT: call void @__kmpc_push_num_threads
- // CHECK: {{call|invoke}} void [[OP2:@.+]](i32* null, i32* null, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
+ // CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]])
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -1,9 +1,9 @@
// Test target codegen - host bc file has to be created first.
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
@@ -52,7 +52,7 @@ int bar(int n){
//
// CHECK: [[EXEC]]
// CHECK-NOT: call void @__kmpc_push_proc_bind
- // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
+ // CHECK: {{call|invoke}} void [[OP1:@.+]](
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
@@ -73,7 +73,7 @@ int bar(int n){
//
// CHECK: [[EXEC]]
// CHECK-NOT: call void @__kmpc_push_proc_bind
- // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
+ // CHECK: {{call|invoke}} void [[OP1:@.+]](
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
@@ -93,7 +93,7 @@ int bar(int n){
//
// CHECK: [[EXEC]]
// CHECK-NOT: call void @__kmpc_push_proc_bind
- // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
+ // CHECK: {{call|invoke}} void [[OP1:@.+]](
// CHECK: br label {{%?}}[[DONE:.+]]
//
// CHECK: [[DONE]]
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -1,9 +1,9 @@
// Test target codegen - host bc file has to be created first.
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
Added: cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp?rev=326368&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -0,0 +1,74 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l24}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l29}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l34}}_exec_mode = weak constant i8 0
+
+#define N 1000
+
+template<typename tx>
+tx ftemplate(int n) {
+ tx a[N];
+ short aa[N];
+ tx b[10];
+
+ #pragma omp target simd
+ for(int i = 0; i < n; i++) {
+ a[i] = 1;
+ }
+
+ #pragma omp target simd
+ for(int i = 0; i < n; i++) {
+ aa[i] += 1;
+ }
+
+ #pragma omp target simd
+ for(int i = 0; i < 10; i++) {
+ b[i] += 1;
+ }
+
+ return a[0];
+}
+
+int bar(int n){
+ int a = 0;
+
+ a += ftemplate<int>(n);
+
+ return a;
+}
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l24}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK-NOT: call void @__kmpc_for_static_init
+// CHECK-NOT: call void @__kmpc_for_static_fini
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l29}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK-NOT: call void @__kmpc_for_static_init
+// CHECK-NOT: call void @__kmpc_for_static_fini
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l34}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK-NOT: call void @__kmpc_for_static_init
+// CHECK-NOT: call void @__kmpc_for_static_fini
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+#endif
Added: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp?rev=326368&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -0,0 +1,123 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
+
+#define N 1000
+#define M 10
+
+template<typename tx>
+tx ftemplate(int n) {
+ tx a[N];
+ short aa[N];
+ tx b[10];
+ tx c[M][M];
+ tx f = n;
+ tx l;
+ int k;
+
+#pragma omp target teams distribute parallel for lastprivate(l) dist_schedule(static,128) schedule(static,32)
+ for(int i = 0; i < n; i++) {
+ a[i] = 1;
+ l = i;
+ }
+
+ #pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64)
+ for(int i = 0; i < n; i++) {
+ aa[i] += 1;
+ }
+
+#pragma omp target teams distribute parallel for map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread)
+ for(int i = 0; i < 10; i++) {
+ b[i] += 1;
+ }
+
+#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k) num_threads(M)
+ for(int i = 0; i < M; i++) {
+ for(int j = 0; j < M; j++) {
+ k = M;
+ c[i][j] = i+j*f+k;
+ }
+ }
+
+ return a[0];
+}
+
+int bar(int n){
+ int a = 0;
+
+ a += ftemplate<int>(n);
+
+ return a;
+}
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
+// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL1]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL2]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL3]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
+// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL4]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
Added: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp?rev=326368&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -0,0 +1,123 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
+
+#define N 1000
+#define M 10
+
+template<typename tx>
+tx ftemplate(int n) {
+ tx a[N];
+ short aa[N];
+ tx b[10];
+ tx c[M][M];
+ tx f = n;
+ tx l;
+ int k;
+
+#pragma omp target teams distribute parallel for simd lastprivate(l) dist_schedule(static,128) schedule(static,32)
+ for(int i = 0; i < n; i++) {
+ a[i] = 1;
+ l = i;
+ }
+
+ #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
+ for(int i = 0; i < n; i++) {
+ aa[i] += 1;
+ }
+
+#pragma omp target teams distribute parallel for simd map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread)
+ for(int i = 0; i < 10; i++) {
+ b[i] += 1;
+ }
+
+#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k) num_threads(M)
+ for(int i = 0; i < M; i++) {
+ for(int j = 0; j < M; j++) {
+ k = M;
+ c[i][j] = i+j*f+k;
+ }
+ }
+
+ return a[0];
+}
+
+int bar(int n){
+ int a = 0;
+
+ a += ftemplate<int>(n);
+
+ return a;
+}
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
+// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL1]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL2]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL3]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
+// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define internal void [[OUTL4]](
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+#endif
Added: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp?rev=326368&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -0,0 +1,99 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
+
+#define N 1000
+#define M 10
+
+template<typename tx>
+tx ftemplate(int n) {
+ tx a[N];
+ short aa[N];
+ tx b[10];
+ tx c[M][M];
+ tx f = n;
+ tx l;
+ int k;
+
+#pragma omp target teams distribute simd lastprivate(l) dist_schedule(static,128)
+ for(int i = 0; i < n; i++) {
+ a[i] = 1;
+ l = i;
+ }
+
+ #pragma omp target teams distribute simd map(tofrom: aa) num_teams(M) thread_limit(64)
+ for(int i = 0; i < n; i++) {
+ aa[i] += 1;
+ }
+
+#pragma omp target teams distribute simd map(tofrom:a, aa, b) if(target: n>40)
+ for(int i = 0; i < 10; i++) {
+ b[i] += 1;
+ }
+
+#pragma omp target teams distribute simd collapse(2) firstprivate(f) private(k)
+ for(int i = 0; i < M; i++) {
+ for(int j = 0; j < M; j++) {
+ k = M;
+ c[i][j] = i+j*f+k;
+ }
+ }
+
+ return a[0];
+}
+
+int bar(int n){
+ int a = 0;
+
+ a += ftemplate<int>(n);
+
+ return a;
+}
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
+// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+#endif
Modified: cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
// expected-no-diagnostics
int main() {
Modified: cfe/trunk/test/OpenMP/target_parallel_for_debug_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_parallel_for_debug_codegen.cpp?rev=326368&r1=326367&r2=326368&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/target_parallel_for_debug_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/target_parallel_for_debug_codegen.cpp Wed Feb 28 12:48:35 2018
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
// expected-no-diagnostics
int main() {
More information about the cfe-commits
mailing list