r361269 - [OPENMP][NVPTX]Mark more functions as always_inline for better
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Tue May 21 08:11:58 PDT 2019
Author: abataev
Date: Tue May 21 08:11:58 2019
New Revision: 361269
URL: http://llvm.org/viewvc/llvm-project?rev=361269&view=rev
Log:
[OPENMP][NVPTX]Mark more functions as always_inline for better
performance.
Internally generated functions must be marked as always_inlines in most
cases. Patch marks some extra reduction function + outlined parallel
functions as always_inline for better performance, but only if the
optimization is requested.
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp
cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp
cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp
cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp
cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp
cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Tue May 21 08:11:58 2019
@@ -1274,9 +1274,11 @@ emitCombinerOrInitializer(CodeGenModule
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
Name, &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
- Fn->removeFnAttr(llvm::Attribute::NoInline);
- Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
- Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+ if (CGM.getLangOpts().Optimize) {
+ Fn->removeFnAttr(llvm::Attribute::NoInline);
+ Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
+ Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
@@ -4671,9 +4673,11 @@ emitTaskPrivateMappingFunction(CodeGenMo
&CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
TaskPrivatesMapFnInfo);
- TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
- TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
- TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
+ if (CGM.getLangOpts().Optimize) {
+ TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
+ TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
+ TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
TaskPrivatesMapFnInfo, Args, Loc, Loc);
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Tue May 21 08:11:58 2019
@@ -1929,6 +1929,11 @@ llvm::Function *CGOpenMPRuntimeNVPTX::em
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
+ if (CGM.getLangOpts().Optimize) {
+ OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
+ OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
+ OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
IsInTTDRegion = PrevIsInTTDRegion;
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
@@ -2045,9 +2050,11 @@ llvm::Function *CGOpenMPRuntimeNVPTX::em
CodeGen.setAction(Action);
llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
- OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
- OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
- OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+ if (CGM.getLangOpts().Optimize) {
+ OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
+ OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
+ OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
return OutlinedFun;
}
@@ -3422,6 +3429,12 @@ static llvm::Function *emitShuffleAndRed
"_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setDoesNotRecurse();
+ if (CGM.getLangOpts().Optimize) {
+ Fn->removeFnAttr(llvm::Attribute::NoInline);
+ Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
+ Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
+
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
Modified: cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp (original)
+++ cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp Tue May 21 08:11:58 2019
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s
// expected-no-diagnostics
int foo(int &a) { return a; }
Modified: cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp (original)
+++ cfe/trunk/test/OpenMP/for_reduction_codegen_UDR.cpp Tue May 21 08:11:58 2019
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
@@ -52,9 +52,8 @@ void init_plus(BaseS1&, const BaseS1&);
// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { %{{[^,]+}}, %{{[^,]+}}, float }
// CHECK-DAG: [[S_INT_TY:%.+]] = type { %{{[^,]+}}, %{{[^,]+}}, i{{[0-9]+}} }
-// CHECK-DAG: [[ATOMIC_REDUCE_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8*
-// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
-// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8*
+// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 66, i32 0, i32 0, i8*
+// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 18, i32 0, i32 0, i8*
// CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer
#pragma omp declare reduction(operator&& : int : omp_out = 111 & omp_in)
@@ -173,13 +172,13 @@ int main() {
// CHECK: [[T_VAR1_REF:%.+]] = load float*, float** %
// For + reduction operation initial value of private variable is -1.
-// CHECK: store float -1.0{{.+}}, float*
+// CHECK: call void [[RED_INIT1:@.+]](float* %{{.+}}, float* %{{.+}})
// For & reduction operation initial value of private variable is defined by call of 'init()' function.
-// CHECK: call {{.*}}void @_Z4initR6BaseS1RKS_(
+// CHECK: call void [[RED_INIT2:@.+]](
// For && reduction operation initial value of private variable is 1.0.
-// CHECK: call {{.*}}void @_Z5init1R6BaseS1RKS_(
+// CHECK: call void [[RED_INIT3:@.+]](
// For min reduction operation initial value of private variable is largest repesentable value.
// CHECK: [[INIT:%.+]] = load float, float* @
@@ -219,16 +218,16 @@ int main() {
// case 1:
// t_var += t_var_reduction;
-// CHECK: fsub float 2.220000e+02, %
+// CHECK: call void [[RED_COMB1:@.+]](float* %{{.+}}, float* %{{.+}})
// var = var.operator &(var_reduction);
-// CHECK: call {{.*}}void @_Z3redR6BaseS1RKS_(
+// CHECK: call void [[RED_COMB2:@.+]](
// var1 = var1.operator &&(var1_reduction);
-// CHECK: fmul float
+// CHECK: call void [[RED_COMB3:@.+]](
// t_var1 = min(t_var1, t_var1_reduction);
-// CHECK: fadd float 5.550000e+02, %
+// CHECK: call void [[RED_COMB4:@.+]](
// __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
// CHECK: call void @__kmpc_end_reduce(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], [8 x i32]* [[REDUCTION_LOCK]])
@@ -239,22 +238,22 @@ int main() {
// case 2:
// t_var += t_var_reduction;
// CHECK: call void @__kmpc_critical(
-// CHECK: fsub float 2.220000e+02, %
+// CHECK: call void [[RED_COMB1]](float* %{{.+}}, float* %{{.+}})
// CHECK: call void @__kmpc_end_critical(
// var = var.operator &(var_reduction);
// CHECK: call void @__kmpc_critical(
-// CHECK: call {{.*}}void @_Z3redR6BaseS1RKS_(
+// CHECK: call void [[RED_COMB2]]
// CHECK: call void @__kmpc_end_critical(
// var1 = var1.operator &&(var1_reduction);
// CHECK: call void @__kmpc_critical(
-// CHECK: fmul float
+// CHECK: call void [[RED_COMB3]]
// CHECK: call void @__kmpc_end_critical(
// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: call void @__kmpc_critical(
-// CHECK: fadd float 5.550000e+02, %
+// CHECK: call void [[RED_COMB4]]
// CHECK: call void @__kmpc_end_critical(
// __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
@@ -269,6 +268,24 @@ int main() {
// CHECK: ret void
+// CHECK: define internal void [[RED_COMB1]](float* noalias, float* noalias)
+// CHECK: fsub float 2.220000e+02, %
+
+// CHECK: define internal void [[RED_INIT1]](float* noalias, float* noalias)
+// CHECK: store float -1.0{{.+}}, float*
+
+// CHECK: define internal void [[RED_COMB2]](
+// CHECK: call {{.*}}void @_Z3redR6BaseS1RKS_(
+
+// CHECK: define internal void [[RED_INIT2]](
+// CHECK: call {{.*}}void @_Z4initR6BaseS1RKS_(
+
+// CHECK: define internal void [[RED_COMB3]](
+// CHECK: fmul float
+
+// CHECK: define internal void [[RED_INIT3]](
+// CHECK: call {{.*}}void @_Z5init1R6BaseS1RKS_(
+
// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
// *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
// ...
@@ -313,18 +330,21 @@ int main() {
// CHECK: [[T_VAR1_LHS:%.+]] = bitcast i8* [[T_VAR1_LHS_VOID]] to float*
// t_var_lhs += t_var_rhs;
-// CHECK: fsub float 2.220000e+02, %
+// CHECK: call void [[RED_COMB1]](float* %{{.+}}, float* %{{.+}})
// var_lhs = var_lhs.operator &(var_rhs);
-// CHECK: call {{.*}}void @_Z3redR6BaseS1RKS_(
+// CHECK: call void [[RED_COMB2]](
// var1_lhs = var1_lhs.operator &&(var1_rhs);
-// CHECK: fmul float
+// CHECK: call void [[RED_COMB3]](
// t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
-// CHECK: fadd float 5.550000e+02, %
+// CHECK: call void [[RED_COMB4]](
// CHECK: ret void
+// CHECK: define internal void [[RED_COMB4]](
+// CHECK: fadd float 5.550000e+02, %
+
// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}} %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(480) %{{.+}})
// Reduction list for runtime.
@@ -348,7 +368,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[ARR_PRIV]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi i32*
-// CHECK: store i32 888, i32* %
+// CHECK: call void [[RED_INIT5:@.+]](i32* %{{.+}}, i32* %{{.+}})
// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -359,7 +379,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[ARRS_PRIV]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi [[S_FLOAT_TY]]*
-// CHECK: call void @_Z4initR6BaseS1RKS_(%
+// CHECK: call void [[RED_INIT2]](
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -405,8 +425,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[LB1_0]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi i32*
-// CHECK: [[ADD:%.+]] = mul nsw i32 555, %
-// CHECK: store i32 [[ADD]], i32* %
+// CHECK: call void [[RED_COMB5:@.+]](i32* %{{.+}}, i32* %{{.+}})
// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -415,7 +434,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[ARRS_LB]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi [[S_FLOAT_TY]]*
-// CHECK: call void @_Z3redR6BaseS1RKS_(%
+// CHECK: call void [[RED_COMB2]](
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -434,7 +453,7 @@ int main() {
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi i32*
// CHECK: call void @__kmpc_critical(
-// CHECK: [[ADD:%.+]] = mul nsw i32 555, %
+// CHECK: call void [[RED_COMB5]](
// CHECK: call void @__kmpc_end_critical(
// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -445,7 +464,7 @@ int main() {
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi [[S_FLOAT_TY]]*
// CHECK: call void @__kmpc_critical(
-// CHECK: call void @_Z3redR6BaseS1RKS_(%
+// CHECK: call void [[RED_COMB2]](
// CHECK: call void @__kmpc_end_critical(
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -466,6 +485,12 @@ int main() {
// CHECK: ret void
+// CHECK: define internal void [[RED_COMB5]](i32* noalias, i32* noalias)
+// CHECK: mul nsw i32 555, %
+
+// CHECK: define internal void [[RED_INIT5]](i32* noalias, i32* noalias)
+// CHECK: store i32 888, i32* %
+
// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
// *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
// ...
@@ -506,7 +531,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[ARR_LHS]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi i32*
-// CHECK: [[ADD:%.+]] = mul nsw i32 555, %
+// CHECK: call void [[RED_COMB5]](
// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -515,7 +540,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[ARRS_LB]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi [[S_FLOAT_TY]]*
-// CHECK: call void @_Z3redR6BaseS1RKS_(%
+// CHECK: call void [[RED_COMB2]](
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -539,7 +564,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[ARR_PRIV]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi i32*
-// CHECK: store i32 888, i32* %
+// CHECK: call void [[RED_INIT5]](
// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -550,7 +575,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[BEGIN]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi [[S_FLOAT_TY]]*
-// CHECK: call void @_Z4initR6BaseS1RKS_(%
+// CHECK: call void [[RED_INIT2]](
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]*
@@ -595,8 +620,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[LB1_0]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi i32*
-// CHECK: [[ADD:%[^ ]+]] = mul nsw i32 555, %
-// CHECK: store i32 [[ADD]], i32* %
+// CHECK: call void [[RED_COMB5]](
// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -605,7 +629,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[LHS_BEGIN]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi [[S_FLOAT_TY]]*
-// CHECK: call void @_Z3redR6BaseS1RKS_(%
+// CHECK: call void [[RED_COMB2]](
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -624,7 +648,7 @@ int main() {
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi i32*
// CHECK: call void @__kmpc_critical(
-// CHECK: [[ADD:%.+]] = mul nsw i32 555, %
+// CHECK: call void [[RED_COMB5]](
// CHECK: call void @__kmpc_end_critical(
// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -635,7 +659,7 @@ int main() {
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi [[S_FLOAT_TY]]*
// CHECK: call void @__kmpc_critical(
-// CHECK: call void @_Z3redR6BaseS1RKS_(%
+// CHECK: call void [[RED_COMB2]](
// CHECK: call void @__kmpc_end_critical(
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -692,8 +716,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[ARR_LHS]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi i32*
-// CHECK: [[ADD:%.+]] = mul nsw i32 555, %
-// CHECK: store i32 [[ADD]], i32* %
+// CHECK: call void [[RED_COMB5]](
// CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -702,7 +725,7 @@ int main() {
// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[ARRS_LB]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
// CHECK: phi [[S_FLOAT_TY]]*
-// CHECK: call void @_Z3redR6BaseS1RKS_(%
+// CHECK: call void [[RED_COMB2]](
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
@@ -843,16 +866,16 @@ int main() {
// CHECK: [[T_VAR1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
// For + reduction operation initial value of private variable is 0.
-// CHECK: store i32 321, i32* %
+// CHECK: call void [[RED_INIT6:@.+]](
// For & reduction operation initial value of private variable is ones in all bits.
-// CHECK: call void @_Z4initR6BaseS1RKS_(
+// CHECK: call void [[RED_INIT2:@.+]](
// For && reduction operation initial value of private variable is 1.0.
-// CHECK: call void @_Z5init2R6BaseS1RKS_(
+// CHECK: call void [[RED_INIT7:@.+]](
// For min reduction operation initial value of private variable is largest repesentable value.
-// CHECK: sdiv i32 432, %
+// CHECK: call void [[RED_INIT8:@.+]](
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
@@ -888,16 +911,16 @@ int main() {
// case 1:
// t_var += t_var_reduction;
-// CHECK: add nsw i32 1513, %
+// CHECK: call void [[RED_COMB6:@.+]](
// var = var.operator &(var_reduction);
-// CHECK: call void @_Z3redR6BaseS1RKS_(%
+// CHECK: call void [[RED_COMB2]](
// var1 = var1.operator &&(var1_reduction);
-// CHECK: mul nsw i32 17, %
+// CHECK: call void [[RED_COMB7:@.+]](
// t_var1 = min(t_var1, t_var1_reduction);
-// CHECK: sub nsw i32 47, %
+// CHECK: call void [[RED_COMB8:@.+]](
// __kmpc_end_reduce_nowait(<loc>, <gtid>, &<lock>);
// CHECK: call void @__kmpc_end_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], [8 x i32]* [[REDUCTION_LOCK]])
@@ -908,22 +931,22 @@ int main() {
// case 2:
// t_var += t_var_reduction;
// CHECK: call void @__kmpc_critical(
-// CHECK: add nsw i32 1513, %
+// CHECK: call void [[RED_COMB6]](
// CHECK: call void @__kmpc_end_critical(
// var = var.operator &(var_reduction);
// CHECK: call void @__kmpc_critical(
-// CHECK: call void @_Z3redR6BaseS1RKS_(%
+// CHECK: call void [[RED_COMB2]](
// CHECK: call void @__kmpc_end_critical(
// var1 = var1.operator &&(var1_reduction);
// CHECK: call void @__kmpc_critical(
-// CHECK: mul nsw i32 17, %
+// CHECK: call void [[RED_COMB7]](
// CHECK: call void @__kmpc_end_critical(
// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: call void @__kmpc_critical(
-// CHECK: sub nsw i32 47, %
+// CHECK: call void [[RED_COMB8]](
// CHECK: call void @__kmpc_end_critical(
// break;
@@ -933,6 +956,24 @@ int main() {
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*
// CHECK: ret void
+// CHECK: define internal void [[RED_COMB6]](i32* noalias, i32* noalias)
+// CHECK: add nsw i32 1513, %
+
+// CHECK: define internal void [[RED_INIT6]](i32* noalias, i32* noalias)
+// CHECK: store i32 321, i32* %
+
+// CHECK: define internal void [[RED_COMB7]](
+// CHECK: mul nsw i32 17, %
+
+// CHECK: define internal void [[RED_INIT7]](
+// CHECK: call void @_Z5init2R6BaseS1RKS_(
+
+// CHECK: define internal void [[RED_COMB8]](i32* noalias, i32* noalias)
+// CHECK: sub nsw i32 47, %
+
+// CHECK: define internal void [[RED_INIT8]](i32* noalias, i32* noalias)
+// CHECK: sdiv i32 432, %
+
// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
// *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
// ...
@@ -977,16 +1018,16 @@ int main() {
// CHECK: [[T_VAR1_LHS:%.+]] = bitcast i8* [[T_VAR1_LHS_VOID]] to i{{[0-9]+}}*
// t_var_lhs += t_var_rhs;
-// CHECK: add nsw i32 1513, %
+// CHECK: call void [[RED_COMB6]](
// var_lhs = var_lhs.operator &(var_rhs);
-// CHECK: call void @_Z3redR6BaseS1RKS_(%
+// CHECK: call void [[RED_COMB2]](
// var1_lhs = var1_lhs.operator &&(var1_rhs);
-// CHECK: mul nsw i32 17, %
+// CHECK: call void [[RED_COMB7]](
// t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
-// CHECK: sub nsw i32 47, %
+// CHECK: call void [[RED_COMB8]](
// CHECK: ret void
// CHECK: define internal void [[TMAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [42 x [[S_INT_TY]]]* dereferenceable(504) %{{.*}}, [2 x i32]* dereferenceable(8) %{{.*}}, i32* dereferenceable(4) %{{.*}}, [2 x [[S_INT_TY]]]* dereferenceable(24) %{{.*}}, [[S_INT_TY]]* dereferenceable(12) %{{.*}})
Modified: cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_allocate_codegen.cpp Tue May 21 08:11:58 2019
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc -o %t-host.bc %s
-// RUN: %clang_cc1 -verify -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - -disable-llvm-optzns | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
Modified: cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_data_sharing.cpp Tue May 21 08:11:58 2019
@@ -2,7 +2,7 @@
///==========================================================================///
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CK1
// expected-no-diagnostics
Modified: cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp Tue May 21 08:11:58 2019
@@ -1,9 +1,9 @@
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
@@ -25,12 +25,16 @@ int main(int argc, char **argv) {
// CHECK-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
// CHECK-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
// CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 40
+// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
// CHECK-DAG: @__omp_offloading_{{.*}}_main_l17_exec_mode = weak constant i8 0
// CHECK: define weak void @__omp_offloading_{{.*}}_main_l17([10 x i32]* dereferenceable(40) %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i{{64|32}} %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}})
-// CHECK: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 40, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: [[SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]],
+// CHECK: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
// CHECK: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
-// CHECK: [[STACK:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty*
+// CHECK: [[GEP:%.+]] = getelementptr inbounds i8, i8* [[PTR]], i{{64|32}} 0
+// CHECK: [[STACK:%.+]] = bitcast i8* [[GEP]] to %struct._globalized_locals_ty*
// CHECK: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 0
// CHECK-NOT: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]],
// CHECK: call void @__kmpc_for_static_init_4(
@@ -39,7 +43,8 @@ int main(int argc, char **argv) {
// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 1, i16 1)
+// CHECK: [[SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[SHARED]])
// CHECK: define internal void [[PARALLEL]](
// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
Modified: cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_lambda_capturing.cpp Tue May 21 08:11:58 2019
@@ -26,11 +26,11 @@
// CHECK-DAG: [[CAP1:%.+]] = type { [[S]]* }
// CHECK-DAG: [[CAP2:%.+]] = type { i32*, i32*, i32*, i32**, i32* }
-// CLASS: define internal void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l72_worker()
-// CLASS: define weak void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l72([[S]]* {{%.+}}, [[CAP1]]* dereferenceable(8) {{%.+}})
+// CLASS: define internal void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l67_worker()
+// CLASS: define weak void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l67([[S]]* {{%.+}}, [[CAP1]]* dereferenceable(8) {{%.+}})
// CLASS-NOT: getelementptr
// CLASS: br i1 %
-// CLASS: call void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l72_worker()
+// CLASS: call void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l67_worker()
// CLASS: br label %
// CLASS: br i1 %
// CLASS: call void @__kmpc_kernel_init(
@@ -43,12 +43,7 @@
// CLASS: call i32 [[LAMBDA1:@.+foo.+]]([[CAP1]]* [[L]])
// CLASS: ret void
-// CLASS: define weak void @__omp_offloading_{{.+}}foo{{.+}}_l74([[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}})
-// CLASS-NOT: getelementptr
-// CLASS: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* %{{.+}})
-// CLASS: ret void
-
-// CLASS: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}})
+// CLASS: define weak void @__omp_offloading_{{.+}}foo{{.+}}_l69([[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}})
// CLASS-NOT: getelementptr
// CLASS: call void @llvm.memcpy.
// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]],
@@ -77,11 +72,11 @@ struct S {
}
} s;
-// FUN: define internal void @__omp_offloading_{{.+}}_main_l134_worker()
-// FUN: define weak void @__omp_offloading_{{.+}}_main_l134(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}, i64 %{{.+}})
+// FUN: define internal void @__omp_offloading_{{.+}}_main_l124_worker()
+// FUN: define weak void @__omp_offloading_{{.+}}_main_l124(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}, i64 %{{.+}})
// FUN-NOT: getelementptr
// FUN: br i1 %
-// FUN: call void @__omp_offloading_{{.*}}_{{.*}}main{{.*}}_l134_worker()
+// FUN: call void @__omp_offloading_{{.*}}_{{.*}}main{{.*}}_l124_worker()
// FUN: br label %
// FUN: br i1 %
// FUN: call void @__kmpc_kernel_init(
@@ -98,19 +93,14 @@ struct S {
// FUN: store i32** %{{.+}}, i32*** [[D_CAP]],
// FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4
// FUN: store i32* %{{.+}}, i32** [[A_CAP]],
-// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
// FUN: call i64 [[LAMBDA2:@.+main.+]]([[CAP2]]* [[L]])
// FUN: ret void
-// FUN: define weak void @__omp_offloading_{{.+}}_main_l136(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}} i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}})
-// FUN-NOT: getelementptr
-// FUN: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, [[CAP2]]* %{{.+}})
-// FUN: ret void
-
-// FUN: define internal void [[PARALLEL:@.+]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}})
+// FUN: define weak void @__omp_offloading_{{.+}}_main_l126(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}} i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}})
// FUN-NOT: getelementptr
// FUN: call void @llvm.memcpy.
-// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
// FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0
// FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]],
// FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1
Modified: cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp Tue May 21 08:11:58 2019
@@ -1,9 +1,9 @@
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
Modified: cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_parallel_for_codegen.cpp Tue May 21 08:11:58 2019
@@ -1,6 +1,6 @@
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Tue May 21 08:11:58 2019
@@ -1,9 +1,9 @@
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_codegen.cpp Tue May 21 08:11:58 2019
@@ -1,9 +1,9 @@
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp Tue May 21 08:11:58 2019
@@ -1,9 +1,9 @@
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp Tue May 21 08:11:58 2019
@@ -127,9 +127,7 @@ int bar(int n){
// CHECK: [[ACP:%.+]] = bitcast i[[SZ]]* [[AC:%.+]] to i8*
// CHECK: store i8 [[A_VAL]], i8* [[ACP]], align
// CHECK: [[ACV:%.+]] = load i[[SZ]], i[[SZ]]* [[AC]], align
- // CHECK: store i[[SZ]] [[ACV]], i[[SZ]]* [[A_ADDR_T:%.+]], align
- // CHECK: [[CONV2:%.+]] = bitcast i[[SZ]]* [[A_ADDR_T]] to i8*
- // CHECK: store i8 49, i8* [[CONV2]], align
+ // CHECK: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] [[ACV]])
// CHECK: br label {{%?}}[[TERMINATE:.+]]
//
// CHECK: [[TERMINATE]]
@@ -140,10 +138,12 @@ int bar(int n){
// CHECK: [[EXIT]]
// CHECK: ret void
-
-
-
-
+ // CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i[[SZ]] [[A_VAL:%.+]])
+ // CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]],
+ // CHECK: store i[[SZ]] [[A_VAL]], i[[SZ]]* [[A_ADDR]],
+ // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i8*
+ // CHECK: store i8 49, i8* [[CONV]],
+ // CHECK: ret void
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l32}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
@@ -213,9 +213,7 @@ int bar(int n){
// CHECK: [[ACP:%.+]] = bitcast i[[SZ]]* [[AC:%.+]] to i16*
// CHECK: store i16 [[AA_VAL]], i16* [[ACP]], align
// CHECK: [[ACV:%.+]] = load i[[SZ]], i[[SZ]]* [[AC]], align
- // CHECK: store i[[SZ]] [[ACV]], i[[SZ]]* [[AA_ADDR_T:%.+]], align
- // CHECK: [[CONV2:%.+]] = bitcast i[[SZ]]* [[AA_ADDR_T]] to i16*
- // CHECK: store i16 1, i16* [[CONV2]], align
+ // CHECK: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] [[ACV]])
// CHECK: br label {{%?}}[[TERMINATE:.+]]
//
// CHECK: [[TERMINATE]]
@@ -226,24 +224,35 @@ int bar(int n){
// CHECK: [[EXIT]]
// CHECK: ret void
+ // CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i[[SZ]] [[A_VAL:%.+]])
+ // CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]],
+ // CHECK: store i[[SZ]] [[A_VAL]], i[[SZ]]* [[A_ADDR]],
+ // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i16*
+ // CHECK: store i16 1, i16* [[CONV]],
+ // CHECK: ret void
+
// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 0)
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd
// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
// CHECK-NOT: call void @__kmpc_serialized_parallel(
-// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
+// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.+}})
// CHECK-NOT: call void @__kmpc_end_serialized_parallel(
// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
// CHECK: ret
-// CHECK: define internal void [[L0]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
-// CHECK: call void @__kmpc_serialized_parallel(
+// CHECK: define internal void [[L0]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i[[SZ]] %{{.+}})
// CHECK: call void [[L1:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.+}})
-// CHECK: call void @__kmpc_end_serialized_parallel(
// CHECK: ret void
// CHECK: define internal void [[L1]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
+// CHECK: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[L2:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.+}})
+// CHECK: call void @__kmpc_end_serialized_parallel(
+// CHECK: ret void
+
+// CHECK: define internal void [[L2]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
// CHECK: store i16 1, i16* %
// CHECK: ret void
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp Tue May 21 08:11:58 2019
@@ -9,8 +9,9 @@
#define HEADER
// CHECK: [[MEM_TY:%.+]] = type { [128 x i8] }
-// CHECK-DAG: {{@__omp_offloading_.+}}_l19_exec_mode = weak constant i8 1
-// CHECK-DAG: internal unnamed_addr constant i{{64|32}} 4
+// CHECK-DAG: {{@__omp_offloading_.+}}_l20_exec_mode = weak constant i8 1
+// CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
+// CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
template<typename tx>
tx ftemplate(int n) {
@@ -34,10 +35,10 @@ int bar(int n){
return a;
}
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l19}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l20}}_worker()
// CHECK: ret void
- // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l19}}()
+ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l20}}()
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
@@ -47,7 +48,7 @@ int bar(int n){
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[WORKER]]
- // CHECK: {{call|invoke}} void {{@__omp_offloading_.+template.+l19}}_worker()
+ // CHECK: {{call|invoke}} void {{@__omp_offloading_.+template.+l20}}_worker()
// CHECK: br label {{%?}}[[EXIT:.+]]
//
// CHECK: [[CHECK_MASTER]]
@@ -62,29 +63,35 @@ int bar(int n){
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
// CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
- // CHECK: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* @{{.+}}, i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[BUF:@.+]] to i8**))
+ // CHECK: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}})
+ // CHECK: br label {{%?}}[[TERMINATE:.+]]
+ //
+ // CHECK: [[TERMINATE]]
+ // CHECK: call void @__kmpc_kernel_deinit(
+ // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
+ // CHECK: br label {{%?}}[[EXIT]]
+ //
+ // CHECK: [[EXIT]]
+ // CHECK: ret void
+
+ // CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
+ // CHECK: [[SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+ // CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]],
+ // CHECK: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* @{{.+}}, i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[SHARED]], i8** addrspacecast (i8* addrspace(3)* [[BUF:@.+]] to i8**))
// CHECK: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[BUF]],
- // CHECK: [[RD:%.+]] = bitcast i8* [[PTR]] to [[GLOB_TY:%.+]]*
+ // CHECK: [[ADDR:%.+]] = getelementptr inbounds i8, i8* [[PTR]], i{{64|32}} 0
+ // CHECK: [[RD:%.+]] = bitcast i8* [[ADDR]] to [[GLOB_TY:%.+]]*
// CHECK: [[I_ADDR:%.+]] = getelementptr inbounds [[GLOB_TY]], [[GLOB_TY]]* [[RD]], i32 0, i32 0
//
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
// CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_VARS_PTR:%.+]], i{{64|32}} 1)
// CHECK: [[SHARED_VARS_BUF:%.+]] = load i8**, i8*** [[SHARED_VARS_PTR]],
+ // CHECK: [[VARS_BUF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED_VARS_BUF]], i{{64|32}} 0
// CHECK: [[I_ADDR_BC:%.+]] = bitcast i32* [[I_ADDR]] to i8*
- // CHECK: store i8* [[I_ADDR_BC]], i8** [[SHARED_VARS_BUF]],
+ // CHECK: store i8* [[I_ADDR_BC]], i8** [[VARS_BUF]],
// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
// CHECK: call void @__kmpc_end_sharing_variables()
// CHECK: call void @__kmpc_for_static_fini(
- // CHECK: br label {{%?}}[[TERMINATE:.+]]
- //
- // CHECK: [[TERMINATE]]
- // CHECK: call void @__kmpc_kernel_deinit(
- // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
- // CHECK: br label {{%?}}[[EXIT]]
- //
- // CHECK: [[EXIT]]
- // CHECK: ret void
-
#endif
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp Tue May 21 08:11:58 2019
@@ -85,15 +85,22 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l34(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void [[PARALLEL:@.+]](
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
+// CHECK: define internal void [[PARALLEL]](
+// CHECK: [[SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]],
+// CHECK: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
// CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
-// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
+// CHECK: [[ADDR:%.+]] = getelementptr inbounds i8, i8* [[TEAM_ALLOC]], i{{64|32}} 0
+// CHECK: [[BC:%.+]] = bitcast i8* [[ADDR]] to [[REC:%.+]]*
// CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 1, i16 1)
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+// CHECK: [[SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[SHARED]])
// CHECK: ret void
// CHECK: define internal void [[OUTL1]](
@@ -104,11 +111,12 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+// CHECK: ret void
+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
-// CHECK: ret void
// CHECK: define internal void [[OUTL2]](
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33,
@@ -118,11 +126,12 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+// CHECK: ret void
+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
-// CHECK: ret void
// CHECK: define internal void [[OUTL3]](
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33,
@@ -131,6 +140,11 @@ int bar(int n){
// Distribute with collapse(2)
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+// CHECK: ret void
+
// CHECK: alloca
// CHECK: alloca
// CHECK: alloca
@@ -142,8 +156,6 @@ int bar(int n){
// CHECK: [[OMP_UB:%.+]] = alloca
// CHECK: [[OMP_ST:%.+]] = alloca
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
-// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
-// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
@@ -213,8 +225,7 @@ int bar(int n){
// CHECK: [[DIST_INNER_LOOP_END]]:
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
-// CHECK: ret void
+
// CHECK-32: define internal void [[OUTL4]](
// CHECK-64: define internal void [[OUTL4]](
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp Tue May 21 08:11:58 2019
@@ -24,6 +24,8 @@ int main(int argc, char **argv) {
// CHECK: define weak void @__omp_offloading_{{.*}}_main_l16(i{{64|32}} %{{[^,].*}}, i32* dereferenceable{{[^,]*}}, i{{64|32}} %{{[^,)]*}})
// CHECK: call void @__kmpc_spmd_kernel_init(
// CHECK: [[TID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void [[PARALLEL:@.+]](i32* %{{.*}}, i32* %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.*}}, i{{64|32}} %{{.*}}, i32* %{{.*}})
@@ -32,8 +34,6 @@ int main(int argc, char **argv) {
// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* @
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
-
// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} [[ARGC:%.+]], i32* dereferenceable{{.*}})
// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
// CHECK: alloca i{{[0-9]+}},
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp Tue May 21 08:11:58 2019
@@ -71,15 +71,20 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
+// CHECK: [[SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: [[SIZE:%.+]] = load i{{64|32}}, i{{64|32}}* [[KERNEL_SIZE]],
+// CHECK: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
// CHECK: [[TEAM_ALLOC:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
-// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
+// CHECK: [[PTR:%.+]] = getelementptr inbounds i8, i8* [[TEAM_ALLOC]], i{{64|32}} 0
+// CHECK: [[BC:%.+]] = bitcast i8* [[PTR]] to [[REC:%.+]]*
// CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_restore_team_static_memory(i16 1, i16 1)
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+// CHECK: [[SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED]],
+// CHECK: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[SHARED]])
// CHECK: ret void
// CHECK: define internal void [[OUTL1]](
@@ -90,10 +95,11 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL2:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL2]](
@@ -104,10 +110,11 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: {{call|invoke}} void [[OUTL3:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL3]](
@@ -119,11 +126,12 @@ int bar(int n){
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define internal void [[OUTL4]](
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp Tue May 21 08:11:58 2019
@@ -64,32 +64,36 @@ int bar(int n){
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l36(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l41(
// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l46({{.+}}, i{{32|64}} [[F_IN:%.+]])
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
// CHECK: call void @__kmpc_spmd_kernel_init(i32 %{{.+}}, i16 0, i16 0)
+// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
+
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0)
// CHECK: ret void
#endif
Modified: cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp Tue May 21 08:11:58 2019
@@ -37,9 +37,6 @@ int main (int argc, char **argv) {
// only nvptx side: do not outline teams region and do not call fork_teams
// CK1: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[ARGC:%.+]])
-// CK1: {{.+}} = alloca i{{[0-9]+}}*,
-// CK1: {{.+}} = alloca i{{[0-9]+}}*,
-// CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{[0-9]+}}*,
// CK1: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}},
// CK1: store {{.+}} 0, {{.+}},
// CK1: store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]],
@@ -53,16 +50,16 @@ int main (int argc, char **argv) {
// CK1-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK1: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
-// CK1: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
-// CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
-// CK1: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
-// CK1-NOT: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
+// CK1: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}}, i32* [[ARGCADDR]])
// CK1: ret void
// CK1-NEXT: }
+// CK1: define internal void [[OUTLINED]](
+// CK1: store i{{[0-9]+}} 0, i{{[0-9]+}}* %
+// CK1-NOT: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
+
// target region in template
// CK1: define {{.*}}void @{{[^,]+}}(i{{.+}}** [[ARGC:%.+]])
-// CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***,
// CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**,
// CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]]
// CK1: [[IS_SHARED:%.+]] = load i16, i16* [[KERNEL_SHARED2]],
@@ -73,13 +70,14 @@ int main (int argc, char **argv) {
// CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK1: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
-// CK1: store i8*** [[ARGCADDR]], i8**** [[ARGCADDR_PTR]],
-// CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{.+}}**, i{{.+}}*** [[ARGCADDR_PTR]],
-// CK1: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],
-// CK1-NOT: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
+// CK1: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}}, i8*** [[ARGCADDR]])
// CK1: ret void
// CK1-NEXT: }
+// CK1: define internal void [[OUTLINED]](
+// CK1: store i{{[0-9]+}}** null, i{{[0-9]+}}*** %
+// CK1-NOT: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
+
#endif // CK1
@@ -123,9 +121,6 @@ int main (int argc, char **argv) {
// CK2-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1
// CK2: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[B_IN:%.+]], i{{[0-9]+}} [[ARGC_IN:.+]])
-// CK2: {{.}} = alloca i{{[0-9]+}}*,
-// CK2: {{.}} = alloca i{{[0-9]+}}*,
-// CK2: [[ARGCADDR_PTR:%.+]] = alloca i{{[0-9]+}}*,
// CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
// CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
// CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}},
@@ -145,15 +140,15 @@ int main (int argc, char **argv) {
// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK2: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
-// CK2: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
-// CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
-// CK2: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
-// CK2-NOT: {{.+}} = call i32 @__kmpc_push_num_teams(
-// CK2-NOT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
+// CK2: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}}, i32* [[ARGCADDR]])
// CK2: ret
+// CK2: define internal void [[OUTLINED]](
+// CK2: store i{{[0-9]+}} 0, i{{[0-9]+}}* %
+// CK2-NOT: {{.+}} = call i32 @__kmpc_push_num_teams(
+// CK2-NOT: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
+
// CK2: define {{.*}}void @{{[^,]+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[BP:%.+]], i{{[0-9]+}}** [[ARGC:%.+]])
-// CK2: [[ARGCADDR_PTR:%.+]] = alloca i{{[0-9]+}}***,
// CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
// CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
// CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}}**,
@@ -169,12 +164,13 @@ int main (int argc, char **argv) {
// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
// CK2: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
-// CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]],
-// CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}***, i{{[0-9]+}}**** [[ARGCADDR_PTR]],
-// CK2: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],
-// CK2-NOT: {{.+}} = call i32 @__kmpc_push_num_teams(
-// CK2-NOT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
+// CK2: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}}, i8*** [[ARGCADDR]])
// CK2: ret void
+// CK2: define internal void [[OUTLINED]](
+// CK2: store i{{[0-9]+}}** null, i{{[0-9]+}}*** %
+// CK2-NOT: {{.+}} = call i32 @__kmpc_push_num_teams(
+// CK2-NOT: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
+
#endif // CK2
#endif
Modified: cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp Tue May 21 08:11:58 2019
@@ -77,6 +77,7 @@ int bar(int n){
// CHECK: {{call|invoke}} void [[T1]]_worker()
//
// CHECK: call void @__kmpc_kernel_init(
+ // CHECK: call void @__kmpc_kernel_deinit(
//
// CHECK: store double {{[0\.e\+]+}}, double* [[E:%.+]], align
// CHECK: [[EV:%.+]] = load double, double* [[E]], align
@@ -86,7 +87,8 @@ int bar(int n){
// CHECK: [[BC:%.+]] = bitcast double* [[E]] to i8*
// CHECK: store i8* [[BC]], i8** [[GEP1]],
// CHECK: [[BC_RED_LIST:%.+]] = bitcast [1 x i8*]* [[RED_LIST]] to i8*
- // CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* [[LOC:@.+]], i32 [[GTID:%.+]], i8* bitcast ([[TEAMS_REDUCE_UNION_TY]]* [[TEAMS_RED_BUFFER]] to i8*), i32 {{1024|2048}}, i8* [[BC_RED_LIST]], void (i8*, i16, i16, i16)* [[SHUFFLE_AND_REDUCE:@.+]], void (i8*, i32)* [[INTER_WARP_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_RED:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_COPY:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_RED:@.+]])
+ // CHECK: [[BUF:%.+]] = load i8*, i8** @
+ // CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* [[LOC:@.+]], i32 [[GTID:%.+]], i8* [[BUF]], i32 {{1024|2048}}, i8* [[BC_RED_LIST]], void (i8*, i16, i16, i16)* [[SHUFFLE_AND_REDUCE:@.+]], void (i8*, i32)* [[INTER_WARP_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_RED:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_COPY:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_RED:@.+]])
// CHECK: [[COND:%.+]] = icmp eq i32 [[RET]], 1
// CHECK: br i1 [[COND]], label {{%?}}[[IFLABEL:.+]], label {{%?}}[[EXIT:.+]]
//
@@ -99,7 +101,6 @@ int bar(int n){
// CHECK: br label %[[EXIT]]
//
// CHECK: [[EXIT]]
- // CHECK: call void @__kmpc_kernel_deinit(
//
// Reduction function
@@ -344,6 +345,7 @@ int bar(int n){
//
// CHECK: call void @__kmpc_kernel_init(
+ // CHECK: call void @__kmpc_kernel_deinit(
//
// CHECK: store float {{1\.[0e\+]+}}, float* [[D:%.+]], align
// CHECK: [[C_VAL:%.+]] = load i8, i8* [[C:%.+]], align
@@ -360,7 +362,8 @@ int bar(int n){
// CHECK: [[BC:%.+]] = bitcast float* [[D]] to i8*
// CHECK: store i8* [[BC]], i8** [[GEP2]],
// CHECK: [[BC_RED_LIST:%.+]] = bitcast [2 x i8*]* [[RED_LIST]] to i8*
- // CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* [[LOC:@.+]], i32 [[GTID:%.+]], i8* bitcast ([[TEAMS_REDUCE_UNION_TY]]* [[TEAMS_RED_BUFFER]] to i8*), i32 {{1024|2048}}, i8* [[BC_RED_LIST]], void (i8*, i16, i16, i16)* [[SHUFFLE_AND_REDUCE:@.+]], void (i8*, i32)* [[INTER_WARP_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_RED:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_COPY:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_RED:@.+]])
+ // CHECK: [[BUF:%.+]] = load i8*, i8** @
+ // CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* [[LOC:@.+]], i32 [[GTID:%.+]], i8* [[BUF]], i32 {{1024|2048}}, i8* [[BC_RED_LIST]], void (i8*, i16, i16, i16)* [[SHUFFLE_AND_REDUCE:@.+]], void (i8*, i32)* [[INTER_WARP_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_RED:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_COPY:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_RED:@.+]])
// CHECK: [[COND:%.+]] = icmp eq i32 [[RET]], 1
// CHECK: br i1 [[COND]], label {{%?}}[[IFLABEL:.+]], label {{%?}}[[EXIT:.+]]
//
@@ -380,7 +383,6 @@ int bar(int n){
// CHECK: br label %[[EXIT]]
//
// CHECK: [[EXIT]]
- // CHECK: call void @__kmpc_kernel_deinit(
//
// Reduction function
@@ -706,6 +708,8 @@ int bar(int n){
//
// CHECK: call void @__kmpc_spmd_kernel_init(
// CHECK: call void @__kmpc_data_sharing_init_stack_spmd()
+ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
+
// CHECK-NOT: call void @__kmpc_get_team_static_memory
// CHECK: store i32 0,
// CHECK: store i32 0, i32* [[A_ADDR:%.+]], align
@@ -718,7 +722,8 @@ int bar(int n){
// CHECK: [[BC:%.+]] = bitcast i16* [[B_ADDR]] to i8*
// CHECK: store i8* [[BC]], i8** [[GEP2]],
// CHECK: [[BC_RED_LIST:%.+]] = bitcast [2 x i8*]* [[RED_LIST]] to i8*
- // CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* [[LOC:@.+]], i32 [[GTID:%.+]], i8* bitcast ([[TEAMS_REDUCE_UNION_TY]]* [[TEAMS_RED_BUFFER]] to i8*), i32 {{1024|2048}}, i8* [[BC_RED_LIST]], void (i8*, i16, i16, i16)* [[SHUFFLE_AND_REDUCE:@.+]], void (i8*, i32)* [[INTER_WARP_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_RED:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_COPY:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_RED:@.+]])
+ // CHECK: [[BUF:%.+]] = load i8*, i8** @
+ // CHECK: [[RET:%.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* [[LOC:@.+]], i32 [[GTID:%.+]], i8* [[BUF]], i32 {{1024|2048}}, i8* [[BC_RED_LIST]], void (i8*, i16, i16, i16)* [[SHUFFLE_AND_REDUCE:@.+]], void (i8*, i32)* [[INTER_WARP_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_COPY:@.+]], void (i8*, i32, i8*)* [[RED_LIST_TO_GLOBAL_RED:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_COPY:@.+]], void (i8*, i32, i8*)* [[GLOBAL_TO_RED_LIST_RED:@.+]])
// CHECK: [[COND:%.+]] = icmp eq i32 [[RET]], 1
// CHECK: br i1 [[COND]], label {{%?}}[[IFLABEL:.+]], label {{%?}}[[EXIT:.+]]
//
@@ -749,7 +754,6 @@ int bar(int n){
// CHECK: br label %[[EXIT]]
//
// CHECK: [[EXIT]]
- // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.+}}, i16* dereferenceable{{.+}})
//
Modified: cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/taskloop_reduction_codegen.cpp Tue May 21 08:11:58 2019
@@ -167,9 +167,15 @@ sum = 0.0;
// CHECK: [[ORIG_PTR_ADDR:%.+]] = call i8* @__kmpc_threadprivate_cached(
// CHECK: [[ORIG_PTR_REF:%.+]] = bitcast i8* [[ORIG_PTR_ADDR]] to i8**
// CHECK: load i8*, i8** [[ORIG_PTR_REF]],
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
+// CHECK: call void [[OMP_INIT1:@.+]](
// CHECK: ret void
+// CHECK: define internal void [[OMP_COMB1:@.+]](%struct.S* noalias, %struct.S* noalias)
+// CHECK: fadd float %
+
+// CHECK: define internal void [[OMP_INIT1]](%struct.S* noalias, %struct.S* noalias)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
+
// CHECK: define internal void @[[RED_FINI2]](i8*)
// CHECK: call i8* @__kmpc_threadprivate_cached(
// CHECK: call void @
@@ -177,8 +183,7 @@ sum = 0.0;
// CHECK: define internal void @[[RED_COMB2]](i8*, i8*)
// CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: fadd float %
-// CHECK: store float %{{.+}}, float* %
+// CHECK: call void [[OMP_COMB1]](
// CHECK: ret void
// CHECK: define internal void @[[RED_INIT3]](i8*)
Modified: cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp?rev=361269&r1=361268&r2=361269&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/taskloop_simd_reduction_codegen.cpp Tue May 21 08:11:58 2019
@@ -165,9 +165,15 @@ sum = 0.0;
// CHECK: define internal void @[[RED_INIT2]](i8*)
// CHECK: call i8* @__kmpc_threadprivate_cached(
// CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
+// CHECK: call void [[OMP_INIT1:@.+]](
// CHECK: ret void
+// CHECK: define internal void [[OMP_COMB1:@.+]](%struct.S* noalias, %struct.S* noalias)
+// CHECK: fadd float %
+
+// CHECK: define internal void [[OMP_INIT1]](%struct.S* noalias, %struct.S* noalias)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
+
// CHECK: define internal void @[[RED_FINI2]](i8*)
// CHECK: call i8* @__kmpc_threadprivate_cached(
// CHECK: call void @
@@ -175,8 +181,7 @@ sum = 0.0;
// CHECK: define internal void @[[RED_COMB2]](i8*, i8*)
// CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: fadd float %
-// CHECK: store float %{{.+}}, float* %
+// CHECK: call void [[OMP_COMB1]](
// CHECK: ret void
// CHECK: define internal void @[[RED_INIT3]](i8*)
More information about the cfe-commits
mailing list