r331393 - [OPENMP] Add support for reductions on simd directives in target
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Wed May 2 13:03:28 PDT 2018
Author: abataev
Date: Wed May 2 13:03:27 2018
New Revision: 331393
URL: http://llvm.org/viewvc/llvm-project?rev=331393&view=rev
Log:
[OPENMP] Add support for reductions on simd directives in target
regions.
Added codegen for `simd reduction()` constructs in target directives.
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=331393&r1=331392&r2=331393&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Wed May 2 13:03:27 2018
@@ -61,6 +61,12 @@ enum OpenMPRTLFunctionNVPTX {
/// lane_offset, int16_t shortCircuit),
/// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
OMPRTL_NVPTX__kmpc_parallel_reduce_nowait,
+ /// \brief Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32
+ /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+ /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ /// lane_offset, int16_t shortCircuit),
+ /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+ OMPRTL_NVPTX__kmpc_simd_reduce_nowait,
/// \brief Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
/// int32_t num_vars, size_t reduce_size, void *reduce_data,
/// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t
@@ -1028,6 +1034,33 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait");
break;
}
+ case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: {
+ // Build int32_t kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid,
+ // kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+ // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ // lane_offset, int16_t Algorithm Version),
+ // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
+ llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+ CGM.Int16Ty, CGM.Int16Ty};
+ auto *ShuffleReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+ auto *InterWarpCopyFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.SizeTy,
+ CGM.VoidPtrTy,
+ ShuffleReduceFnTy->getPointerTo(),
+ InterWarpCopyFnTy->getPointerTo()};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait");
+ break;
+ }
case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: {
// Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
// int32_t num_vars, size_t reduce_size, void *reduce_data,
@@ -2703,8 +2736,8 @@ void CGOpenMPRuntimeNVPTX::emitReduction
bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
- // FIXME: Add support for simd reduction.
- assert((TeamsReduction || ParallelReduction) &&
+ bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind);
+ assert((TeamsReduction || ParallelReduction || SimdReduction) &&
"Invalid reduction selection in emitReduction.");
ASTContext &C = CGM.getContext();
@@ -2764,19 +2797,22 @@ void CGOpenMPRuntimeNVPTX::emitReduction
llvm::Value *InterWarpCopyFn =
emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
- llvm::Value *Res = nullptr;
- if (ParallelReduction) {
- llvm::Value *Args[] = {ThreadId,
- CGF.Builder.getInt32(RHSExprs.size()),
- ReductionArrayTySize,
- RL,
- ShuffleAndReduceFn,
- InterWarpCopyFn};
+ llvm::Value *Args[] = {ThreadId,
+ CGF.Builder.getInt32(RHSExprs.size()),
+ ReductionArrayTySize,
+ RL,
+ ShuffleAndReduceFn,
+ InterWarpCopyFn};
+ llvm::Value *Res = nullptr;
+ if (ParallelReduction)
Res = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait),
Args);
- }
+ else if (SimdReduction)
+ Res = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_simd_reduce_nowait),
+ Args);
if (TeamsReduction) {
llvm::Value *ScratchPadCopyFn =
Modified: cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp?rev=331393&r1=331392&r2=331393&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp Wed May 2 13:03:27 2018
@@ -9,9 +9,10 @@
#define HEADER
// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l24}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l29}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l34}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l25}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l35}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l40}}_exec_mode = weak constant i8 0
#define N 1000
@@ -20,14 +21,14 @@ tx ftemplate(int n) {
tx a[N];
short aa[N];
tx b[10];
-
+
#pragma omp target simd
for(int i = 0; i < n; i++) {
a[i] = 1;
}
#pragma omp target simd
- for(int i = 0; i < n; i++) {
+ for (int i = 0; i < n; i++) {
aa[i] += 1;
}
@@ -36,6 +37,11 @@ tx ftemplate(int n) {
b[i] += 1;
}
+ #pragma omp target simd reduction(+:n)
+ for(int i = 0; i < 10; i++) {
+ b[i] += 1;
+ }
+
return a[0];
}
@@ -47,7 +53,7 @@ int bar(int n){
return a;
}
-// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l24}}(
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l25}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
// CHECK-NOT: call void @__kmpc_for_static_init
@@ -55,7 +61,7 @@ int bar(int n){
// CHECK: call void @__kmpc_spmd_kernel_deinit()
// CHECK: ret void
-// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l29}}(
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l30}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
// CHECK-NOT: call void @__kmpc_for_static_init
@@ -63,7 +69,7 @@ int bar(int n){
// CHECK: call void @__kmpc_spmd_kernel_deinit()
// CHECK: ret void
-// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l34}}(
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l35}}(
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
// CHECK-NOT: call void @__kmpc_for_static_init
@@ -71,4 +77,16 @@ int bar(int n){
// CHECK: call void @__kmpc_spmd_kernel_deinit()
// CHECK: ret void
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l40}}(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]],
+// CHECK-NOT: call void @__kmpc_for_static_init
+// CHECK-NOT: call void @__kmpc_for_static_fini
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_nvptx_simd_reduce_nowait(i32 %{{.+}}, i32 1, i{{64|32}} {{8|4}}, i8* %{{.+}}, void (i8*, i16, i16, i16)* @{{.+}}, void (i8*, i32)* @{{.+}})
+// CHECK: switch i32 [[RES]]
+// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(i32 %{{.+}})
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret void
+
+
#endif
More information about the cfe-commits
mailing list