r369946 - [OPENMP][NVPTX]Fix critical region codegen.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 26 12:07:48 PDT 2019
Author: abataev
Date: Mon Aug 26 12:07:48 2019
New Revision: 369946
URL: http://llvm.org/viewvc/llvm-project?rev=369946&view=rev
Log:
[OPENMP][NVPTX]Fix critical region codegen.
Summary:
Previously critical regions were emitted with the barrier making it a
worksharing construct though it is not. Also, it leads to incorrect
behavior in Cuda9+. Patch fixes this problem.
Reviewers: ABataev, jdoerfert
Subscribers: jholewinski, guansong, cfe-commits, grokos
Tags: #clang
Differential Revision: https://reviews.llvm.org/D66673
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=369946&r1=369945&r2=369946&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Mon Aug 26 12:07:48 2019
@@ -107,6 +107,10 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
/// global_tid);
OMPRTL__kmpc_barrier_simple_spmd,
+ /// Call to int32_t __kmpc_warp_active_thread_mask(void);
+ OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
+ /// Call to void __kmpc_syncwarp(int32_t Mask);
+ OMPRTL_NVPTX__kmpc_syncwarp,
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -1794,6 +1798,20 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
->addFnAttr(llvm::Attribute::Convergent);
break;
}
+ case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
+ // Build int32_t __kmpc_warp_active_thread_mask(void);
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_syncwarp: {
+ // Build void __kmpc_syncwarp(kmp_int32 Mask);
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp");
+ break;
+ }
}
return RTLFn;
}
@@ -2700,6 +2718,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalR
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
+ // Get the mask of active threads in the warp.
+ llvm::Value *Mask = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
// Fetch team-local id of the thread.
llvm::Value *ThreadID = getNVPTXThreadID(CGF);
@@ -2740,8 +2761,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalR
// Block waits for all threads in current team to finish then increments the
// counter variable and returns to the loop.
CGF.EmitBlock(SyncBB);
- emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false,
- /*ForceSimpleCall=*/true);
+ // Reconverge active threads in the warp.
+ (void)CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
llvm::Value *IncCounterVal =
CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
Modified: cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp?rev=369946&r1=369945&r2=369946&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp Mon Aug 26 12:07:48 2019
@@ -343,6 +343,7 @@ int bar(int n){
// CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}})
// CHECK: [[CC:%.+]] = alloca i32,
+// CHECK: [[MASK:%.+]] = call i32 @__kmpc_warp_active_thread_mask()
// CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK: [[NUM_THREADS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: store i32 0, i32* [[CC]],
@@ -362,7 +363,7 @@ int bar(int n){
// CHECK: store i32
// CHECK: call void @__kmpc_end_critical(
-// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_syncwarp(i32 [[MASK]])
// CHECK: [[NEW_CC_VAL:%.+]] = add nsw i32 [[CC_VAL]], 1
// CHECK: store i32 [[NEW_CC_VAL]], i32* [[CC]],
// CHECK: br label
More information about the cfe-commits
mailing list