<div dir="ltr"><div dir="ltr">Hello Alexey,<br><br>It looks like this commit broke tests on one of our builders.<br>This failure did not manifest earlier because it was masked by other build failures.<br><br>Could you please have a look?<br><br>Thanks<br><br>Galina<br><br><br><a href="http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/13262">http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/13262</a><br>. . .<br>Failing Tests (10):<br> Clang :: OpenMP/declare_target_codegen_globalization.cpp<br> Clang :: OpenMP/nvptx_SPMD_codegen.cpp<br> Clang :: OpenMP/nvptx_data_sharing.cpp<br> Clang :: OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp<br> Clang :: OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp<br> Clang :: OpenMP/nvptx_parallel_codegen.cpp<br> Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp<br> Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp<br> Clang :: OpenMP/nvptx_teams_codegen.cpp<br> Clang :: OpenMP/nvptx_teams_reduction_codegen.cpp<br></div></div><br><div class="gmail_quote"><div dir="ltr">On Fri, Oct 12, 2018 at 9:06 AM Alexey Bataev via cfe-commits &lt;<a href="mailto:cfe-commits@lists.llvm.org">cfe-commits@lists.llvm.org</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: abataev<br>
Date: Fri Oct 12 09:04:20 2018<br>
New Revision: 344356<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=344356&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=344356&view=rev</a><br>
Log:<br>
[OPENMP][NVPTX]Reduce memory usage in orphaned functions.<br>
<br>
If the function has globalized variables and is called in the context of<br>
target/teams/distribute regions, it does not need to globalize 32<br>
copies of the same variables for memory coalescing; it is enough to<br>
have just one copy, because there is no parallel region.<br>
The patch does this by adding a call to the `__kmpc_parallel_level` function and<br>
checking its return value. If the code sees that the parallel level is<br>
0, then only one copy of each variable is allocated, not 32.<br>
<br>
Modified:<br>
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp<br>
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h<br>
cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp<br>
<br>
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=344356&r1=344355&r2=344356&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=344356&r1=344355&r2=344356&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)<br>
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Fri Oct 12 09:04:20 2018<br>
@@ -1972,6 +1972,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
return;<br>
if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {<br>
QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);<br>
+ QualType SecGlobalRecTy;<br>
<br>
// Recover pointer to this function's global record. The runtime will<br>
// handle the specifics of the allocation of the memory.<br>
@@ -1986,11 +1987,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
llvm::PointerType *GlobalRecPtrTy =<br>
CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();<br>
llvm::Value *GlobalRecCastAddr;<br>
+ llvm::Value *IsTTD = nullptr;<br>
if (WithSPMDCheck ||<br>
getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {<br>
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");<br>
llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");<br>
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");<br>
+ if (I->getSecond().SecondaryGlobalRecord.hasValue()) {<br>
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);<br>
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);<br>
+ llvm::Value *PL = CGF.EmitRuntimeCall(<br>
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),<br>
+ {RTLoc, ThreadID});<br>
+ IsTTD = Bld.CreateIsNull(PL);<br>
+ }<br>
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(<br>
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));<br>
Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);<br>
@@ -2003,11 +2013,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
// There is no need to emit line number for unconditional branch.<br>
(void)ApplyDebugLocation::CreateEmpty(CGF);<br>
CGF.EmitBlock(NonSPMDBB);<br>
+ llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize);<br>
+ if (const RecordDecl *SecGlobalizedVarsRecord =<br>
+ I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {<br>
+ SecGlobalRecTy =<br>
+ CGM.getContext().getRecordType(SecGlobalizedVarsRecord);<br>
+<br>
+ // Recover pointer to this function's global record. The runtime will<br>
+ // handle the specifics of the allocation of the memory.<br>
+ // Use actual memory size of the record including the padding<br>
+ // for alignment purposes.<br>
+ unsigned Alignment =<br>
+ CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();<br>
+ unsigned GlobalRecordSize =<br>
+ CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();<br>
+ GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);<br>
+ Size = Bld.CreateSelect(<br>
+ IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size);<br>
+ }<br>
// TODO: allow the usage of shared memory to be controlled by<br>
// the user, for now, default to global.<br>
llvm::Value *GlobalRecordSizeArg[] = {<br>
- llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),<br>
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};<br>
+ Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};<br>
llvm::Value *GlobalRecValue =<br>
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(<br>
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),<br>
@@ -2042,6 +2069,17 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
<br>
// Emit the "global alloca" which is a GEP from the global declaration<br>
// record using the pointer returned by the runtime.<br>
+ LValue SecBase;<br>
+ decltype(I->getSecond().LocalVarData)::const_iterator SecIt;<br>
+ if (IsTTD) {<br>
+ SecIt = I->getSecond().SecondaryLocalVarData->begin();<br>
+ llvm::PointerType *SecGlobalRecPtrTy =<br>
+ CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();<br>
+ SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(<br>
+ Bld.CreatePointerBitCastOrAddrSpaceCast(<br>
+ I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),<br>
+ SecGlobalRecTy);<br>
+ }<br>
for (auto &Rec : I->getSecond().LocalVarData) {<br>
bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);<br>
llvm::Value *ParValue;<br>
@@ -2055,23 +2093,32 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
// Emit VarAddr basing on lane-id if required.<br>
QualType VarTy;<br>
if (Rec.second.IsOnePerTeam) {<br>
- Rec.second.PrivateAddr = VarAddr.getAddress();<br>
VarTy = Rec.second.FD->getType();<br>
} else {<br>
llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(<br>
VarAddr.getAddress().getPointer(),<br>
{Bld.getInt32(0), getNVPTXLaneID(CGF)});<br>
- Rec.second.PrivateAddr =<br>
- Address(Ptr, CGM.getContext().getDeclAlign(Rec.first));<br>
VarTy =<br>
Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();<br>
- VarAddr = CGF.MakeAddrLValue(Rec.second.PrivateAddr, VarTy,<br>
- AlignmentSource::Decl);<br>
+ VarAddr = CGF.MakeAddrLValue(<br>
+ Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,<br>
+ AlignmentSource::Decl);<br>
}<br>
+ Rec.second.PrivateAddr = VarAddr.getAddress();<br>
if (WithSPMDCheck ||<br>
- getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {<br>
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {<br>
assert(I->getSecond().IsInSPMDModeFlag &&<br>
"Expected unknown execution mode or required SPMD check.");<br>
+ if (IsTTD) {<br>
+ assert(SecIt->second.IsOnePerTeam &&<br>
+ "Secondary glob data must be one per team.");<br>
+ LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD);<br>
+ VarAddr.setAddress(<br>
+ Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(),<br>
+ VarAddr.getPointer()),<br>
+ VarAddr.getAlignment()));<br>
+ Rec.second.PrivateAddr = VarAddr.getAddress();<br>
+ }<br>
Address GlobalPtr = Rec.second.PrivateAddr;<br>
Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());<br>
Rec.second.PrivateAddr = Address(<br>
@@ -2084,6 +2131,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
CGF.EmitStoreOfScalar(ParValue, VarAddr);<br>
I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());<br>
}<br>
+ ++SecIt;<br>
}<br>
}<br>
for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {<br>
@@ -4115,6 +4163,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP<br>
Data.insert(<br>
std::make_pair(VD, MappedVarData(FD, IsInTargetMasterThreadRegion)));<br>
}<br>
+ if (!IsInTargetMasterThreadRegion && !NeedToDelayGlobalization &&<br>
+ !IsInParallelRegion) {<br>
+ CheckVarsEscapingDeclContext VarChecker(CGF);<br>
+ VarChecker.Visit(Body);<br>
+ I->getSecond().SecondaryGlobalRecord =<br>
+ VarChecker.getGlobalizedRecord(/*IsInTargetMasterThreadRegion=*/true);<br>
+ I->getSecond().SecondaryLocalVarData.emplace();<br>
+ DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();<br>
+ for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {<br>
+ assert(VD->isCanonicalDecl() && "Expected canonical declaration");<br>
+ const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);<br>
+ Data.insert(std::make_pair(<br>
+ VD, MappedVarData(FD, /*IsInTargetMasterThreadRegion=*/true)));<br>
+ }<br>
+ }<br>
if (!NeedToDelayGlobalization) {<br>
emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);<br>
struct GlobalizationScope final : EHScopeStack::Cleanup {<br>
<br>
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=344356&r1=344355&r2=344356&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=344356&r1=344355&r2=344356&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)<br>
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Fri Oct 12 09:04:20 2018<br>
@@ -376,7 +376,7 @@ private:<br>
/// The data for the single globalized variable.<br>
struct MappedVarData {<br>
/// Corresponding field in the global record.<br>
- const FieldDecl * FD = nullptr;<br>
+ const FieldDecl *FD = nullptr;<br>
/// Corresponding address.<br>
Address PrivateAddr = Address::invalid();<br>
/// true, if only one element is required (for latprivates in SPMD mode),<br>
@@ -392,10 +392,12 @@ private:<br>
using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;<br>
struct FunctionData {<br>
DeclToAddrMapTy LocalVarData;<br>
+ llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;<br>
EscapedParamsTy EscapedParameters;<br>
llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;<br>
llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;<br>
const RecordDecl *GlobalRecord = nullptr;<br>
+ llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None;<br>
llvm::Value *GlobalRecordAddr = nullptr;<br>
llvm::Value *IsInSPMDModeFlag = nullptr;<br>
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;<br>
<br>
Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=344356&r1=344355&r2=344356&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=344356&r1=344355&r2=344356&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)<br>
+++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Fri Oct 12 09:04:20 2018<br>
@@ -557,20 +557,26 @@ int baz(int f, double &a) {<br>
// CHECK: alloca i32,<br>
// CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,<br>
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32,<br>
- // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*<br>
// CHECK: store i32 0, i32* [[ZERO_ADDR]]<br>
+ // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*<br>
+ // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @0, i32 [[GTID]])<br>
+ // CHECK: [[IS_TTD:%.+]] = icmp eq i16 %1, 0<br>
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()<br>
// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0<br>
// CHECK: br i1 [[IS_SPMD]], label<br>
// CHECK: br label<br>
- // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0)<br>
+ // CHECK: [[SIZE:%.+]] = select i1 [[IS_TTD]], i{{64|32}} 4, i{{64|32}} 128<br>
+ // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} [[SIZE]], i16 0)<br>
// CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST:%.+]]*<br>
// CHECK: br label<br>
// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [ [[REC_ADDR]], {{.+}} ]<br>
+ // CHECK: [[TTD_ITEMS:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to [[SEC_GLOBAL_ST:%.+]]*<br>
// CHECK: [[F_PTR_ARR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0<br>
// CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()<br>
// CHECK: [[LID:%.+]] = and i32 [[TID]], 31<br>
- // CHECK: [[GLOBAL_F_PTR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]<br>
+ // CHECK: [[GLOBAL_F_PTR_PAR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]<br>
+ // CHECK: [[GLOBAL_F_PTR_TTD:%.+]] = getelementptr inbounds [[SEC_GLOBAL_ST]], [[SEC_GLOBAL_ST]]* [[TTD_ITEMS]], i32 0, i32 0<br>
+ // CHECK: [[GLOBAL_F_PTR:%.+]] = select i1 [[IS_TTD]], i32* [[GLOBAL_F_PTR_TTD]], i32* [[GLOBAL_F_PTR_PAR]]<br>
// CHECK: [[F_PTR:%.+]] = select i1 [[IS_SPMD]], i32* [[LOCAL_F_PTR]], i32* [[GLOBAL_F_PTR]]<br>
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],<br>
<br>
<br>
<br>
_______________________________________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@lists.llvm.org" target="_blank">cfe-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits</a><br>
</blockquote></div>