r344356 - [OPENMP][NVPTX]Reduce memory usage in orphaned functions.
Galina Kistanova via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 15 16:03:20 PDT 2018
Hello Alexey,
It looks like this commit broke tests on one of our builders.
This failure did not show up earlier because it was masked by other build failures.
Could you please have a look?
Thanks
Galina
http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/13262
. . .
Failing Tests (10):
Clang :: OpenMP/declare_target_codegen_globalization.cpp
Clang :: OpenMP/nvptx_SPMD_codegen.cpp
Clang :: OpenMP/nvptx_data_sharing.cpp
Clang :: OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
Clang :: OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp
Clang :: OpenMP/nvptx_parallel_codegen.cpp
Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
Clang :: OpenMP/nvptx_teams_codegen.cpp
Clang :: OpenMP/nvptx_teams_reduction_codegen.cpp
On Fri, Oct 12, 2018 at 9:06 AM Alexey Bataev via cfe-commits <
cfe-commits at lists.llvm.org> wrote:
> Author: abataev
> Date: Fri Oct 12 09:04:20 2018
> New Revision: 344356
>
> URL: http://llvm.org/viewvc/llvm-project?rev=344356&view=rev
> Log:
> [OPENMP][NVPTX]Reduce memory usage in orphaned functions.
>
> If a function has globalized variables and is called in the context of
> target/teams/distribute regions, it does not need to globalize 32
> copies of the same variables for memory coalescing; it is enough to
> have just one copy, because there is no enclosing parallel region.
> The patch does this by adding a call to the `__kmpc_parallel_level`
> function and checking its return value. If the code sees that the
> parallel level is 0, then only one variable is allocated, not 32.
>
> Modified:
> cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
> cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
> cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
>
> Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=344356&r1=344355&r2=344356&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Fri Oct 12 09:04:20 2018
> @@ -1972,6 +1972,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
> return;
> if (const RecordDecl *GlobalizedVarsRecord =
> I->getSecond().GlobalRecord) {
> QualType GlobalRecTy =
> CGM.getContext().getRecordType(GlobalizedVarsRecord);
> + QualType SecGlobalRecTy;
>
> // Recover pointer to this function's global record. The runtime will
> // handle the specifics of the allocation of the memory.
> @@ -1986,11 +1987,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
> llvm::PointerType *GlobalRecPtrTy =
> CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
> llvm::Value *GlobalRecCastAddr;
> + llvm::Value *IsTTD = nullptr;
> if (WithSPMDCheck ||
> getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
> llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
> llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
> llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
> + if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
> + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
> + llvm::Value *ThreadID = getThreadID(CGF, Loc);
> + llvm::Value *PL = CGF.EmitRuntimeCall(
> + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
> + {RTLoc, ThreadID});
> + IsTTD = Bld.CreateIsNull(PL);
> + }
> llvm::Value *IsSPMD =
> Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
>
> createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
> Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
> @@ -2003,11 +2013,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
> // There is no need to emit line number for unconditional branch.
> (void)ApplyDebugLocation::CreateEmpty(CGF);
> CGF.EmitBlock(NonSPMDBB);
> + llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy,
> GlobalRecordSize);
> + if (const RecordDecl *SecGlobalizedVarsRecord =
> + I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {
> + SecGlobalRecTy =
> + CGM.getContext().getRecordType(SecGlobalizedVarsRecord);
> +
> + // Recover pointer to this function's global record. The runtime
> will
> + // handle the specifics of the allocation of the memory.
> + // Use actual memory size of the record including the padding
> + // for alignment purposes.
> + unsigned Alignment =
> +
> CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();
> + unsigned GlobalRecordSize =
> +
> CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();
> + GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
> + Size = Bld.CreateSelect(
> + IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
> Size);
> + }
> // TODO: allow the usage of shared memory to be controlled by
> // the user, for now, default to global.
> llvm::Value *GlobalRecordSizeArg[] = {
> - llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
> - CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
> + Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
> llvm::Value *GlobalRecValue =
> CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
>
> OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
> @@ -2042,6 +2069,17 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
>
> // Emit the "global alloca" which is a GEP from the global declaration
> // record using the pointer returned by the runtime.
> + LValue SecBase;
> + decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
> + if (IsTTD) {
> + SecIt = I->getSecond().SecondaryLocalVarData->begin();
> + llvm::PointerType *SecGlobalRecPtrTy =
> + CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();
> + SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(
> + Bld.CreatePointerBitCastOrAddrSpaceCast(
> + I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
> + SecGlobalRecTy);
> + }
> for (auto &Rec : I->getSecond().LocalVarData) {
> bool EscapedParam =
> I->getSecond().EscapedParameters.count(Rec.first);
> llvm::Value *ParValue;
> @@ -2055,23 +2093,32 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
> // Emit VarAddr basing on lane-id if required.
> QualType VarTy;
> if (Rec.second.IsOnePerTeam) {
> - Rec.second.PrivateAddr = VarAddr.getAddress();
> VarTy = Rec.second.FD->getType();
> } else {
> llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
> VarAddr.getAddress().getPointer(),
> {Bld.getInt32(0), getNVPTXLaneID(CGF)});
> - Rec.second.PrivateAddr =
> - Address(Ptr, CGM.getContext().getDeclAlign(Rec.first));
> VarTy =
>
> Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
> - VarAddr = CGF.MakeAddrLValue(Rec.second.PrivateAddr, VarTy,
> - AlignmentSource::Decl);
> + VarAddr = CGF.MakeAddrLValue(
> + Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,
> + AlignmentSource::Decl);
> }
> + Rec.second.PrivateAddr = VarAddr.getAddress();
> if (WithSPMDCheck ||
> - getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
> + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
> assert(I->getSecond().IsInSPMDModeFlag &&
> "Expected unknown execution mode or required SPMD check.");
> + if (IsTTD) {
> + assert(SecIt->second.IsOnePerTeam &&
> + "Secondary glob data must be one per team.");
> + LValue SecVarAddr = CGF.EmitLValueForField(SecBase,
> SecIt->second.FD);
> + VarAddr.setAddress(
> + Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(),
> + VarAddr.getPointer()),
> + VarAddr.getAlignment()));
> + Rec.second.PrivateAddr = VarAddr.getAddress();
> + }
> Address GlobalPtr = Rec.second.PrivateAddr;
> Address LocalAddr = CGF.CreateMemTemp(VarTy,
> Rec.second.FD->getName());
> Rec.second.PrivateAddr = Address(
> @@ -2084,6 +2131,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
> CGF.EmitStoreOfScalar(ParValue, VarAddr);
> I->getSecond().MappedParams->setVarAddr(CGF, VD,
> VarAddr.getAddress());
> }
> + ++SecIt;
> }
> }
> for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
> @@ -4115,6 +4163,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP
> Data.insert(
> std::make_pair(VD, MappedVarData(FD,
> IsInTargetMasterThreadRegion)));
> }
> + if (!IsInTargetMasterThreadRegion && !NeedToDelayGlobalization &&
> + !IsInParallelRegion) {
> + CheckVarsEscapingDeclContext VarChecker(CGF);
> + VarChecker.Visit(Body);
> + I->getSecond().SecondaryGlobalRecord =
> +
> VarChecker.getGlobalizedRecord(/*IsInTargetMasterThreadRegion=*/true);
> + I->getSecond().SecondaryLocalVarData.emplace();
> + DeclToAddrMapTy &Data =
> I->getSecond().SecondaryLocalVarData.getValue();
> + for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
> + assert(VD->isCanonicalDecl() && "Expected canonical declaration");
> + const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
> + Data.insert(std::make_pair(
> + VD, MappedVarData(FD, /*IsInTargetMasterThreadRegion=*/true)));
> + }
> + }
> if (!NeedToDelayGlobalization) {
> emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
> struct GlobalizationScope final : EHScopeStack::Cleanup {
>
> Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=344356&r1=344355&r2=344356&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
> +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Fri Oct 12 09:04:20 2018
> @@ -376,7 +376,7 @@ private:
> /// The data for the single globalized variable.
> struct MappedVarData {
> /// Corresponding field in the global record.
> - const FieldDecl * FD = nullptr;
> + const FieldDecl *FD = nullptr;
> /// Corresponding address.
> Address PrivateAddr = Address::invalid();
> /// true, if only one element is required (for latprivates in SPMD
> mode),
> @@ -392,10 +392,12 @@ private:
> using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
> struct FunctionData {
> DeclToAddrMapTy LocalVarData;
> + llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
> EscapedParamsTy EscapedParameters;
> llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
> llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
> const RecordDecl *GlobalRecord = nullptr;
> + llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None;
> llvm::Value *GlobalRecordAddr = nullptr;
> llvm::Value *IsInSPMDModeFlag = nullptr;
> std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
>
> Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=344356&r1=344355&r2=344356&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)
> +++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Fri Oct 12 09:04:20 2018
> @@ -557,20 +557,26 @@ int baz(int f, double &a) {
> // CHECK: alloca i32,
> // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,
> // CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
> - // CHECK: [[GTID:%.+]] = call i32
> @__kmpc_global_thread_num(%struct.ident_t*
> // CHECK: store i32 0, i32* [[ZERO_ADDR]]
> + // CHECK: [[GTID:%.+]] = call i32
> @__kmpc_global_thread_num(%struct.ident_t*
> + // CHECK: [[PAR_LEVEL:%.+]] = call i16
> @__kmpc_parallel_level(%struct.ident_t* @0, i32 [[GTID]])
> + // CHECK: [[IS_TTD:%.+]] = icmp eq i16 %1, 0
> // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
> // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
> // CHECK: br i1 [[IS_SPMD]], label
> // CHECK: br label
> - // CHECK: [[PTR:%.+]] = call i8*
> @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0)
> + // CHECK: [[SIZE:%.+]] = select i1 [[IS_TTD]], i{{64|32}} 4, i{{64|32}}
> 128
> + // CHECK: [[PTR:%.+]] = call i8*
> @__kmpc_data_sharing_push_stack(i{{64|32}} [[SIZE]], i16 0)
> // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST:%.+]]*
> // CHECK: br label
> // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [
> [[REC_ADDR]], {{.+}} ]
> + // CHECK: [[TTD_ITEMS:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to
> [[SEC_GLOBAL_ST:%.+]]*
> // CHECK: [[F_PTR_ARR:%.+]] = getelementptr inbounds [[GLOBAL_ST]],
> [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0
> // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
> // CHECK: [[LID:%.+]] = and i32 [[TID]], 31
> - // CHECK: [[GLOBAL_F_PTR:%.+]] = getelementptr inbounds [32 x i32], [32
> x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]
> + // CHECK: [[GLOBAL_F_PTR_PAR:%.+]] = getelementptr inbounds [32 x i32],
> [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]
> + // CHECK: [[GLOBAL_F_PTR_TTD:%.+]] = getelementptr inbounds
> [[SEC_GLOBAL_ST]], [[SEC_GLOBAL_ST]]* [[TTD_ITEMS]], i32 0, i32 0
> + // CHECK: [[GLOBAL_F_PTR:%.+]] = select i1 [[IS_TTD]], i32*
> [[GLOBAL_F_PTR_TTD]], i32* [[GLOBAL_F_PTR_PAR]]
> // CHECK: [[F_PTR:%.+]] = select i1 [[IS_SPMD]], i32* [[LOCAL_F_PTR]],
> i32* [[GLOBAL_F_PTR]]
> // CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
>
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20181015/e6d38765/attachment-0001.html>
More information about the cfe-commits
mailing list