r344356 - [OPENMP][NVPTX]Reduce memory usage in orphaned functions.

Galina Kistanova via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 15 16:03:20 PDT 2018


Hello Alexey,

It looks like this commit broke tests on one of our builders.
The failure did not show up right away because it was masked by other build failures.

Could you please have a look?

Thanks

Galina


http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/13262
. . .
Failing Tests (10):
    Clang :: OpenMP/declare_target_codegen_globalization.cpp
    Clang :: OpenMP/nvptx_SPMD_codegen.cpp
    Clang :: OpenMP/nvptx_data_sharing.cpp
    Clang :: OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
    Clang :: OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp
    Clang :: OpenMP/nvptx_parallel_codegen.cpp
    Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
    Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
    Clang :: OpenMP/nvptx_teams_codegen.cpp
    Clang :: OpenMP/nvptx_teams_reduction_codegen.cpp

On Fri, Oct 12, 2018 at 9:06 AM Alexey Bataev via cfe-commits <
cfe-commits at lists.llvm.org> wrote:

> Author: abataev
> Date: Fri Oct 12 09:04:20 2018
> New Revision: 344356
>
> URL: http://llvm.org/viewvc/llvm-project?rev=344356&view=rev
> Log:
> [OPENMP][NVPTX]Reduce memory usage in orphaned functions.
>
> If a function has globalized variables and is called in the context of
> target/teams/distribute regions, it does not need to globalize 32
> copies of the same variables for memory coalescing; it is enough to
> have just one copy, because there is no parallel region.
> The patch does this by adding a call to the `__kmpc_parallel_level`
> function and checking its return value. If the code sees that the
> parallel level is 0, then only one variable is allocated, not 32.
>
> Modified:
>     cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
>     cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
>     cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
>
> Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=344356&r1=344355&r2=344356&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Fri Oct 12 09:04:20 2018
> @@ -1972,6 +1972,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
>      return;
>    if (const RecordDecl *GlobalizedVarsRecord =
> I->getSecond().GlobalRecord) {
>      QualType GlobalRecTy =
> CGM.getContext().getRecordType(GlobalizedVarsRecord);
> +    QualType SecGlobalRecTy;
>
>      // Recover pointer to this function's global record. The runtime will
>      // handle the specifics of the allocation of the memory.
> @@ -1986,11 +1987,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
>      llvm::PointerType *GlobalRecPtrTy =
>          CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
>      llvm::Value *GlobalRecCastAddr;
> +    llvm::Value *IsTTD = nullptr;
>      if (WithSPMDCheck ||
>          getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
>        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
>        llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
>        llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
> +      if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
> +        llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
> +        llvm::Value *ThreadID = getThreadID(CGF, Loc);
> +        llvm::Value *PL = CGF.EmitRuntimeCall(
> +            createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
> +            {RTLoc, ThreadID});
> +        IsTTD = Bld.CreateIsNull(PL);
> +      }
>        llvm::Value *IsSPMD =
> Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
>
>  createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
>        Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
> @@ -2003,11 +2013,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
>        // There is no need to emit line number for unconditional branch.
>        (void)ApplyDebugLocation::CreateEmpty(CGF);
>        CGF.EmitBlock(NonSPMDBB);
> +      llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy,
> GlobalRecordSize);
> +      if (const RecordDecl *SecGlobalizedVarsRecord =
> +              I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {
> +        SecGlobalRecTy =
> +            CGM.getContext().getRecordType(SecGlobalizedVarsRecord);
> +
> +        // Recover pointer to this function's global record. The runtime
> will
> +        // handle the specifics of the allocation of the memory.
> +        // Use actual memory size of the record including the padding
> +        // for alignment purposes.
> +        unsigned Alignment =
> +
> CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();
> +        unsigned GlobalRecordSize =
> +
> CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();
> +        GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
> +        Size = Bld.CreateSelect(
> +            IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
> Size);
> +      }
>        // TODO: allow the usage of shared memory to be controlled by
>        // the user, for now, default to global.
>        llvm::Value *GlobalRecordSizeArg[] = {
> -          llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
> -          CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
> +          Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
>        llvm::Value *GlobalRecValue =
>            CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
>
>  OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
> @@ -2042,6 +2069,17 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
>
>      // Emit the "global alloca" which is a GEP from the global declaration
>      // record using the pointer returned by the runtime.
> +    LValue SecBase;
> +    decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
> +    if (IsTTD) {
> +      SecIt = I->getSecond().SecondaryLocalVarData->begin();
> +      llvm::PointerType *SecGlobalRecPtrTy =
> +          CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();
> +      SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(
> +          Bld.CreatePointerBitCastOrAddrSpaceCast(
> +              I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
> +          SecGlobalRecTy);
> +    }
>      for (auto &Rec : I->getSecond().LocalVarData) {
>        bool EscapedParam =
> I->getSecond().EscapedParameters.count(Rec.first);
>        llvm::Value *ParValue;
> @@ -2055,23 +2093,32 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
>        // Emit VarAddr basing on lane-id if required.
>        QualType VarTy;
>        if (Rec.second.IsOnePerTeam) {
> -        Rec.second.PrivateAddr = VarAddr.getAddress();
>          VarTy = Rec.second.FD->getType();
>        } else {
>          llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
>              VarAddr.getAddress().getPointer(),
>              {Bld.getInt32(0), getNVPTXLaneID(CGF)});
> -        Rec.second.PrivateAddr =
> -            Address(Ptr, CGM.getContext().getDeclAlign(Rec.first));
>          VarTy =
>
>  Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
> -        VarAddr = CGF.MakeAddrLValue(Rec.second.PrivateAddr, VarTy,
> -                                     AlignmentSource::Decl);
> +        VarAddr = CGF.MakeAddrLValue(
> +            Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,
> +            AlignmentSource::Decl);
>        }
> +      Rec.second.PrivateAddr = VarAddr.getAddress();
>        if (WithSPMDCheck ||
> -                getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
> +          getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
>          assert(I->getSecond().IsInSPMDModeFlag &&
>                 "Expected unknown execution mode or required SPMD check.");
> +        if (IsTTD) {
> +          assert(SecIt->second.IsOnePerTeam &&
> +                 "Secondary glob data must be one per team.");
> +          LValue SecVarAddr = CGF.EmitLValueForField(SecBase,
> SecIt->second.FD);
> +          VarAddr.setAddress(
> +              Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(),
> +                                       VarAddr.getPointer()),
> +                      VarAddr.getAlignment()));
> +          Rec.second.PrivateAddr = VarAddr.getAddress();
> +        }
>          Address GlobalPtr = Rec.second.PrivateAddr;
>          Address LocalAddr = CGF.CreateMemTemp(VarTy,
> Rec.second.FD->getName());
>          Rec.second.PrivateAddr = Address(
> @@ -2084,6 +2131,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
>          CGF.EmitStoreOfScalar(ParValue, VarAddr);
>          I->getSecond().MappedParams->setVarAddr(CGF, VD,
> VarAddr.getAddress());
>        }
> +      ++SecIt;
>      }
>    }
>    for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
> @@ -4115,6 +4163,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP
>      Data.insert(
>          std::make_pair(VD, MappedVarData(FD,
> IsInTargetMasterThreadRegion)));
>    }
> +  if (!IsInTargetMasterThreadRegion && !NeedToDelayGlobalization &&
> +      !IsInParallelRegion) {
> +    CheckVarsEscapingDeclContext VarChecker(CGF);
> +    VarChecker.Visit(Body);
> +    I->getSecond().SecondaryGlobalRecord =
> +
> VarChecker.getGlobalizedRecord(/*IsInTargetMasterThreadRegion=*/true);
> +    I->getSecond().SecondaryLocalVarData.emplace();
> +    DeclToAddrMapTy &Data =
> I->getSecond().SecondaryLocalVarData.getValue();
> +    for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
> +      assert(VD->isCanonicalDecl() && "Expected canonical declaration");
> +      const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
> +      Data.insert(std::make_pair(
> +          VD, MappedVarData(FD, /*IsInTargetMasterThreadRegion=*/true)));
> +    }
> +  }
>    if (!NeedToDelayGlobalization) {
>      emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
>      struct GlobalizationScope final : EHScopeStack::Cleanup {
>
> Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=344356&r1=344355&r2=344356&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
> +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Fri Oct 12 09:04:20 2018
> @@ -376,7 +376,7 @@ private:
>    /// The data for the single globalized variable.
>    struct MappedVarData {
>      /// Corresponding field in the global record.
> -    const FieldDecl * FD = nullptr;
> +    const FieldDecl *FD = nullptr;
>      /// Corresponding address.
>      Address PrivateAddr = Address::invalid();
>      /// true, if only one element is required (for latprivates in SPMD
> mode),
> @@ -392,10 +392,12 @@ private:
>    using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
>    struct FunctionData {
>      DeclToAddrMapTy LocalVarData;
> +    llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
>      EscapedParamsTy EscapedParameters;
>      llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
>      llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
>      const RecordDecl *GlobalRecord = nullptr;
> +    llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None;
>      llvm::Value *GlobalRecordAddr = nullptr;
>      llvm::Value *IsInSPMDModeFlag = nullptr;
>      std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
>
> Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=344356&r1=344355&r2=344356&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)
> +++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Fri Oct 12 09:04:20 2018
> @@ -557,20 +557,26 @@ int baz(int f, double &a) {
>    // CHECK: alloca i32,
>    // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,
>    // CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
> -  // CHECK: [[GTID:%.+]] = call i32
> @__kmpc_global_thread_num(%struct.ident_t*
>    // CHECK: store i32 0, i32* [[ZERO_ADDR]]
> +  // CHECK: [[GTID:%.+]] = call i32
> @__kmpc_global_thread_num(%struct.ident_t*
> +  // CHECK: [[PAR_LEVEL:%.+]] = call i16
> @__kmpc_parallel_level(%struct.ident_t* @0, i32 [[GTID]])
> +  // CHECK: [[IS_TTD:%.+]] = icmp eq i16 %1, 0
>    // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
>    // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
>    // CHECK: br i1 [[IS_SPMD]], label
>    // CHECK: br label
> -  // CHECK: [[PTR:%.+]] = call i8*
> @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0)
> +  // CHECK: [[SIZE:%.+]] = select i1 [[IS_TTD]], i{{64|32}} 4, i{{64|32}}
> 128
> +  // CHECK: [[PTR:%.+]] = call i8*
> @__kmpc_data_sharing_push_stack(i{{64|32}} [[SIZE]], i16 0)
>    // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST:%.+]]*
>    // CHECK: br label
>    // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [
> [[REC_ADDR]], {{.+}} ]
> +  // CHECK: [[TTD_ITEMS:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to
> [[SEC_GLOBAL_ST:%.+]]*
>    // CHECK: [[F_PTR_ARR:%.+]] = getelementptr inbounds [[GLOBAL_ST]],
> [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0
>    // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
>    // CHECK: [[LID:%.+]] = and i32 [[TID]], 31
> -  // CHECK: [[GLOBAL_F_PTR:%.+]] = getelementptr inbounds [32 x i32], [32
> x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]
> +  // CHECK: [[GLOBAL_F_PTR_PAR:%.+]] = getelementptr inbounds [32 x i32],
> [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]
> +  // CHECK: [[GLOBAL_F_PTR_TTD:%.+]] = getelementptr inbounds
> [[SEC_GLOBAL_ST]], [[SEC_GLOBAL_ST]]* [[TTD_ITEMS]], i32 0, i32 0
> +  // CHECK: [[GLOBAL_F_PTR:%.+]] = select i1 [[IS_TTD]], i32*
> [[GLOBAL_F_PTR_TTD]], i32* [[GLOBAL_F_PTR_PAR]]
>    // CHECK: [[F_PTR:%.+]] = select i1 [[IS_SPMD]], i32* [[LOCAL_F_PTR]],
> i32* [[GLOBAL_F_PTR]]
>    // CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
>
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20181015/e6d38765/attachment-0001.html>


More information about the cfe-commits mailing list