r344356 - [OPENMP][NVPTX]Reduce memory usage in orphaned functions.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 15 17:14:01 PDT 2018
Hi Galina, thanks for letting me know. Committed rL344574 to fix this problem.
Best regards,
Alexey Bataev
15 окт. 2018 г., в 19:03, Galina Kistanova <gkistanova at gmail.com<mailto:gkistanova at gmail.com>> написал(а):
Hello Alexey,
It looks like this commit broke tests on one of our builders.
This failure did not manifest, but masked by another build failures.
Please have a look?
Thanks
Galina
http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/13262
. . .
Failing Tests (10):
Clang :: OpenMP/declare_target_codegen_globalization.cpp
Clang :: OpenMP/nvptx_SPMD_codegen.cpp
Clang :: OpenMP/nvptx_data_sharing.cpp
Clang :: OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
Clang :: OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp
Clang :: OpenMP/nvptx_parallel_codegen.cpp
Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
Clang :: OpenMP/nvptx_teams_codegen.cpp
Clang :: OpenMP/nvptx_teams_reduction_codegen.cpp
On Fri, Oct 12, 2018 at 9:06 AM Alexey Bataev via cfe-commits <cfe-commits at lists.llvm.org<mailto:cfe-commits at lists.llvm.org>> wrote:
Author: abataev
Date: Fri Oct 12 09:04:20 2018
New Revision: 344356
URL: http://llvm.org/viewvc/llvm-project?rev=344356&view=rev
Log:
[OPENMP][NVPTX]Reduce memory usage in orphaned functions.
if the function has globalized variables and called in context of
target/teams/distribute regions, it does not need to globalize 32
copies of the same variables for memory coalescing, it is enough to
have just one copy, because there is parallel region.
Patch does this by adding call for `__kmpc_parallel_level` function and
checking its return value. If the code sees that the parallel level is
0, then only one variable is allocated, not 32.
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=344356&r1=344355&r2=344356&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Fri Oct 12 09:04:20 2018
@@ -1972,6 +1972,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
return;
if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+ QualType SecGlobalRecTy;
// Recover pointer to this function's global record. The runtime will
// handle the specifics of the allocation of the memory.
@@ -1986,11 +1987,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
llvm::PointerType *GlobalRecPtrTy =
CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
llvm::Value *GlobalRecCastAddr;
+ llvm::Value *IsTTD = nullptr;
if (WithSPMDCheck ||
getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+ if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *PL = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+ {RTLoc, ThreadID});
+ IsTTD = Bld.CreateIsNull(PL);
+ }
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
@@ -2003,11 +2013,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(NonSPMDBB);
+ llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize);
+ if (const RecordDecl *SecGlobalizedVarsRecord =
+ I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {
+ SecGlobalRecTy =
+ CGM.getContext().getRecordType(SecGlobalizedVarsRecord);
+
+ // Recover pointer to this function's global record. The runtime will
+ // handle the specifics of the allocation of the memory.
+ // Use actual memory size of the record including the padding
+ // for alignment purposes.
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();
+ unsigned GlobalRecordSize =
+ CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();
+ GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ Size = Bld.CreateSelect(
+ IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size);
+ }
// TODO: allow the usage of shared memory to be controlled by
// the user, for now, default to global.
llvm::Value *GlobalRecordSizeArg[] = {
- llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue =
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
@@ -2042,6 +2069,17 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
// Emit the "global alloca" which is a GEP from the global declaration
// record using the pointer returned by the runtime.
+ LValue SecBase;
+ decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
+ if (IsTTD) {
+ SecIt = I->getSecond().SecondaryLocalVarData->begin();
+ llvm::PointerType *SecGlobalRecPtrTy =
+ CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();
+ SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
+ SecGlobalRecTy);
+ }
for (auto &Rec : I->getSecond().LocalVarData) {
bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
llvm::Value *ParValue;
@@ -2055,23 +2093,32 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
// Emit VarAddr basing on lane-id if required.
QualType VarTy;
if (Rec.second.IsOnePerTeam) {
- Rec.second.PrivateAddr = VarAddr.getAddress();
VarTy = Rec.second.FD->getType();
} else {
llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
VarAddr.getAddress().getPointer(),
{Bld.getInt32(0), getNVPTXLaneID(CGF)});
- Rec.second.PrivateAddr =
- Address(Ptr, CGM.getContext().getDeclAlign(Rec.first));
VarTy =
Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
- VarAddr = CGF.MakeAddrLValue(Rec.second.PrivateAddr, VarTy,
- AlignmentSource::Decl);
+ VarAddr = CGF.MakeAddrLValue(
+ Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,
+ AlignmentSource::Decl);
}
+ Rec.second.PrivateAddr = VarAddr.getAddress();
if (WithSPMDCheck ||
- getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
assert(I->getSecond().IsInSPMDModeFlag &&
"Expected unknown execution mode or required SPMD check.");
+ if (IsTTD) {
+ assert(SecIt->second.IsOnePerTeam &&
+ "Secondary glob data must be one per team.");
+ LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD);
+ VarAddr.setAddress(
+ Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(),
+ VarAddr.getPointer()),
+ VarAddr.getAlignment()));
+ Rec.second.PrivateAddr = VarAddr.getAddress();
+ }
Address GlobalPtr = Rec.second.PrivateAddr;
Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());
Rec.second.PrivateAddr = Address(
@@ -2084,6 +2131,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
CGF.EmitStoreOfScalar(ParValue, VarAddr);
I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
}
+ ++SecIt;
}
}
for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
@@ -4115,6 +4163,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP
Data.insert(
std::make_pair(VD, MappedVarData(FD, IsInTargetMasterThreadRegion)));
}
+ if (!IsInTargetMasterThreadRegion && !NeedToDelayGlobalization &&
+ !IsInParallelRegion) {
+ CheckVarsEscapingDeclContext VarChecker(CGF);
+ VarChecker.Visit(Body);
+ I->getSecond().SecondaryGlobalRecord =
+ VarChecker.getGlobalizedRecord(/*IsInTargetMasterThreadRegion=*/true);
+ I->getSecond().SecondaryLocalVarData.emplace();
+ DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
+ for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
+ assert(VD->isCanonicalDecl() && "Expected canonical declaration");
+ const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
+ Data.insert(std::make_pair(
+ VD, MappedVarData(FD, /*IsInTargetMasterThreadRegion=*/true)));
+ }
+ }
if (!NeedToDelayGlobalization) {
emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
struct GlobalizationScope final : EHScopeStack::Cleanup {
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=344356&r1=344355&r2=344356&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Fri Oct 12 09:04:20 2018
@@ -376,7 +376,7 @@ private:
/// The data for the single globalized variable.
struct MappedVarData {
/// Corresponding field in the global record.
- const FieldDecl * FD = nullptr;
+ const FieldDecl *FD = nullptr;
/// Corresponding address.
Address PrivateAddr = Address::invalid();
/// true, if only one element is required (for latprivates in SPMD mode),
@@ -392,10 +392,12 @@ private:
using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
struct FunctionData {
DeclToAddrMapTy LocalVarData;
+ llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
EscapedParamsTy EscapedParameters;
llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
const RecordDecl *GlobalRecord = nullptr;
+ llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None;
llvm::Value *GlobalRecordAddr = nullptr;
llvm::Value *IsInSPMDModeFlag = nullptr;
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=344356&r1=344355&r2=344356&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Fri Oct 12 09:04:20 2018
@@ -557,20 +557,26 @@ int baz(int f, double &a) {
// CHECK: alloca i32,
// CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
- // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: store i32 0, i32* [[ZERO_ADDR]]
+ // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
+ // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @0, i32 [[GTID]])
+ // CHECK: [[IS_TTD:%.+]] = icmp eq i16 %1, 0
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
// CHECK: br i1 [[IS_SPMD]], label
// CHECK: br label
- // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0)
+ // CHECK: [[SIZE:%.+]] = select i1 [[IS_TTD]], i{{64|32}} 4, i{{64|32}} 128
+ // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} [[SIZE]], i16 0)
// CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST:%.+]]*
// CHECK: br label
// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [ [[REC_ADDR]], {{.+}} ]
+ // CHECK: [[TTD_ITEMS:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to [[SEC_GLOBAL_ST:%.+]]*
// CHECK: [[F_PTR_ARR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0
// CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK: [[LID:%.+]] = and i32 [[TID]], 31
- // CHECK: [[GLOBAL_F_PTR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]
+ // CHECK: [[GLOBAL_F_PTR_PAR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]
+ // CHECK: [[GLOBAL_F_PTR_TTD:%.+]] = getelementptr inbounds [[SEC_GLOBAL_ST]], [[SEC_GLOBAL_ST]]* [[TTD_ITEMS]], i32 0, i32 0
+ // CHECK: [[GLOBAL_F_PTR:%.+]] = select i1 [[IS_TTD]], i32* [[GLOBAL_F_PTR_TTD]], i32* [[GLOBAL_F_PTR_PAR]]
// CHECK: [[F_PTR:%.+]] = select i1 [[IS_SPMD]], i32* [[LOCAL_F_PTR]], i32* [[GLOBAL_F_PTR]]
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
_______________________________________________
cfe-commits mailing list
cfe-commits at lists.llvm.org<mailto:cfe-commits at lists.llvm.org>
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20181016/af7ee7da/attachment-0001.html>
More information about the cfe-commits
mailing list