<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body dir="auto">
Hi Galina, thanks for letting me know. Committed <span style="background-color: rgba(255, 255, 255, 0);">rL344574 to fix this problem. </span><br>
<br>
<div id="AppleMailSignature" dir="ltr">Best regards,
<div>Alexey Bataev</div>
</div>
<div dir="ltr"><br>
15 окт. 2018 г., в 19:03, Galina Kistanova &lt;<a href="mailto:gkistanova@gmail.com">gkistanova@gmail.com</a>&gt; написал(а):<br>
<br>
</div>
<blockquote type="cite">
<div dir="ltr">
<div dir="ltr">
<div dir="ltr">Hello Alexey,<br>
<br>
It looks like this commit broke tests on one of our builders.<br>
This failure did not manifest earlier because it was masked by other build failures.<br>
<br>
Could you please have a look?<br>
<br>
Thanks<br>
<br>
Galina<br>
<br>
<br>
<a href="http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/13262">http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/13262</a><br>
. . .<br>
Failing Tests (10):<br>
    Clang :: OpenMP/declare_target_codegen_globalization.cpp<br>
    Clang :: OpenMP/nvptx_SPMD_codegen.cpp<br>
    Clang :: OpenMP/nvptx_data_sharing.cpp<br>
    Clang :: OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp<br>
    Clang :: OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp<br>
    Clang :: OpenMP/nvptx_parallel_codegen.cpp<br>
    Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp<br>
    Clang :: OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp<br>
    Clang :: OpenMP/nvptx_teams_codegen.cpp<br>
    Clang :: OpenMP/nvptx_teams_reduction_codegen.cpp<br>
</div>
</div>
<br>
<div class="gmail_quote">
<div dir="ltr">On Fri, Oct 12, 2018 at 9:06 AM Alexey Bataev via cfe-commits &lt;<a href="mailto:cfe-commits@lists.llvm.org">cfe-commits@lists.llvm.org</a>&gt; wrote:<br>
</div>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Author: abataev<br>
Date: Fri Oct 12 09:04:20 2018<br>
New Revision: 344356<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=344356&view=rev" rel="noreferrer" target="_blank">
http://llvm.org/viewvc/llvm-project?rev=344356&view=rev</a><br>
Log:<br>
[OPENMP][NVPTX]Reduce memory usage in orphaned functions.<br>
<br>
If the function has globalized variables and is called in the context of<br>
target/teams/distribute regions, it does not need to globalize 32<br>
copies of the same variables for memory coalescing; it is enough to<br>
have just one copy, because there is no parallel region.<br>
Patch does this by adding call for `__kmpc_parallel_level` function and<br>
checking its return value. If the code sees that the parallel level is<br>
0, then only one variable is allocated, not 32.<br>
<br>
Modified:<br>
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp<br>
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h<br>
    cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp<br>
<br>
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=344356&r1=344355&r2=344356&view=diff" rel="noreferrer" target="_blank">
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=344356&r1=344355&r2=344356&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)<br>
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Fri Oct 12 09:04:20 2018<br>
@@ -1972,6 +1972,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
     return;<br>
   if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {<br>
     QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);<br>
+    QualType SecGlobalRecTy;<br>
<br>
     // Recover pointer to this function's global record. The runtime will<br>
     // handle the specifics of the allocation of the memory.<br>
@@ -1986,11 +1987,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
     llvm::PointerType *GlobalRecPtrTy =<br>
         CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();<br>
     llvm::Value *GlobalRecCastAddr;<br>
+    llvm::Value *IsTTD = nullptr;<br>
     if (WithSPMDCheck ||<br>
         getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {<br>
       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");<br>
       llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");<br>
       llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");<br>
+      if (I->getSecond().SecondaryGlobalRecord.hasValue()) {<br>
+        llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);<br>
+        llvm::Value *ThreadID = getThreadID(CGF, Loc);<br>
+        llvm::Value *PL = CGF.EmitRuntimeCall(<br>
+            createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),<br>
+            {RTLoc, ThreadID});<br>
+        IsTTD = Bld.CreateIsNull(PL);<br>
+      }<br>
       llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(<br>
           createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));<br>
       Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);<br>
@@ -2003,11 +2013,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
       // There is no need to emit line number for unconditional branch.<br>
       (void)ApplyDebugLocation::CreateEmpty(CGF);<br>
       CGF.EmitBlock(NonSPMDBB);<br>
+      llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize);<br>
+      if (const RecordDecl *SecGlobalizedVarsRecord =<br>
+              I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {<br>
+        SecGlobalRecTy =<br>
+            CGM.getContext().getRecordType(SecGlobalizedVarsRecord);<br>
+<br>
+        // Recover pointer to this function's global record. The runtime will<br>
+        // handle the specifics of the allocation of the memory.<br>
+        // Use actual memory size of the record including the padding<br>
+        // for alignment purposes.<br>
+        unsigned Alignment =<br>
+            CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();<br>
+        unsigned GlobalRecordSize =<br>
+            CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();<br>
+        GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);<br>
+        Size = Bld.CreateSelect(<br>
+            IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size);<br>
+      }<br>
       // TODO: allow the usage of shared memory to be controlled by<br>
       // the user, for now, default to global.<br>
       llvm::Value *GlobalRecordSizeArg[] = {<br>
-          llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),<br>
-          CGF.Builder.getInt16(/*UseSharedMemory=*/0)};<br>
+          Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};<br>
       llvm::Value *GlobalRecValue =<br>
           CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(<br>
                                   OMPRTL_NVPTX__kmpc_data_sharing_push_stack),<br>
@@ -2042,6 +2069,17 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
<br>
     // Emit the "global alloca" which is a GEP from the global declaration<br>
     // record using the pointer returned by the runtime.<br>
+    LValue SecBase;<br>
+    decltype(I->getSecond().LocalVarData)::const_iterator SecIt;<br>
+    if (IsTTD) {<br>
+      SecIt = I->getSecond().SecondaryLocalVarData->begin();<br>
+      llvm::PointerType *SecGlobalRecPtrTy =<br>
+          CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();<br>
+      SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(<br>
+          Bld.CreatePointerBitCastOrAddrSpaceCast(<br>
+              I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),<br>
+          SecGlobalRecTy);<br>
+    }<br>
     for (auto &Rec : I->getSecond().LocalVarData) {<br>
       bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);<br>
       llvm::Value *ParValue;<br>
@@ -2055,23 +2093,32 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
       // Emit VarAddr basing on lane-id if required.<br>
       QualType VarTy;<br>
       if (Rec.second.IsOnePerTeam) {<br>
-        Rec.second.PrivateAddr = VarAddr.getAddress();<br>
         VarTy = Rec.second.FD->getType();<br>
       } else {<br>
         llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(<br>
             VarAddr.getAddress().getPointer(),<br>
             {Bld.getInt32(0), getNVPTXLaneID(CGF)});<br>
-        Rec.second.PrivateAddr =<br>
-            Address(Ptr, CGM.getContext().getDeclAlign(Rec.first));<br>
         VarTy =<br>
             Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();<br>
-        VarAddr = CGF.MakeAddrLValue(Rec.second.PrivateAddr, VarTy,<br>
-                                     AlignmentSource::Decl);<br>
+        VarAddr = CGF.MakeAddrLValue(<br>
+            Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,<br>
+            AlignmentSource::Decl);<br>
       }<br>
+      Rec.second.PrivateAddr = VarAddr.getAddress();<br>
       if (WithSPMDCheck ||<br>
-                getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {<br>
+          getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {<br>
         assert(I->getSecond().IsInSPMDModeFlag &&<br>
                "Expected unknown execution mode or required SPMD check.");<br>
+        if (IsTTD) {<br>
+          assert(SecIt->second.IsOnePerTeam &&<br>
+                 "Secondary glob data must be one per team.");<br>
+          LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD);<br>
+          VarAddr.setAddress(<br>
+              Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(),<br>
+                                       VarAddr.getPointer()),<br>
+                      VarAddr.getAlignment()));<br>
+          Rec.second.PrivateAddr = VarAddr.getAddress();<br>
+        }<br>
         Address GlobalPtr = Rec.second.PrivateAddr;<br>
         Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());<br>
         Rec.second.PrivateAddr = Address(<br>
@@ -2084,6 +2131,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa<br>
         CGF.EmitStoreOfScalar(ParValue, VarAddr);<br>
         I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());<br>
       }<br>
+      ++SecIt;<br>
     }<br>
   }<br>
   for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {<br>
@@ -4115,6 +4163,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP<br>
     Data.insert(<br>
         std::make_pair(VD, MappedVarData(FD, IsInTargetMasterThreadRegion)));<br>
   }<br>
+  if (!IsInTargetMasterThreadRegion && !NeedToDelayGlobalization &&<br>
+      !IsInParallelRegion) {<br>
+    CheckVarsEscapingDeclContext VarChecker(CGF);<br>
+    VarChecker.Visit(Body);<br>
+    I->getSecond().SecondaryGlobalRecord =<br>
+        VarChecker.getGlobalizedRecord(/*IsInTargetMasterThreadRegion=*/true);<br>
+    I->getSecond().SecondaryLocalVarData.emplace();<br>
+    DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();<br>
+    for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {<br>
+      assert(VD->isCanonicalDecl() && "Expected canonical declaration");<br>
+      const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);<br>
+      Data.insert(std::make_pair(<br>
+          VD, MappedVarData(FD, /*IsInTargetMasterThreadRegion=*/true)));<br>
+    }<br>
+  }<br>
   if (!NeedToDelayGlobalization) {<br>
     emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);<br>
     struct GlobalizationScope final : EHScopeStack::Cleanup {<br>
<br>
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=344356&r1=344355&r2=344356&view=diff" rel="noreferrer" target="_blank">
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=344356&r1=344355&r2=344356&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)<br>
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Fri Oct 12 09:04:20 2018<br>
@@ -376,7 +376,7 @@ private:<br>
   /// The data for the single globalized variable.<br>
   struct MappedVarData {<br>
     /// Corresponding field in the global record.<br>
-    const FieldDecl * FD = nullptr;<br>
+    const FieldDecl *FD = nullptr;<br>
     /// Corresponding address.<br>
     Address PrivateAddr = Address::invalid();<br>
     /// true, if only one element is required (for latprivates in SPMD mode),<br>
@@ -392,10 +392,12 @@ private:<br>
   using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;<br>
   struct FunctionData {<br>
     DeclToAddrMapTy LocalVarData;<br>
+    llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;<br>
     EscapedParamsTy EscapedParameters;<br>
     llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;<br>
     llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;<br>
     const RecordDecl *GlobalRecord = nullptr;<br>
+    llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None;<br>
     llvm::Value *GlobalRecordAddr = nullptr;<br>
     llvm::Value *IsInSPMDModeFlag = nullptr;<br>
     std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;<br>
<br>
Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=344356&r1=344355&r2=344356&view=diff" rel="noreferrer" target="_blank">
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=344356&r1=344355&r2=344356&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)<br>
+++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Fri Oct 12 09:04:20 2018<br>
@@ -557,20 +557,26 @@ int baz(int f, double &a) {<br>
   // CHECK: alloca i32,<br>
   // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,<br>
   // CHECK: [[ZERO_ADDR:%.+]] = alloca i32,<br>
-  // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*<br>
   // CHECK: store i32 0, i32* [[ZERO_ADDR]]<br>
+  // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*<br>
+  // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @0, i32 [[GTID]])<br>
+  // CHECK: [[IS_TTD:%.+]] = icmp eq i16 %1, 0<br>
   // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()<br>
   // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0<br>
   // CHECK: br i1 [[IS_SPMD]], label<br>
   // CHECK: br label<br>
-  // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0)<br>
+  // CHECK: [[SIZE:%.+]] = select i1 [[IS_TTD]], i{{64|32}} 4, i{{64|32}} 128<br>
+  // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} [[SIZE]], i16 0)<br>
   // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST:%.+]]*<br>
   // CHECK: br label<br>
   // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [ [[REC_ADDR]], {{.+}} ]<br>
+  // CHECK: [[TTD_ITEMS:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to [[SEC_GLOBAL_ST:%.+]]*<br>
   // CHECK: [[F_PTR_ARR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0<br>
   // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()<br>
   // CHECK: [[LID:%.+]] = and i32 [[TID]], 31<br>
-  // CHECK: [[GLOBAL_F_PTR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]<br>
+  // CHECK: [[GLOBAL_F_PTR_PAR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]<br>
+  // CHECK: [[GLOBAL_F_PTR_TTD:%.+]] = getelementptr inbounds [[SEC_GLOBAL_ST]], [[SEC_GLOBAL_ST]]* [[TTD_ITEMS]], i32 0, i32 0<br>
+  // CHECK: [[GLOBAL_F_PTR:%.+]] = select i1 [[IS_TTD]], i32* [[GLOBAL_F_PTR_TTD]], i32* [[GLOBAL_F_PTR_PAR]]<br>
   // CHECK: [[F_PTR:%.+]] = select i1 [[IS_SPMD]], i32* [[LOCAL_F_PTR]], i32* [[GLOBAL_F_PTR]]<br>
   // CHECK: store i32 %{{.+}}, i32* [[F_PTR]],<br>
<br>
<br>
<br>
_______________________________________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@lists.llvm.org" target="_blank">cfe-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits</a><br>
</blockquote>
</div>
</div>
</blockquote>
</body>
</html>