[polly] r246414 - Add support for scalar dependences to OpenMP code generation

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 30 22:52:24 PDT 2015


Author: grosser
Date: Mon Aug 31 00:52:24 2015
New Revision: 246414

URL: http://llvm.org/viewvc/llvm-project?rev=246414&view=rev
Log:
Add support for scalar dependences to OpenMP code generation

Scalar dependences between scop statements have caused trouble during parallel
code generation, as we did not pass on the new stack allocations created for
such scalars to the parallel subfunctions. This change now detects all scalar
reads/writes in parallel subfunctions, creates the allocas for these scalar
objects, passes the resulting memory locations to the subfunctions, and ensures
that within the subfunction requests for these memory locations will return the
rewritten values.

Johannes suggested privatizing some of the scalars in the subfunction as a
future optimization.

Modified:
    polly/trunk/include/polly/CodeGen/BlockGenerators.h
    polly/trunk/lib/CodeGen/BlockGenerators.cpp
    polly/trunk/lib/CodeGen/IslNodeBuilder.cpp

Modified: polly/trunk/include/polly/CodeGen/BlockGenerators.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/BlockGenerators.h?rev=246414&r1=246413&r2=246414&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/BlockGenerators.h (original)
+++ polly/trunk/include/polly/CodeGen/BlockGenerators.h Mon Aug 31 00:52:24 2015
@@ -120,27 +120,42 @@ public:
   /// If no alloca was mapped to @p ScalarBase a new one is created.
   ///
   /// @param ScalarBase The demoted scalar value.
-  ///
-  /// @returns The alloca for @p ScalarBase
-  AllocaInst *getOrCreateScalarAlloca(Value *ScalarBase);
+  /// @param GlobalMap  A mapping from Allocas to other memory locations that
+  ///                   can be used to replace the original alloca locations
+  ///                   with new memory locations, e.g. when passing values to
+  ///                   subfunctions while offloading parallel sections.
+  ///
+  /// @returns The alloca for @p ScalarBase or a replacement value taken from
+  ///          GlobalMap.
+  Value *getOrCreateScalarAlloca(Value *ScalarBase, ValueMapT *GlobalMap);
 
   /// @brief Return the PHi-node alloca for @p ScalarBase
   ///
   /// If no alloca was mapped to @p ScalarBase a new one is created.
   ///
   /// @param ScalarBase The demoted scalar value.
-  ///
-  /// @returns The alloca for @p ScalarBase
-  AllocaInst *getOrCreatePHIAlloca(Value *ScalarBase);
+  /// @param GlobalMap  A mapping from Allocas to other memory locations that
+  ///                   can be used to replace the original alloca locations
+  ///                   with new memory locations, e.g. when passing values to
+  ///                   subfunctions while offloading parallel sections.
+  ///
+  /// @returns The alloca for @p ScalarBase or a replacement value taken from
+  ///          GlobalMap.
+  Value *getOrCreatePHIAlloca(Value *ScalarBase, ValueMapT *GlobalMap);
 
   /// @brief Return the alloca for @p Access
   ///
   /// If no alloca was mapped for @p Access a new one is created.
   ///
-  /// @param Access The memory access for which to generate the alloca
-  ///
-  /// @returns The alloca for @p Access
-  AllocaInst *getOrCreateAlloca(MemoryAccess &Access);
+  /// @param Access    The memory access for which to generate the alloca
+  /// @param GlobalMap A mapping from Allocas to other memory locations that
+  ///                  can be used to replace the original alloca locations with
+  ///                  new memory locations, e.g. when passing values to
+  ///                  subfunctions while offloading parallel sections.
+  ///
+  /// @returns The alloca for @p Access or a replacement value taken from
+  ///          GlobalMap.
+  Value *getOrCreateAlloca(MemoryAccess &Access, ValueMapT *GlobalMap);
 
   /// @brief Finalize the code generation for the SCoP @p S.
   ///
@@ -348,10 +363,15 @@ protected:
   /// @param ScalarBase The demoted scalar value.
   /// @param Map        The map we should look for a mapped alloca value.
   /// @param NameExt    The suffix we add to the name of a new created alloca.
-  ///
-  /// @returns The alloca for @p ScalarBase in @p Map.
-  AllocaInst *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map,
-                                const char *NameExt);
+  /// @param GlobalMap  A mapping from Allocas to other memory locations that
+  ///                   can be used to replace the original alloca locations
+  ///                   with new memory locations, e.g. when passing values to
+  ///                   subfunctions while offloading parallel sections.
+  ///
+  /// @returns The alloca for @p ScalarBase or a replacement value taken from
+  ///          GlobalMap.
+  Value *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map,
+                           ValueMapT *GlobalMap, const char *NameExt);
 
   /// @brief Generate reload of scalars demoted to memory and needed by @p Inst.
   ///
@@ -359,7 +379,7 @@ protected:
   /// @param Inst  The instruction that might need reloaded values.
   /// @param BBMap A mapping from old values to their new values in this block.
   virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst,
-                                   ValueMapT &BBMap);
+                                   ValueMapT &BBMap, ValueMapT &GlobalMap);
 
   /// @brief Generate the scalar stores for the given statement.
   ///
@@ -376,10 +396,15 @@ protected:
 
   /// @brief Handle users of @p Inst outside the SCoP.
   ///
-  /// @param R        The current SCoP region.
-  /// @param Inst     The current instruction we check.
-  /// @param InstCopy The copy of the instruction @p Inst in the optimized SCoP.
-  void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy);
+  /// @param R         The current SCoP region.
+  /// @param GlobalMap A mapping from old values to their new values
+  ///                  (for values recalculated in the new ScoP, but not
+  ///                  within this basic block).
+  /// @param Inst      The current instruction we check.
+  /// @param InstCopy  The copy of the instruction @p Inst in the optimized
+  ///                  SCoP.
+  void handleOutsideUsers(const Region &R, ValueMapT &GlobalMap,
+                          Instruction *Inst, Value *InstCopy);
 
   /// @brief Initialize the memory of demoted scalars.
   ///
@@ -449,6 +474,9 @@ protected:
                             LoopToScevMapT &LTS,
                             isl_id_to_ast_expr *NewAccesses);
 
+  /// @param GlobalMap   A mapping from old values to their new values
+  ///                    (for values recalculated in the new ScoP, but not
+  ///                    within this basic block).
   /// @param NewAccesses A map from memory access ids to new ast expressions,
   ///                    which may contain new access expressions for certain
   ///                    memory accesses.
@@ -752,7 +780,8 @@ private:
   /// @param Inst  The instruction that might need reloaded values.
   /// @param BBMap A mapping from old values to their new values in this block.
   virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst,
-                                   ValueMapT &BBMap) override;
+                                   ValueMapT &BBMap,
+                                   ValueMapT &GlobalMap) override;
 
   /// @brief Generate the scalar stores for the given statement.
   ///

Modified: polly/trunk/lib/CodeGen/BlockGenerators.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/BlockGenerators.cpp?rev=246414&r1=246413&r2=246414&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/BlockGenerators.cpp (original)
+++ polly/trunk/lib/CodeGen/BlockGenerators.cpp Mon Aug 31 00:52:24 2015
@@ -234,7 +234,7 @@ void BlockGenerator::copyInstruction(Sco
                                      isl_id_to_ast_expr *NewAccesses) {
 
   // First check for possible scalar dependences for this instruction.
-  generateScalarLoads(Stmt, Inst, BBMap);
+  generateScalarLoads(Stmt, Inst, BBMap, GlobalMap);
 
   // Terminator instructions control the control flow. They are explicitly
   // expressed in the clast and do not need to be copied.
@@ -321,12 +321,13 @@ void BlockGenerator::copyBB(ScopStmt &St
 
   const Region &R = Stmt.getParent()->getRegion();
   for (Instruction &Inst : *BB)
-    handleOutsideUsers(R, &Inst, BBMap[&Inst]);
+    handleOutsideUsers(R, GlobalMap, &Inst, BBMap[&Inst]);
 }
 
-AllocaInst *BlockGenerator::getOrCreateAlloca(Value *ScalarBase,
-                                              ScalarAllocaMapTy &Map,
-                                              const char *NameExt) {
+Value *BlockGenerator::getOrCreateAlloca(Value *ScalarBase,
+                                         ScalarAllocaMapTy &Map,
+                                         ValueMapT *GlobalMap,
+                                         const char *NameExt) {
   // Check if an alloca was cached for the base instruction.
   AllocaInst *&Addr = Map[ScalarBase];
 
@@ -334,29 +335,36 @@ AllocaInst *BlockGenerator::getOrCreateA
   if (!Addr) {
     auto *Ty = ScalarBase->getType();
     Addr = new AllocaInst(Ty, ScalarBase->getName() + NameExt);
+    EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
     Addr->insertBefore(EntryBB->getFirstInsertionPt());
   }
 
+  if (GlobalMap && GlobalMap->count(Addr))
+    return (*GlobalMap)[Addr];
+
   return Addr;
 }
 
-AllocaInst *BlockGenerator::getOrCreateAlloca(MemoryAccess &Access) {
+Value *BlockGenerator::getOrCreateAlloca(MemoryAccess &Access,
+                                         ValueMapT *GlobalMap) {
   if (Access.getScopArrayInfo()->isPHI())
-    return getOrCreatePHIAlloca(Access.getBaseAddr());
+    return getOrCreatePHIAlloca(Access.getBaseAddr(), GlobalMap);
   else
-    return getOrCreateScalarAlloca(Access.getBaseAddr());
+    return getOrCreateScalarAlloca(Access.getBaseAddr(), GlobalMap);
 }
 
-AllocaInst *BlockGenerator::getOrCreateScalarAlloca(Value *ScalarBase) {
-  return getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
+Value *BlockGenerator::getOrCreateScalarAlloca(Value *ScalarBase,
+                                               ValueMapT *GlobalMap) {
+  return getOrCreateAlloca(ScalarBase, ScalarMap, GlobalMap, ".s2a");
 }
 
-AllocaInst *BlockGenerator::getOrCreatePHIAlloca(Value *ScalarBase) {
-  return getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops");
+Value *BlockGenerator::getOrCreatePHIAlloca(Value *ScalarBase,
+                                            ValueMapT *GlobalMap) {
+  return getOrCreateAlloca(ScalarBase, PHIOpMap, GlobalMap, ".phiops");
 }
 
-void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst,
-                                        Value *InstCopy) {
+void BlockGenerator::handleOutsideUsers(const Region &R, ValueMapT &GlobalMap,
+                                        Instruction *Inst, Value *InstCopy) {
   // If there are escape users we get the alloca for this instruction and put it
   // in the EscapeMap for later finalization. Lastly, if the instruction was
   // copied multiple times we already did this and can exit.
@@ -382,7 +390,8 @@ void BlockGenerator::handleOutsideUsers(
     return;
 
   // Get or create an escape alloca for this instruction.
-  AllocaInst *ScalarAddr = getOrCreateScalarAlloca(Inst);
+  auto *ScalarAddr =
+      cast<AllocaInst>(getOrCreateScalarAlloca(Inst, &GlobalMap));
 
   // Remember that this instruction has escape uses and the escape alloca.
   EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
@@ -390,7 +399,8 @@ void BlockGenerator::handleOutsideUsers(
 
 void BlockGenerator::generateScalarLoads(ScopStmt &Stmt,
                                          const Instruction *Inst,
-                                         ValueMapT &BBMap) {
+                                         ValueMapT &BBMap,
+                                         ValueMapT &GlobalMap) {
   auto *MAL = Stmt.lookupAccessesFor(Inst);
 
   if (!MAL)
@@ -400,7 +410,7 @@ void BlockGenerator::generateScalarLoads
     if (!MA.isScalar() || !MA.isRead())
       continue;
 
-    auto *Address = getOrCreateAlloca(MA);
+    auto *Address = getOrCreateAlloca(MA, &GlobalMap);
     BBMap[MA.getBaseAddr()] =
         Builder.CreateLoad(Address, Address->getName() + ".reload");
   }
@@ -437,7 +447,7 @@ Value *BlockGenerator::getNewScalarValue
     return /* Case (3a) */ ScalarValueCopy;
 
   // Case (3b)
-  Value *Address = getOrCreateScalarAlloca(ScalarValueInst);
+  Value *Address = getOrCreateScalarAlloca(ScalarValueInst, &GlobalMap);
   ScalarValue = Builder.CreateLoad(Address, Address->getName() + ".reload");
 
   return ScalarValue;
@@ -457,7 +467,7 @@ void BlockGenerator::generateScalarStore
       continue;
 
     Value *Val = MA->getAccessValue();
-    auto *Address = getOrCreateAlloca(*MA);
+    auto *Address = getOrCreateAlloca(*MA, &GlobalMap);
 
     Val = getNewScalarValue(Val, R, BBMap, GlobalMap);
     Builder.CreateStore(Val, Address);
@@ -500,7 +510,7 @@ void BlockGenerator::createScalarInitial
 
       Value *ScalarValue = PHI->getIncomingValue(Idx);
 
-      Builder.CreateStore(ScalarValue, getOrCreatePHIAlloca(PHI));
+      Builder.CreateStore(ScalarValue, getOrCreatePHIAlloca(PHI, nullptr));
       continue;
     }
 
@@ -509,8 +519,9 @@ void BlockGenerator::createScalarInitial
     if (Inst && R.contains(Inst))
       continue;
 
+    ValueMapT EmptyMap;
     Builder.CreateStore(Array->getBasePtr(),
-                        getOrCreateScalarAlloca(Array->getBasePtr()));
+                        getOrCreateScalarAlloca(Array->getBasePtr(), nullptr));
   }
 }
 
@@ -532,7 +543,7 @@ void BlockGenerator::createScalarFinaliz
     Instruction *EscapeInst = EscapeMapping.getFirst();
     const auto &EscapeMappingValue = EscapeMapping.getSecond();
     const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
-    AllocaInst *ScalarAddr = EscapeMappingValue.first;
+    Value *ScalarAddr = EscapeMappingValue.first;
 
     // Reload the demoted instruction in the optimized version of the SCoP.
     Instruction *EscapeInstReload =
@@ -1068,7 +1079,8 @@ void RegionGenerator::copyStmt(ScopStmt
 
 void RegionGenerator::generateScalarLoads(ScopStmt &Stmt,
                                           const Instruction *Inst,
-                                          ValueMapT &BBMap) {
+                                          ValueMapT &BBMap,
+                                          ValueMapT &GlobalMap) {
 
   // Inside a non-affine region PHI nodes are copied not demoted. Once the
   // phi is copied it will reload all inputs from outside the region, hence
@@ -1077,7 +1089,7 @@ void RegionGenerator::generateScalarLoad
   if (isa<PHINode>(Inst))
     return;
 
-  return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap);
+  return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap, GlobalMap);
 }
 
 void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
@@ -1102,7 +1114,7 @@ void RegionGenerator::generateScalarStor
 
     Value *Val = MA->getAccessValue();
 
-    auto Address = getOrCreateAlloca(*MA);
+    auto Address = getOrCreateAlloca(*MA, &GlobalMap);
 
     Val = getNewScalarValue(Val, R, BBMap, GlobalMap);
     Builder.CreateStore(Val, Address);
@@ -1139,7 +1151,8 @@ void RegionGenerator::addOperandToPHI(Sc
     if (PHICopy->getBasicBlockIndex(BBCopy) >= 0)
       return;
 
-    AllocaInst *PHIOpAddr = getOrCreatePHIAlloca(const_cast<PHINode *>(PHI));
+    Value *PHIOpAddr =
+        getOrCreatePHIAlloca(const_cast<PHINode *>(PHI), &GlobalMap);
     OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload",
                           BlockMap[IncomingBB]->getTerminator());
   }

Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=246414&r1=246413&r2=246414&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Mon Aug 31 00:52:24 2015
@@ -178,6 +178,7 @@ struct FindValuesUser {
   Region &R;
   SetVector<Value *> &Values;
   SetVector<const SCEV *> &SCEVs;
+  BlockGenerator &BlockGen;
 };
 
 /// @brief Extract the values and SCEVs needed to generate code for a block.
@@ -192,12 +193,6 @@ static int findValuesInBlock(struct Find
               User.SE.getSCEVAtScope(OpInst, User.LI.getLoopFor(BB)));
           continue;
         }
-      if (Instruction *OpInst = dyn_cast<Instruction>(SrcVal))
-        if (Stmt->getParent()->getRegion().contains(OpInst))
-          continue;
-
-      if (isa<Instruction>(SrcVal) || isa<Argument>(SrcVal))
-        User.Values.insert(SrcVal);
     }
   }
   return 0;
@@ -222,6 +217,20 @@ static isl_stat findValuesInStmt(isl_set
       findValuesInBlock(User, Stmt, BB);
   }
 
+  for (auto &Access : *Stmt) {
+    if (!Access->isScalar()) {
+      auto *BasePtr = Access->getScopArrayInfo()->getBasePtr();
+      if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
+        if (Stmt->getParent()->getRegion().contains(OpInst))
+          continue;
+
+      User.Values.insert(BasePtr);
+      continue;
+    }
+
+    User.Values.insert(User.BlockGen.getOrCreateAlloca(*Access, nullptr));
+  }
+
   isl_id_free(Id);
   isl_set_free(Set);
   return isl_stat_ok;
@@ -232,7 +241,8 @@ void IslNodeBuilder::getReferencesInSubt
                                             SetVector<const Loop *> &Loops) {
 
   SetVector<const SCEV *> SCEVs;
-  struct FindValuesUser FindValues = {LI, SE, S.getRegion(), Values, SCEVs};
+  struct FindValuesUser FindValues = {LI,     SE,    S.getRegion(),
+                                      Values, SCEVs, getBlockGenerator()};
 
   for (const auto &I : IDToValue)
     Values.insert(I.second);




More information about the llvm-commits mailing list