[llvm-commits] CVS: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Chris Lattner lattner at cs.uiuc.edu
Mon Aug 8 17:18:20 PDT 2005



Changes in directory llvm/lib/Transforms/Scalar:

LoopStrengthReduce.cpp updated: 1.42 -> 1.43
---
Log message:

Implement: LoopStrengthReduce/share_ivs.ll

Two changes:
  * Only insert one PHI node for each stride.  The other values become
    loop live-in values.  This cannot introduce higher register pressure than
    the previous approach, and can take advantage of reg+reg addressing modes.
  * Factor common base values out of uses before moving values from the
    base to the immediate fields.  This improves codegen by starting the
    stride-specific PHI node out at a common place for each IV use.

As an example, we used to generate this for a loop in swim:

.LBB_main_no_exit_2E_6_2E_i_no_exit_2E_7_2E_i_2:        ; no_exit.7.i
        lfd f0, 0(r8)
        stfd f0, 0(r3)
        lfd f0, 0(r6)
        stfd f0, 0(r7)
        lfd f0, 0(r2)
        stfd f0, 0(r5)
        addi r9, r9, 1
        addi r2, r2, 8
        addi r5, r5, 8
        addi r6, r6, 8
        addi r7, r7, 8
        addi r8, r8, 8
        addi r3, r3, 8
        cmpw cr0, r9, r4
        bgt .LBB_main_no_exit_2E_6_2E_i_no_exit_2E_7_2E_i_1

now we emit:

.LBB_main_no_exit_2E_6_2E_i_no_exit_2E_7_2E_i_2:        ; no_exit.7.i
        lfdx f0, r8, r2
        stfdx f0, r9, r2
        lfdx f0, r5, r2
        stfdx f0, r7, r2
        lfdx f0, r3, r2
        stfdx f0, r6, r2
        addi r10, r10, 1
        addi r2, r2, 8
        cmpw cr0, r10, r4
        bgt .LBB_main_no_exit_2E_6_2E_i_no_exit_2E_7_2E_i_1

As another more dramatic example, we used to emit this:

.LBB_main_L_90_no_exit_2E_0_2E_i16_no_exit_2E_1_2E_i19_2:       ; no_exit.1.i19
        lfd f0, 8(r21)
        lfd f4, 8(r3)
        lfd f5, 8(r27)
        lfd f6, 8(r22)
        lfd f7, 8(r5)
        lfd f8, 8(r6)
        lfd f9, 8(r30)
        lfd f10, 8(r11)
        lfd f11, 8(r12)
        fsub f10, f10, f11
        fadd f5, f4, f5
        fmul f5, f5, f1
        fadd f6, f6, f7
        fadd f6, f6, f8
        fadd f6, f6, f9
        fmadd f0, f5, f6, f0
        fnmsub f0, f10, f2, f0
        stfd f0, 8(r4)
        lfd f0, 8(r25)
        lfd f5, 8(r26)
        lfd f6, 8(r23)
        lfd f9, 8(r28)
        lfd f10, 8(r10)
        lfd f12, 8(r9)
        lfd f13, 8(r29)
        fsub f11, f13, f11
        fadd f4, f4, f5
        fmul f4, f4, f1
        fadd f5, f6, f9
        fadd f5, f5, f10
        fadd f5, f5, f12
        fnmsub f0, f4, f5, f0
        fnmsub f0, f11, f3, f0
        stfd f0, 8(r24)
        lfd f0, 8(r8)
        fsub f4, f7, f8
        fsub f5, f12, f10
        fnmsub f0, f5, f2, f0
        fnmsub f0, f4, f3, f0
        stfd f0, 8(r2)
        addi r20, r20, 1
        addi r2, r2, 8
        addi r8, r8, 8
        addi r10, r10, 8
        addi r12, r12, 8
        addi r6, r6, 8
        addi r29, r29, 8
        addi r28, r28, 8
        addi r26, r26, 8
        addi r25, r25, 8
        addi r24, r24, 8
        addi r5, r5, 8
        addi r23, r23, 8
        addi r22, r22, 8
        addi r3, r3, 8
        addi r9, r9, 8
        addi r11, r11, 8
        addi r30, r30, 8
        addi r27, r27, 8
        addi r21, r21, 8
        addi r4, r4, 8
        cmpw cr0, r20, r7
        bgt .LBB_main_L_90_no_exit_2E_0_2E_i16_no_exit_2E_1_2E_i19_1

we now emit:

.LBB_main_L_90_no_exit_2E_0_2E_i16_no_exit_2E_1_2E_i19_2:       ; no_exit.1.i19
        lfdx f0, r21, r20
        lfdx f4, r3, r20
        lfdx f5, r27, r20
        lfdx f6, r22, r20
        lfdx f7, r5, r20
        lfdx f8, r6, r20
        lfdx f9, r30, r20
        lfdx f10, r11, r20
        lfdx f11, r12, r20
        fsub f10, f10, f11
        fadd f5, f4, f5
        fmul f5, f5, f1
        fadd f6, f6, f7
        fadd f6, f6, f8
        fadd f6, f6, f9
        fmadd f0, f5, f6, f0
        fnmsub f0, f10, f2, f0
        stfdx f0, r4, r20
        lfdx f0, r25, r20
        lfdx f5, r26, r20
        lfdx f6, r23, r20
        lfdx f9, r28, r20
        lfdx f10, r10, r20
        lfdx f12, r9, r20
        lfdx f13, r29, r20
        fsub f11, f13, f11
        fadd f4, f4, f5
        fmul f4, f4, f1
        fadd f5, f6, f9
        fadd f5, f5, f10
        fadd f5, f5, f12
        fnmsub f0, f4, f5, f0
        fnmsub f0, f11, f3, f0
        stfdx f0, r24, r20
        lfdx f0, r8, r20
        fsub f4, f7, f8
        fsub f5, f12, f10
        fnmsub f0, f5, f2, f0
        fnmsub f0, f4, f3, f0
        stfdx f0, r2, r20
        addi r19, r19, 1
        addi r20, r20, 8
        cmpw cr0, r19, r7
        bgt .LBB_main_L_90_no_exit_2E_0_2E_i16_no_exit_2E_1_2E_i19_1





---
Diffs of the changes:  (+153 -53)

 LoopStrengthReduce.cpp |  206 ++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 153 insertions(+), 53 deletions(-)


Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
diff -u llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp:1.42 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp:1.43
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp:1.42	Mon Aug  8 17:56:21 2005
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp	Mon Aug  8 19:18:09 2005
@@ -382,7 +382,8 @@
     // Once we rewrite the code to insert the new IVs we want, update the
     // operands of Inst to use the new expression 'NewBase', with 'Imm' added
     // to it.
-    void RewriteInstructionToUseNewBase(Value *NewBase, SCEVExpander &Rewriter);
+    void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
+                                        SCEVExpander &Rewriter);
 
     // Sort by the Base field.
     bool operator<(const BasedUser &BU) const { return Base < BU.Base; }
@@ -403,10 +404,10 @@
 // Once we rewrite the code to insert the new IVs we want, update the
 // operands of Inst to use the new expression 'NewBase', with 'Imm' added
 // to it.
-void BasedUser::RewriteInstructionToUseNewBase(Value *NewBase,
+void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
                                                SCEVExpander &Rewriter) {
   if (!isa<PHINode>(Inst)) {
-    SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(NewBase), Imm);
+    SCEVHandle NewValSCEV = SCEVAddExpr::get(NewBase, Imm);
     Value *NewVal = Rewriter.expandCodeFor(NewValSCEV, Inst,
                                            OperandValToReplace->getType());
     
@@ -426,7 +427,7 @@
       // Insert the code into the end of the predecessor block.
       BasicBlock::iterator InsertPt = PN->getIncomingBlock(i)->getTerminator();
       
-      SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(NewBase), Imm);
+      SCEVHandle NewValSCEV = SCEVAddExpr::get(NewBase, Imm);
       Value *NewVal = Rewriter.expandCodeFor(NewValSCEV, InsertPt,
                                              OperandValToReplace->getType());
       
@@ -552,6 +553,73 @@
   // Otherwise, no immediates to move.
 }
 
+/// RemoveCommonExpressionsFromUseBases - Look through all of the uses in Bases,
+/// removing any common subexpressions from it.  Anything truly common is
+/// removed, accumulated, and returned.  This looks for things like (a+b+c) and
+/// (a+c+d) -> (a+c).  The common expression is *removed* from the Bases.
+static SCEVHandle 
+RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses) {
+  unsigned NumUses = Uses.size();
+
+  // Only one use?  Use its base, regardless of what it is!
+  SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Uses[0].Base->getType());
+  SCEVHandle Result = Zero;
+  if (NumUses == 1) {
+    std::swap(Result, Uses[0].Base);
+    return Result;
+  }
+
+  // To find common subexpressions, count how many of Uses use each expression.
+  // If any subexpressions are used Uses.size() times, they are common.
+  std::map<SCEVHandle, unsigned> SubExpressionUseCounts;
+  
+  for (unsigned i = 0; i != NumUses; ++i)
+    if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Uses[i].Base)) {
+      for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
+        SubExpressionUseCounts[AE->getOperand(j)]++;
+    } else {
+      // If the base is zero (which is common), return zero now, there are no
+      // CSEs we can find.
+      if (Uses[i].Base == Zero) return Result;
+      SubExpressionUseCounts[Uses[i].Base]++;
+    }
+  
+  // Now that we know how many times each is used, build Result.
+  for (std::map<SCEVHandle, unsigned>::iterator I =
+       SubExpressionUseCounts.begin(), E = SubExpressionUseCounts.end();
+       I != E; )
+    if (I->second == NumUses) {  // Found CSE!
+      Result = SCEVAddExpr::get(Result, I->first);
+      ++I;
+    } else {
+      // Remove non-cse's from SubExpressionUseCounts.
+      SubExpressionUseCounts.erase(I++);
+    }
+  
+  // If we found no CSE's, return now.
+  if (Result == Zero) return Result;
+  
+  // Otherwise, remove all of the CSE's we found from each of the base values.
+  for (unsigned i = 0; i != NumUses; ++i)
+    if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Uses[i].Base)) {
+      std::vector<SCEVHandle> NewOps;
+      
+      // Remove all of the values that are now in SubExpressionUseCounts.
+      for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
+        if (!SubExpressionUseCounts.count(AE->getOperand(j)))
+          NewOps.push_back(AE->getOperand(j));
+      Uses[i].Base = SCEVAddExpr::get(NewOps);
+    } else {
+      // This base is not an add expression, so it must itself be the entire
+      // common expression; removing it leaves a base of zero.
+      assert(Uses[i].Base == Result);
+      Uses[i].Base = Zero;
+    }
+ 
+  return Result;
+}
+
+
 /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
 /// stride of IV.  All of the users may have different starting values, and this
 /// may not be the only stride (we know it is if isOnlyStride is true).
@@ -578,25 +646,19 @@
            "Base value is not loop invariant!");
   }
   
-  SCEVExpander Rewriter(*SE, *LI);
-  SCEVExpander PreheaderRewriter(*SE, *LI);
-
-  BasicBlock  *Preheader = L->getLoopPreheader();
-  Instruction *PreInsertPt = Preheader->getTerminator();
-  Instruction *PhiInsertBefore = L->getHeader()->begin();
-
-  assert(isa<PHINode>(PhiInsertBefore) &&
-         "How could this loop have IV's without any phis?");
-  PHINode *SomeLoopPHI = cast<PHINode>(PhiInsertBefore);
-  assert(SomeLoopPHI->getNumIncomingValues() == 2 &&
-         "This loop isn't canonicalized right");
-  BasicBlock *LatchBlock =
-   SomeLoopPHI->getIncomingBlock(SomeLoopPHI->getIncomingBlock(0) == Preheader);
-
-
+  // We now have a whole bunch of uses of like-strided induction variables, but
+  // they might all have different bases.  We want to emit one PHI node for this
+  // stride which we fold as many common expressions (between the IVs) into as
+  // possible.  Start by identifying the common expressions in the base values 
+  // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
+  // "A+B"), emit it to the preheader, then remove the expression from the
+  // UsersToProcess base values.
+  SCEVHandle CommonExprs = RemoveCommonExpressionsFromUseBases(UsersToProcess);
+  
   // Next, figure out what we can represent in the immediate fields of
   // instructions.  If we can represent anything there, move it to the imm
-  // fields of the BasedUsers.
+  // fields of the BasedUsers.  We do this so that it increases the commonality
+  // of the remaining uses.
   for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
     // Addressing modes can be folded into loads and stores.  Be careful that
     // the store is through the expression, not of the expression though.
@@ -609,59 +671,95 @@
                         isAddress, L);
   }
  
+  // Now that we know what we need to do, insert the PHI node itself.
+  //
+  DEBUG(std::cerr << "INSERTING IV of STRIDE " << *Stride << " and BASE "
+        << *CommonExprs << " :\n");
+    
+  SCEVExpander Rewriter(*SE, *LI);
+  SCEVExpander PreheaderRewriter(*SE, *LI);
   
+  BasicBlock  *Preheader = L->getLoopPreheader();
+  Instruction *PreInsertPt = Preheader->getTerminator();
+  Instruction *PhiInsertBefore = L->getHeader()->begin();
   
-  DEBUG(std::cerr << "INSERTING IVs of STRIDE " << *Stride << ":\n");
+  assert(isa<PHINode>(PhiInsertBefore) &&
+         "How could this loop have IV's without any phis?");
+  PHINode *SomeLoopPHI = cast<PHINode>(PhiInsertBefore);
+  assert(SomeLoopPHI->getNumIncomingValues() == 2 &&
+         "This loop isn't canonicalized right");
+  BasicBlock *LatchBlock =
+    SomeLoopPHI->getIncomingBlock(SomeLoopPHI->getIncomingBlock(0) == Preheader);
   
+  // Create a new Phi for this base, and stick it in the loop header.
+  const Type *ReplacedTy = CommonExprs->getType();
+  PHINode *NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
+  ++NumInserted;
+  
+  // Emit the initial base value into the loop preheader, and add it to the
+  // Phi node.
+  Value *PHIBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt,
+                                                    ReplacedTy);
+  NewPHI->addIncoming(PHIBaseV, Preheader);
+  
+  // Emit the increment of the base value before the terminator of the loop
+  // latch block, and add it to the Phi node.
+  SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
+                                       SCEVUnknown::get(Stride));
+  
+  Value *IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(),
+                                       ReplacedTy);
+  IncV->setName(NewPHI->getName()+".inc");
+  NewPHI->addIncoming(IncV, LatchBlock);
+
   // Sort by the base value, so that all IVs with identical bases are next to
-  // each other.  
+  // each other.
   std::sort(UsersToProcess.begin(), UsersToProcess.end());
   while (!UsersToProcess.empty()) {
     SCEVHandle Base = UsersToProcess.front().Base;
 
-    DEBUG(std::cerr << "  INSERTING PHI with BASE = " << *Base << ":\n");
+    DEBUG(std::cerr << "  INSERTING code for BASE = " << *Base << ":\n");
    
-    // Create a new Phi for this base, and stick it in the loop header.
-    const Type *ReplacedTy = Base->getType();
-    PHINode *NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
-    ++NumInserted;
-
-    // Emit the initial base value into the loop preheader, and add it to the
-    // Phi node.
+    // Emit the code for Base into the preheader.
     Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt,
                                                    ReplacedTy);
-    NewPHI->addIncoming(BaseV, Preheader);
-
-    // Emit the increment of the base value before the terminator of the loop
-    // latch block, and add it to the Phi node.
-    SCEVHandle Inc = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
-                                      SCEVUnknown::get(Stride));
-
-    Value *IncV = Rewriter.expandCodeFor(Inc, LatchBlock->getTerminator(),
-                                         ReplacedTy);
-    IncV->setName(NewPHI->getName()+".inc");
-    NewPHI->addIncoming(IncV, LatchBlock);
-
+    
+    // If BaseV is a constant other than 0, make sure that it gets inserted into
+    // the preheader, instead of being forward substituted into the uses.  We do
+    // this by forcing a noop cast to be inserted into the preheader in this
+    // case.
+    if (Constant *C = dyn_cast<Constant>(BaseV))
+      if (!C->isNullValue()) {
+        // We want this constant emitted into the preheader!
+        BaseV = new CastInst(BaseV, BaseV->getType(), "preheaderinsert",
+                             PreInsertPt);       
+      }
+    
     // Emit the code to add the immediate offset to the Phi value, just before
     // the instructions that we identified as using this stride and base.
     while (!UsersToProcess.empty() && UsersToProcess.front().Base == Base) {
       BasedUser &User = UsersToProcess.front();
 
-      // Clear the SCEVExpander's expression map so that we are guaranteed
-      // to have the code emitted where we expect it.
-      Rewriter.clear();
-      
-      // Now that we know what we need to do, insert code before User for the
-      // immediate and any loop-variant expressions.
-      Value *NewBase = NewPHI;
-
       // If this instruction wants to use the post-incremented value, move it
       // after the post-inc and use its value instead of the PHI.
+      Value *RewriteOp = NewPHI;
       if (User.isUseOfPostIncrementedValue) {
-        NewBase = IncV;
+        RewriteOp = IncV;
         User.Inst->moveBefore(LatchBlock->getTerminator());
       }
-      User.RewriteInstructionToUseNewBase(NewBase, Rewriter);
+      SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp);
+
+      // Clear the SCEVExpander's expression map so that we are guaranteed
+      // to have the code emitted where we expect it.
+      Rewriter.clear();
+     
+      // Now that we know what we need to do, insert code before User for the
+      // immediate and any loop-variant expressions.
+      if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isNullValue())
+        // Add BaseV to the PHI value if needed.
+        RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV));
+      
+      User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter);
 
       // Mark old value we replaced as possibly dead, so that it is elminated
       // if we just replaced the last use of that value.
@@ -782,6 +880,8 @@
   // If we only have one stride, we can more aggressively eliminate some things.
   bool HasOneStride = IVUsesByStride.size() == 1;
 
+  // Note: this processes each stride/type pair individually.  All users passed
+  // into StrengthReduceStridedIVUsers have the same type AND stride.
   for (std::map<Value*, IVUsersOfOneStride>::iterator SI
         = IVUsesByStride.begin(), E = IVUsesByStride.end(); SI != E; ++SI)
     StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);






More information about the llvm-commits mailing list