[llvm-branch-commits] [llvm-branch] r91646 - in /llvm/branches/Apple/Zoidberg: lib/CodeGen/LiveIntervalAnalysis.cpp lib/CodeGen/PHIElimination.cpp lib/CodeGen/PHIElimination.h test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll

Jakob Stoklund Olesen stoklund at 2pi.dk
Thu Dec 17 16:31:34 PST 2009


Author: stoklund
Date: Thu Dec 17 18:31:34 2009
New Revision: 91646

URL: http://llvm.org/viewvc/llvm-project?rev=91646&view=rev
Log:
Merge 91549 91642 Interned PHI node reuse

Added:
    llvm/branches/Apple/Zoidberg/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll
Modified:
    llvm/branches/Apple/Zoidberg/lib/CodeGen/LiveIntervalAnalysis.cpp
    llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.cpp
    llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.h

Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/LiveIntervalAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=91646&r1=91645&r2=91646&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/LiveIntervalAnalysis.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/LiveIntervalAnalysis.cpp Thu Dec 17 18:31:34 2009
@@ -415,19 +415,32 @@
       // first redefinition of the vreg that we have seen, go back and change
       // the live range in the PHI block to be a different value number.
       if (interval.containsOneValue()) {
-        // Remove the old range that we now know has an incorrect number.
+
         VNInfo *VNI = interval.getValNumInfo(0);
-        MachineInstr *Killer = vi.Kills[0];
-        SlotIndex Start = getMBBStartIdx(Killer->getParent());
-        SlotIndex End = getInstructionIndex(Killer).getDefIndex();
-        DEBUG({
-            errs() << " Removing [" << Start << "," << End << "] from: ";
-            interval.print(errs(), tri_);
-            errs() << "\n";
-          });
-        interval.removeRange(Start, End);        
-        assert(interval.ranges.size() == 1 &&
-               "Newly discovered PHI interval has >1 ranges.");
+        // Phi elimination may have reused the register for multiple identical
+        // phi nodes. There will be a kill per phi. Remove the old ranges that
+        // we now know have an incorrect number.
+        for (unsigned ki=0, ke=vi.Kills.size(); ki != ke; ++ki) {
+          MachineInstr *Killer = vi.Kills[ki];
+          SlotIndex Start = getMBBStartIdx(Killer->getParent());
+          SlotIndex End = getInstructionIndex(Killer).getDefIndex();
+          DEBUG({
+              errs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
+              interval.print(errs(), tri_);
+            });
+          interval.removeRange(Start, End);
+
+          // Replace the interval with one of a NEW value number.  Note that
+          // this value number isn't actually defined by an instruction, weird
+          // huh? :)
+          LiveRange LR(Start, End,
+                       interval.getNextValue(SlotIndex(Start, true),
+                                             0, false, VNInfoAllocator));
+          LR.valno->setIsPHIDef(true);
+          interval.addRange(LR);
+          LR.valno->addKill(End);
+        }
+
         MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex());
         VNI->addKill(indexes_->getTerminatorGap(killMBB));
         VNI->setHasPHIKill(true);
@@ -435,20 +448,6 @@
             errs() << " RESULT: ";
             interval.print(errs(), tri_);
           });
-
-        // Replace the interval with one of a NEW value number.  Note that this
-        // value number isn't actually defined by an instruction, weird huh? :)
-        LiveRange LR(Start, End,
-                     interval.getNextValue(SlotIndex(getMBBStartIdx(mbb), true),
-                       0, false, VNInfoAllocator));
-        LR.valno->setIsPHIDef(true);
-        DEBUG(errs() << " replace range with " << LR);
-        interval.addRange(LR);
-        LR.valno->addKill(End);
-        DEBUG({
-            errs() << " RESULT: ";
-            interval.print(errs(), tri_);
-          });
       }
 
       // In the case of PHI elimination, each variable definition is only

Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.cpp?rev=91646&r1=91645&r2=91646&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.cpp Thu Dec 17 18:31:34 2009
@@ -35,6 +35,7 @@
 
 STATISTIC(NumAtomic, "Number of atomic phis lowered");
 STATISTIC(NumSplits, "Number of critical edges split on demand");
+STATISTIC(NumReused, "Number of reused lowered phis");
 
 char PHIElimination::ID = 0;
 static RegisterPass<PHIElimination>
@@ -78,6 +79,12 @@
       DefMI->eraseFromParent();
   }
 
+  // Clean up the lowered PHI instructions.
+  for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
+       I != E; ++I)
+    Fn.DeleteMachineInstr(I->first);
+  LoweredPHIs.clear();
+
   ImpDefs.clear();
   VRegPHIUseCount.clear();
   return Changed;
@@ -168,6 +175,7 @@
 void llvm::PHIElimination::LowerAtomicPHINode(
                                       MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator AfterPHIsIt) {
+  ++NumAtomic;
   // Unlink the PHI node from the basic block, but don't delete the PHI yet.
   MachineInstr *MPhi = MBB.remove(MBB.begin());
 
@@ -179,6 +187,7 @@
   MachineFunction &MF = *MBB.getParent();
   const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
   unsigned IncomingReg = 0;
+  bool reusedIncoming = false;  // Is IncomingReg reused from an earlier PHI?
 
   // Insert a register to register copy at the top of the current block (but
   // after any remaining phi nodes) which copies the new incoming register
@@ -190,7 +199,18 @@
     BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
             TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg);
   else {
-    IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+    // Can we reuse an earlier PHI node? This only happens for critical edges,
+    // typically those created by tail duplication.
+    unsigned &entry = LoweredPHIs[MPhi];
+    if (entry) {
+      // An identical PHI node was already lowered. Reuse the incoming register.
+      IncomingReg = entry;
+      reusedIncoming = true;
+      ++NumReused;
+      DEBUG(errs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
+    } else {
+      entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+    }
     TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
   }
 
@@ -204,8 +224,20 @@
     MachineInstr *PHICopy = prior(AfterPHIsIt);
 
     if (IncomingReg) {
+      LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
       // Increment use count of the newly created virtual register.
-      LV->getVarInfo(IncomingReg).NumUses++;
+      VI.NumUses++;
+
+      // When we are reusing the incoming register, it may already have been
+      // killed in this block. The old kill will also have been inserted at
+      // AfterPHIsIt, so it appears before the current PHICopy.
+      if (reusedIncoming)
+        if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+          DEBUG(errs() << "Remove old kill from " << *OldKill);
+          LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+          DEBUG(MBB.dump());
+        }
 
       // Add information to LiveVariables to know that the incoming value is
       // killed.  Note that because the value is defined in several places (once
@@ -228,7 +260,7 @@
 
   // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
   for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
-    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i + 1).getMBB(),
+    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
                                  MPhi->getOperand(i).getReg())];
 
   // Now loop over all of the incoming arguments, changing them to copy into the
@@ -266,7 +298,8 @@
       FindCopyInsertPoint(opBlock, MBB, SrcReg);
 
     // Insert the copy.
-    TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
+    if (!reusedIncoming && IncomingReg)
+      TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
 
     // Now update live variable information if we have it.  Otherwise we're done
     if (!LV) continue;
@@ -283,7 +316,7 @@
     // point later.
 
     // Is it used by any PHI instructions in this block?
-    bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
+    bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];
 
     // Okay, if we now know that the value is not live out of the block, we can
     // add a kill marker in this block saying that it kills the incoming value!
@@ -293,11 +326,10 @@
       // terminator instruction at the end of the block may also use the value.
       // In this case, we should mark *it* as being the killing block, not the
       // copy.
-      MachineBasicBlock::iterator KillInst = prior(InsertPos);
+      MachineBasicBlock::iterator KillInst;
       MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
-      if (Term != opBlock.end()) {
-        if (Term->readsRegister(SrcReg))
-          KillInst = Term;
+      if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
+        KillInst = Term;
 
         // Check that no other terminators use values.
 #ifndef NDEBUG
@@ -308,7 +340,17 @@
                  "they are the first terminator in a block!");
         }
 #endif
+      } else if (reusedIncoming || !IncomingReg) {
+        // We may have to rewind a bit if we didn't insert a copy this time.
+        KillInst = Term;
+        while (KillInst != opBlock.begin())
+          if ((--KillInst)->readsRegister(SrcReg))
+            break;
+      } else {
+        // We just inserted this copy.
+        KillInst = prior(InsertPos);
       }
+      assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
 
       // Finally, mark it killed.
       LV->addVirtualRegisterKilled(SrcReg, KillInst);
@@ -319,9 +361,9 @@
     }
   }
 
-  // Really delete the PHI instruction now!
-  MF.DeleteMachineInstr(MPhi);
-  ++NumAtomic;
+  // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+  if (reusedIncoming || !IncomingReg)
+    MF.DeleteMachineInstr(MPhi);
 }
 
 /// analyzePHINodes - Gather information about the PHI nodes in here. In
@@ -335,7 +377,7 @@
     for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
          BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
       for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
-        ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(),
+        ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(),
                                      BBI->getOperand(i).getReg())];
 }
 
@@ -409,3 +451,34 @@
 
   return NMBB;
 }
+
+unsigned
+PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) {
+  if (!MI || MI==getEmptyKey() || MI==getTombstoneKey())
+    return DenseMapInfo<MachineInstr*>::getHashValue(MI);
+  unsigned hash = 0;
+  for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2)
+    hash = hash*37 + DenseMapInfo<BBVRegPair>::
+      getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()->getNumber(),
+                              MI->getOperand(ni).getReg()));
+  return hash;
+}
+
+bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS,
+                                            const MachineInstr *RHS) {
+  const MachineInstr *EmptyKey = getEmptyKey();
+  const MachineInstr *TombstoneKey = getTombstoneKey();
+  if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey ||
+      LHS==TombstoneKey || RHS==TombstoneKey)
+    return LHS==RHS;
+
+  unsigned ne = LHS->getNumOperands();
+  if (ne != RHS->getNumOperands())
+      return false;
+  // Ignore operand 0, the defined register.
+  for (unsigned ni = 1; ni != ne; ni += 2)
+    if (LHS->getOperand(ni).getReg() != RHS->getOperand(ni).getReg() ||
+        LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni+1).getMBB())
+      return false;
+  return true;
+}

Modified: llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.h?rev=91646&r1=91645&r2=91646&view=diff

==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.h (original)
+++ llvm/branches/Apple/Zoidberg/lib/CodeGen/PHIElimination.h Thu Dec 17 18:31:34 2009
@@ -16,8 +16,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
-#include <map>
-
 namespace llvm {
 
   /// Lower PHI instructions to copies.  
@@ -120,8 +118,8 @@
       return I;
     }
 
-    typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair;
-    typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
+    typedef std::pair<unsigned, unsigned> BBVRegPair;
+    typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
 
     VRegPHIUse VRegPHIUseCount;
     PHIDefMap PHIDefs;
@@ -129,6 +127,17 @@
 
     // Defs of PHI sources which are implicit_def.
     SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+    // Lowered PHI nodes may be reused. We provide special DenseMap traits to
+    // match PHI nodes with identical arguments.
+    struct PHINodeTraits : public DenseMapInfo<MachineInstr*> {
+      static unsigned getHashValue(const MachineInstr *PtrVal);
+      static bool isEqual(const MachineInstr *LHS, const MachineInstr *RHS);
+    };
+
+    // Map reusable lowered PHI node -> incoming join register.
+    typedef DenseMap<MachineInstr*, unsigned, PHINodeTraits> LoweredPHIMap;
+    LoweredPHIMap LoweredPHIs;
   };
 
 }

Added: llvm/branches/Apple/Zoidberg/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll?rev=91646&view=auto

==============================================================================
--- llvm/branches/Apple/Zoidberg/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll (added)
+++ llvm/branches/Apple/Zoidberg/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll Thu Dec 17 18:31:34 2009
@@ -0,0 +1,66 @@
+; RUN: llc -O3 -pre-regalloc-taildup < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+; This test should not produce any spills, even when tail duplication creates lots of phi nodes.
+; CHECK-NOT: push
+; CHECK-NOT: pop
+; CHECK: bx lr
+
+ at codetable.2928 = internal constant [5 x i8*] [i8* blockaddress(@interpret_threaded, %RETURN), i8* blockaddress(@interpret_threaded, %INCREMENT), i8* blockaddress(@interpret_threaded, %DECREMENT), i8* blockaddress(@interpret_threaded, %DOUBLE), i8* blockaddress(@interpret_threaded, %SWAPWORD)] ; <[5 x i8*]*> [#uses=5]
+ at llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i8*)* @interpret_threaded to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc i32 @interpret_threaded(i8* nocapture %opcodes) nounwind readonly optsize {
+entry:
+  %0 = load i8* %opcodes, align 1                 ; <i8> [#uses=1]
+  %1 = zext i8 %0 to i32                          ; <i32> [#uses=1]
+  %2 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %1 ; <i8**> [#uses=1]
+  br label %bb
+
+bb:                                               ; preds = %bb.backedge, %entry
+  %indvar = phi i32 [ %phitmp, %bb.backedge ], [ 1, %entry ] ; <i32> [#uses=2]
+  %gotovar.22.0.in = phi i8** [ %gotovar.22.0.in.be, %bb.backedge ], [ %2, %entry ] ; <i8**> [#uses=1]
+  %result.0 = phi i32 [ %result.0.be, %bb.backedge ], [ 0, %entry ] ; <i32> [#uses=6]
+  %opcodes_addr.0 = getelementptr i8* %opcodes, i32 %indvar ; <i8*> [#uses=4]
+  %gotovar.22.0 = load i8** %gotovar.22.0.in, align 4 ; <i8*> [#uses=1]
+  indirectbr i8* %gotovar.22.0, [label %RETURN, label %INCREMENT, label %DECREMENT, label %DOUBLE, label %SWAPWORD]
+
+RETURN:                                           ; preds = %bb
+  ret i32 %result.0
+
+INCREMENT:                                        ; preds = %bb
+  %3 = add nsw i32 %result.0, 1                   ; <i32> [#uses=1]
+  %4 = load i8* %opcodes_addr.0, align 1          ; <i8> [#uses=1]
+  %5 = zext i8 %4 to i32                          ; <i32> [#uses=1]
+  %6 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %5 ; <i8**> [#uses=1]
+  br label %bb.backedge
+
+bb.backedge:                                      ; preds = %SWAPWORD, %DOUBLE, %DECREMENT, %INCREMENT
+  %gotovar.22.0.in.be = phi i8** [ %20, %SWAPWORD ], [ %14, %DOUBLE ], [ %10, %DECREMENT ], [ %6, %INCREMENT ] ; <i8**> [#uses=1]
+  %result.0.be = phi i32 [ %17, %SWAPWORD ], [ %11, %DOUBLE ], [ %7, %DECREMENT ], [ %3, %INCREMENT ] ; <i32> [#uses=1]
+  %phitmp = add i32 %indvar, 1                    ; <i32> [#uses=1]
+  br label %bb
+
+DECREMENT:                                        ; preds = %bb
+  %7 = add i32 %result.0, -1                      ; <i32> [#uses=1]
+  %8 = load i8* %opcodes_addr.0, align 1          ; <i8> [#uses=1]
+  %9 = zext i8 %8 to i32                          ; <i32> [#uses=1]
+  %10 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %9 ; <i8**> [#uses=1]
+  br label %bb.backedge
+
+DOUBLE:                                           ; preds = %bb
+  %11 = shl i32 %result.0, 1                      ; <i32> [#uses=1]
+  %12 = load i8* %opcodes_addr.0, align 1         ; <i8> [#uses=1]
+  %13 = zext i8 %12 to i32                        ; <i32> [#uses=1]
+  %14 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %13 ; <i8**> [#uses=1]
+  br label %bb.backedge
+
+SWAPWORD:                                         ; preds = %bb
+  %15 = shl i32 %result.0, 16                     ; <i32> [#uses=1]
+  %16 = ashr i32 %result.0, 16                    ; <i32> [#uses=1]
+  %17 = or i32 %15, %16                           ; <i32> [#uses=1]
+  %18 = load i8* %opcodes_addr.0, align 1         ; <i8> [#uses=1]
+  %19 = zext i8 %18 to i32                        ; <i32> [#uses=1]
+  %20 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %19 ; <i8**> [#uses=1]
+  br label %bb.backedge
+}





More information about the llvm-branch-commits mailing list