[llvm-commits] [llvm] r147827 - in /llvm/trunk: lib/CodeGen/MachineCSE.cpp test/CodeGen/ARM/machine-cse-cmp.ll test/CodeGen/Thumb2/thumb2-cbnz.ll test/CodeGen/X86/machine-cse.ll

Evan Cheng evan.cheng at apple.com
Mon Jan 9 18:02:58 PST 2012


Author: evancheng
Date: Mon Jan  9 20:02:58 2012
New Revision: 147827

URL: http://llvm.org/viewvc/llvm-project?rev=147827&view=rev
Log:
Allow machine-cse to look across MBB boundary when cse'ing instructions that
define physical registers. It's currently very restrictive, only catching
cases where the common subexpression is in an immediate (and only) predecessor.
But it catches a surprisingly large number of cases.

rdar://10660865

Modified:
    llvm/trunk/lib/CodeGen/MachineCSE.cpp
    llvm/trunk/test/CodeGen/ARM/machine-cse-cmp.ll
    llvm/trunk/test/CodeGen/Thumb2/thumb2-cbnz.ll
    llvm/trunk/test/CodeGen/X86/machine-cse.ll

Modified: llvm/trunk/lib/CodeGen/MachineCSE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCSE.cpp?rev=147827&r1=147826&r2=147827&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineCSE.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineCSE.cpp Mon Jan  9 20:02:58 2012
@@ -26,13 +26,14 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/RecyclingAllocator.h"
-
 using namespace llvm;
 
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
 STATISTIC(NumPhysCSEs,
           "Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCrossBBCSEs,
+          "Number of cross-MBB physreg referencing CS eliminated");
 STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
 
 namespace {
@@ -82,9 +83,11 @@
                                 MachineBasicBlock::const_iterator E) const ;
     bool hasLivePhysRegDefUses(const MachineInstr *MI,
                                const MachineBasicBlock *MBB,
-                               SmallSet<unsigned,8> &PhysRefs) const;
+                               SmallSet<unsigned,8> &PhysRefs,
+                               SmallVector<unsigned,2> &PhysDefs) const;
     bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
-                          SmallSet<unsigned,8> &PhysRefs) const;
+                          SmallSet<unsigned,8> &PhysRefs,
+                          bool &NonLocal) const;
     bool isCSECandidate(MachineInstr *MI);
     bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                            MachineInstr *CSMI, MachineInstr *MI);
@@ -189,7 +192,8 @@
 /// instruction does not uses a physical register.
 bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                        const MachineBasicBlock *MBB,
-                                       SmallSet<unsigned,8> &PhysRefs) const {
+                                       SmallSet<unsigned,8> &PhysRefs,
+                                       SmallVector<unsigned,2> &PhysDefs) const{
   MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
@@ -207,6 +211,8 @@
         (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
       continue;
     PhysRefs.insert(Reg);
+    if (MO.isDef())
+      PhysDefs.push_back(Reg);
     for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
       PhysRefs.insert(*Alias);
   }
@@ -215,20 +221,39 @@
 }
 
 bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
-                                  SmallSet<unsigned,8> &PhysRefs) const {
+                                  SmallSet<unsigned,8> &PhysRefs,
+                                  bool &NonLocal) const {
   // For now conservatively returns false if the common subexpression is
-  // not in the same basic block as the given instruction.
-  MachineBasicBlock *MBB = MI->getParent();
-  if (CSMI->getParent() != MBB)
-    return false;
+  // not in the same basic block as the given instruction. The only exception
+  // is if the common subexpression is in the sole predecessor block.
+  const MachineBasicBlock *MBB = MI->getParent();
+  const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+  bool CrossMBB = false;
+  if (CSMBB != MBB) {
+    if (MBB->pred_size() == 1 && *MBB->pred_begin() == CSMBB)
+      CrossMBB = true;
+    else
+      return false;
+  }
   MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
   MachineBasicBlock::const_iterator E = MI;
+  MachineBasicBlock::const_iterator EE = CSMBB->end();
   unsigned LookAheadLeft = LookAheadLimit;
   while (LookAheadLeft) {
     // Skip over dbg_value's.
-    while (I != E && I->isDebugValue())
+    while (I != E && I != EE && I->isDebugValue())
       ++I;
 
+    if (I == EE) {
+      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+      CrossMBB = false;
+      NonLocal = true;
+      I = MBB->begin();
+      EE = MBB->end();
+      continue;
+    }
+
     if (I == E)
       return true;
 
@@ -393,16 +418,18 @@
     // If the instruction defines physical registers and the values *may* be
     // used, then it's not safe to replace it with a common subexpression.
     // It's also not safe if the instruction uses physical registers.
+    bool CrossMBBPhysDef = false;
     SmallSet<unsigned,8> PhysRefs;
-    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
+    SmallVector<unsigned, 2> PhysDefs;
+    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
       FoundCSE = false;
 
-      // ... Unless the CS is local and it also defines the physical register
-      // which is not clobbered in between and the physical register uses 
-      // were not clobbered.
+      // ... Unless the CS is local or is in the sole predecessor block
+      // and it also defines the physical register which is not clobbered
+      // in between and the physical register uses were not clobbered.
       unsigned CSVN = VNT.lookup(MI);
       MachineInstr *CSMI = Exps[CSVN];
-      if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+      if (PhysRegDefsReach(CSMI, MI, PhysRefs, CrossMBBPhysDef))
         FoundCSE = true;
     }
 
@@ -457,6 +484,18 @@
         MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
         MRI->clearKillFlags(CSEPairs[i].second);
       }
+
+      if (CrossMBBPhysDef) {
+        // Add physical register defs now coming in from a predecessor to MBB
+        // livein list.
+        while (!PhysDefs.empty()) {
+          unsigned LiveIn = PhysDefs.pop_back_val();
+          if (!MBB->isLiveIn(LiveIn))
+            MBB->addLiveIn(LiveIn);
+        }
+        ++NumCrossBBCSEs;
+      }
+
       MI->eraseFromParent();
       ++NumCSEs;
       if (!PhysRefs.empty())

Modified: llvm/trunk/test/CodeGen/ARM/machine-cse-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/machine-cse-cmp.ll?rev=147827&r1=147826&r2=147827&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/machine-cse-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/machine-cse-cmp.ll Mon Jan  9 20:02:58 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 ;rdar://8003725
 
 @G1 = external global i32
@@ -6,6 +6,7 @@
 
 define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
 entry:
+; CHECK: f1:
 ; CHECK: cmp
 ; CHECK: moveq
 ; CHECK-NOT: cmp
@@ -16,3 +17,31 @@
     %tmp4 = add i32 %tmp2, %tmp3
     ret i32 %tmp4
 }
+
+@foo = external global i32
+@bar = external global [250 x i8], align 1
+
+; CSE of cmp across BB boundary
+; rdar://10660865
+define void @f2() nounwind ssp {
+entry:
+; CHECK: f2:
+; CHECK: cmp
+; CHECK: poplt
+; CHECK-NOT: cmp
+; CHECK: movle
+  %0 = load i32* @foo, align 4
+  %cmp28 = icmp sgt i32 %0, 0
+  br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
+
+for.body.lr.ph:                                   ; preds = %entry
+  %1 = icmp sgt i32 %0, 1
+  %smax = select i1 %1, i32 %0, i32 1
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+  unreachable
+
+for.cond1.preheader:                              ; preds = %entry
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind

Modified: llvm/trunk/test/CodeGen/Thumb2/thumb2-cbnz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/thumb2-cbnz.ll?rev=147827&r1=147826&r2=147827&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/thumb2-cbnz.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/thumb2-cbnz.ll Mon Jan  9 20:02:58 2012
@@ -24,7 +24,6 @@
 
 bb9:                                              ; preds = %bb7
 ; CHECK:      cmp	r0, #0
-; CHECK:      cmp	r0, #0
 ; CHECK-NEXT:      cbnz
   %0 = tail call  double @foo(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11

Modified: llvm/trunk/test/CodeGen/X86/machine-cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/machine-cse.ll?rev=147827&r1=147826&r2=147827&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/machine-cse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/machine-cse.ll Mon Jan  9 20:02:58 2012
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s
 ; rdar://7610418
 
 %ptr = type { i8* }
@@ -77,3 +77,25 @@
 sw.bb307:                                         ; preds = %sw.bb, %entry
   ret void
 }
+
+; CSE physical register defining instruction across MBB boundary.
+; rdar://10660865
+define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: cross_mbb_phys_cse:
+; CHECK: cmpl
+; CHECK: ja
+  %cmp = icmp ugt i32 %a, %b
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+; CHECK-NOT: cmpl
+; CHECK: sbbl
+  %cmp1 = icmp ult i32 %a, %b
+  %. = sext i1 %cmp1 to i32
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
+  ret i32 %retval.0
+}





More information about the llvm-commits mailing list