[llvm-commits] [llvm] r143145 - in /llvm/trunk: lib/Target/ARM/Thumb2SizeReduction.cpp test/CodeGen/ARM/avoid-cpsr-rmw.ll

Evan Cheng evan.cheng at apple.com
Thu Oct 27 14:21:05 PDT 2011


Author: evancheng
Date: Thu Oct 27 16:21:05 2011
New Revision: 143145

URL: http://llvm.org/viewvc/llvm-project?rev=143145&view=rev
Log:
Avoid partial CPSR dependency from loop backedges. rdar://10357570

Modified:
    llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
    llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll

Modified: llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp?rev=143145&r1=143144&r2=143145&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp Thu Oct 27 16:21:05 2011
@@ -146,7 +146,8 @@
     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
     DenseMap<unsigned, unsigned> ReduceOpcodeMap;
 
-    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+                             bool IsSelfLoop);
 
     bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                          bool is2Addr, ARMCC::CondCodes Pred,
@@ -157,19 +158,21 @@
 
     bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry, bool LiveCPSR,
-                       MachineInstr *CPSRDef);
+                       MachineInstr *CPSRDef, bool IsSelfLoop);
 
     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
     /// instruction.
     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry,
-                       bool LiveCPSR, MachineInstr *CPSRDef);
+                       bool LiveCPSR, MachineInstr *CPSRDef,
+                       bool IsSelfLoop);
 
     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
     /// non-two-address instruction.
     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry,
-                        bool LiveCPSR, MachineInstr *CPSRDef);
+                        bool LiveCPSR, MachineInstr *CPSRDef,
+                        bool IsSelfLoop);
 
     /// ReduceMBB - Reduce width of instructions in the specified basic block.
     bool ReduceMBB(MachineBasicBlock &MBB);
@@ -210,10 +213,17 @@
 /// In this case it would have been ok to narrow the mul.w to muls since there
 /// are indirect RAW dependency between the muls and the mul.w
 bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
-  if (!Def || !STI->avoidCPSRPartialUpdate())
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+                                      bool FirstInSelfLoop) {
+  // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
+  if (!STI->avoidCPSRPartialUpdate())
     return false;
 
+  if (!Def)
+    // If this BB loops back to itself, conservatively avoid narrowing the
+    // first instruction that does partial flag update.
+    return FirstInSelfLoop;
+
   SmallSet<unsigned, 2> Defs;
   for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = Def->getOperand(i);
@@ -476,15 +486,16 @@
 bool
 Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR, MachineInstr *CPSRDef) {
+                                bool LiveCPSR, MachineInstr *CPSRDef,
+                                bool IsSelfLoop) {
   unsigned Opc = MI->getOpcode();
   if (Opc == ARM::t2ADDri) {
     // If the source register is SP, try to reduce to tADDrSPi, otherwise
     // it's a normal reduce.
     if (MI->getOperand(1).getReg() != ARM::SP) {
-      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
         return true;
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
     }
     // Try to reduce to tADDrSPi.
     unsigned Imm = MI->getOperand(2).getImm();
@@ -535,12 +546,12 @@
       switch (Opc) {
       default: break;
       case ARM::t2ADDSri: {
-        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
           return true;
         // fallthrough
       }
       case ARM::t2ADDSrr:
-        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
       }
     }
     break;
@@ -552,13 +563,13 @@
   case ARM::t2UXTB:
   case ARM::t2UXTH:
     if (MI->getOperand(2).getImm() == 0)
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
     break;
   case ARM::t2MOVi16:
     // Can convert only 'pure' immediate operands, not immediates obtained as
     // globals' addresses.
     if (MI->getOperand(1).isImm())
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
     break;
   case ARM::t2CMPrr: {
     // Try to reduce to the lo-reg only version first. Why there are two
@@ -568,9 +579,9 @@
     // source insn opcode. So for now, we hack a local entry record to use.
     static const ReduceEntry NarrowEntry =
       { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
-    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
+    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
       return true;
-    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
   }
   }
   return false;
@@ -579,7 +590,8 @@
 bool
 Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR, MachineInstr *CPSRDef) {
+                                bool LiveCPSR, MachineInstr *CPSRDef,
+                                bool IsSelfLoop) {
 
   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
     return false;
@@ -637,7 +649,7 @@
   // Avoid adding a false dependency on partial flag update by some 16-bit
   // instructions which has the 's' bit set.
   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
-      canAddPseudoFlagDep(CPSRDef, MI))
+      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
     return false;
 
   // Add the 16-bit instruction.
@@ -674,7 +686,8 @@
 bool
 Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry,
-                                 bool LiveCPSR, MachineInstr *CPSRDef) {
+                                 bool LiveCPSR, MachineInstr *CPSRDef,
+                                 bool IsSelfLoop) {
   if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
     return false;
 
@@ -727,7 +740,7 @@
   // Avoid adding a false dependency on partial flag update by some 16-bit
   // instructions which has the 's' bit set.
   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
-      canAddPseudoFlagDep(CPSRDef, MI))
+      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
     return false;
 
   // Add the 16-bit instruction.
@@ -818,6 +831,9 @@
   bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
   MachineInstr *CPSRDef = 0;
 
+  // If this BB loops back to itself, conservatively avoid narrowing the
+  // first instruction that does partial flag update.
+  bool IsSelfLoop = MBB.isSuccessor(&MBB);
   MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
   MachineBasicBlock::iterator NextMII;
   for (; MII != E; MII = NextMII) {
@@ -832,7 +848,7 @@
       const ReduceEntry &Entry = ReduceTable[OPI->second];
       // Ignore "special" cases for now.
       if (Entry.Special) {
-        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
           Modified = true;
           MachineBasicBlock::iterator I = prior(NextMII);
           MI = &*I;
@@ -842,7 +858,7 @@
 
       // Try to transform to a 16-bit two-address instruction.
       if (Entry.NarrowOpc2 &&
-          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
         Modified = true;
         MachineBasicBlock::iterator I = prior(NextMII);
         MI = &*I;
@@ -851,7 +867,7 @@
 
       // Try to transform to a 16-bit non-two-address instruction.
       if (Entry.NarrowOpc1 &&
-          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
         Modified = true;
         MachineBasicBlock::iterator I = prior(NextMII);
         MI = &*I;
@@ -861,12 +877,15 @@
   ProcessNext:
     bool DefCPSR = false;
     LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
-    if (MI->getDesc().isCall())
+    if (MI->getDesc().isCall()) {
       // Calls don't really set CPSR.
       CPSRDef = 0;
-    else if (DefCPSR)
+      IsSelfLoop = false;
+    } else if (DefCPSR) {
       // This is the last CPSR defining instruction.
       CPSRDef = MI;
+      IsSelfLoop = false;
+    }
   }
 
   return Modified;

Modified: llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll?rev=143145&r1=143144&r2=143145&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/avoid-cpsr-rmw.ll Thu Oct 27 16:21:05 2011
@@ -3,9 +3,9 @@
 ; dependency) when it isn't dependent on last CPSR defining instruction.
 ; rdar://8928208
 
-define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
  entry:
-; CHECK: t:
+; CHECK: t1:
 ; CHECK: muls [[REG:(r[0-9]+)]], r2, r3
 ; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r0, r1
 ; CHECK-NEXT: muls r0, [[REG2]], [[REG]]
@@ -14,3 +14,37 @@
   %2 = mul nsw i32 %0, %1
   ret i32 %2
 }
+
+; Avoid partial CPSR dependency via loop backedge.
+; rdar://10357570
+define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind {
+entry:
+; CHECK: t2:
+  %tobool7 = icmp eq i32* %ptr2, null
+  br i1 %tobool7, label %while.end, label %while.body
+
+while.body:
+; CHECK: while.body
+; CHECK: mul r{{[0-9]+}}
+; CHECK-NOT: muls
+  %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
+  %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
+  %0 = load i32* %ptr1.addr.09, align 4
+  %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1
+  %1 = load i32* %arrayidx1, align 4
+  %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2
+  %2 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3
+  %3 = load i32* %arrayidx4, align 4
+  %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4
+  %mul = mul i32 %1, %0
+  %mul5 = mul i32 %mul, %2
+  %mul6 = mul i32 %mul5, %3
+  store i32 %mul6, i32* %ptr2.addr.08, align 4
+  %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1
+  %tobool = icmp eq i32* %incdec.ptr, null
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}





More information about the llvm-commits mailing list