[llvm] r197465 - Allow MachineCSE to coalesce trivial subregister copies the same way that it coalesces normal copies.

Andrew Trick atrick at apple.com
Mon Dec 16 20:50:46 PST 2013


Author: atrick
Date: Mon Dec 16 22:50:45 2013
New Revision: 197465

URL: http://llvm.org/viewvc/llvm-project?rev=197465&view=rev
Log:
Allow MachineCSE to coalesce trivial subregister copies the same way that it coalesces normal copies.

Without this, MachineCSE is powerless to handle redundant operations with truncated source operands.

This required fixing the 2-addr pass to handle tied subregisters. It isn't clear what combinations of subregisters can legally be tied, but the simple case of truncated source operands is now safely handled:

     %vreg11<def> = COPY %vreg1:sub_32bit; GR32:%vreg11 GR64:%vreg1
     %vreg12<def> = COPY %vreg2:sub_32bit; GR32:%vreg12 GR64:%vreg2
     %vreg13<def,tied1> = ADD32rr %vreg11<tied0>, %vreg12<kill>, %EFLAGS<imp-def>

Test case: cse-add-with-overflow.ll.

This exposed an existing bug in
PPCInstrInfo::commuteInstruction. Thanks to Rafael for the test case:
PowerPC/crash.ll.

Added:
    llvm/trunk/test/CodeGen/X86/cse-add-with-overflow.ll
Modified:
    llvm/trunk/lib/CodeGen/MachineCSE.cpp
    llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/cmov.ll

Modified: llvm/trunk/lib/CodeGen/MachineCSE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCSE.cpp?rev=197465&r1=197464&r2=197465&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineCSE.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineCSE.cpp Mon Dec 16 22:50:45 2013
@@ -131,13 +131,18 @@ bool MachineCSE::PerformTrivialCoalescin
     unsigned SrcReg = DefMI->getOperand(1).getReg();
     if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
       continue;
-    if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+    if (DefMI->getOperand(0).getSubReg())
       continue;
-    if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
+    unsigned SrcSubReg = DefMI->getOperand(1).getSubReg();
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+    if (SrcSubReg)
+      RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
+                                         SrcSubReg);
+    if (!MRI->constrainRegClass(SrcReg, RC))
       continue;
     DEBUG(dbgs() << "Coalescing: " << *DefMI);
     DEBUG(dbgs() << "***     to: " << *MI);
-    MO.setReg(SrcReg);
+    MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
     MRI->clearKillFlags(SrcReg);
     DefMI->eraseFromParent();
     ++NumCoalesces;

Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=197465&r1=197464&r2=197465&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Mon Dec 16 22:50:45 2013
@@ -1317,13 +1317,14 @@ collectTiedOperands(MachineInstr *MI, Ti
     assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
 
     // Deal with <undef> uses immediately - simply rewrite the src operand.
-    if (SrcMO.isUndef()) {
+    if (SrcMO.isUndef() && !DstMO.getSubReg()) {
       // Constrain the DstReg register class if required.
       if (TargetRegisterInfo::isVirtualRegister(DstReg))
         if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
                                                              TRI, *MF))
           MRI->constrainRegClass(DstReg, RC);
       SrcMO.setReg(DstReg);
+      SrcMO.setSubReg(0);
       DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
       continue;
     }
@@ -1349,6 +1350,7 @@ TwoAddressInstructionPass::processTiedPa
   unsigned LastCopiedReg = 0;
   SlotIndex LastCopyIdx;
   unsigned RegB = 0;
+  unsigned SubRegB = 0;
   for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
     unsigned SrcIdx = TiedPairs[tpi].first;
     unsigned DstIdx = TiedPairs[tpi].second;
@@ -1359,6 +1361,7 @@ TwoAddressInstructionPass::processTiedPa
     // Grab RegB from the instruction because it may have changed if the
     // instruction was commuted.
     RegB = MI->getOperand(SrcIdx).getReg();
+    SubRegB = MI->getOperand(SrcIdx).getSubReg();
 
     if (RegA == RegB) {
       // The register is tied to multiple destinations (or else we would
@@ -1383,8 +1386,25 @@ TwoAddressInstructionPass::processTiedPa
 #endif
 
     // Emit a copy.
-    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
-            TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+    MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                      TII->get(TargetOpcode::COPY), RegA);
+    // If this operand is folding a truncation, the truncation now moves to the
+    // copy so that the register classes remain valid for the operands.
+    MIB.addReg(RegB, 0, SubRegB);
+    const TargetRegisterClass *RC = MRI->getRegClass(RegB);
+    if (SubRegB) {
+      if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+        assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
+                                             SubRegB) &&
+               "tied subregister must be a truncation");
+        // The superreg class will not be used to constrain the subreg class.
+        RC = 0;
+      }
+      else {
+        assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
+               && "tied subregister must be a truncation");
+      }
+    }
 
     // Update DistanceMap.
     MachineBasicBlock::iterator PrevMI = MI;
@@ -1404,7 +1424,7 @@ TwoAddressInstructionPass::processTiedPa
       }
     }
 
-    DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+    DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
 
     MachineOperand &MO = MI->getOperand(SrcIdx);
     assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
@@ -1417,9 +1437,12 @@ TwoAddressInstructionPass::processTiedPa
     // Make sure regA is a legal regclass for the SrcIdx operand.
     if (TargetRegisterInfo::isVirtualRegister(RegA) &&
         TargetRegisterInfo::isVirtualRegister(RegB))
-      MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
-
+      MRI->constrainRegClass(RegA, RC);
     MO.setReg(RegA);
+    // The getMatchingSuper asserts guarantee that the register class projected
+    // by SubRegB is compatible with RegA with no subregister. So regardless of
+    // whether the dest oper writes a subreg, the source oper should not.
+    MO.setSubReg(0);
 
     // Propagate SrcRegMap.
     SrcRegMap[RegA] = RegB;
@@ -1431,12 +1454,14 @@ TwoAddressInstructionPass::processTiedPa
       // Replace other (un-tied) uses of regB with LastCopiedReg.
       for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
         MachineOperand &MO = MI->getOperand(i);
-        if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+        if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
+            MO.isUse()) {
           if (MO.isKill()) {
             MO.setIsKill(false);
             RemovedKillFlag = true;
           }
           MO.setReg(LastCopiedReg);
+          MO.setSubReg(0);
         }
       }
     }

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=197465&r1=197464&r2=197465&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp Mon Dec 16 22:50:45 2013
@@ -227,6 +227,8 @@ PPCInstrInfo::commuteInstruction(Machine
   unsigned Reg0 = MI->getOperand(0).getReg();
   unsigned Reg1 = MI->getOperand(1).getReg();
   unsigned Reg2 = MI->getOperand(2).getReg();
+  unsigned SubReg1 = MI->getOperand(1).getSubReg();
+  unsigned SubReg2 = MI->getOperand(2).getSubReg();
   bool Reg1IsKill = MI->getOperand(1).isKill();
   bool Reg2IsKill = MI->getOperand(2).isKill();
   bool ChangeReg0 = false;
@@ -236,6 +238,7 @@ PPCInstrInfo::commuteInstruction(Machine
     // Must be two address instruction!
     assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
            "Expecting a two-address instruction!");
+    assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
     Reg2IsKill = false;
     ChangeReg0 = true;
   }
@@ -256,10 +259,14 @@ PPCInstrInfo::commuteInstruction(Machine
       .addImm((MB-1) & 31);
   }
 
-  if (ChangeReg0)
+  if (ChangeReg0) {
     MI->getOperand(0).setReg(Reg2);
+    MI->getOperand(0).setSubReg(SubReg2);
+  }
   MI->getOperand(2).setReg(Reg1);
   MI->getOperand(1).setReg(Reg2);
+  MI->getOperand(2).setSubReg(SubReg1);
+  MI->getOperand(1).setSubReg(SubReg2);
   MI->getOperand(2).setIsKill(Reg1IsKill);
   MI->getOperand(1).setIsKill(Reg2IsKill);
 
@@ -1591,4 +1598,3 @@ INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TY
 char PPCEarlyReturn::ID = 0;
 FunctionPass*
 llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
-

Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.cpp?rev=197465&r1=197464&r2=197465&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.cpp Mon Dec 16 22:50:45 2013
@@ -275,8 +275,10 @@ MachineInstr *SIInstrInfo::commuteInstru
       return 0;
 
     unsigned Reg = MI->getOperand(1).getReg();
+    unsigned SubReg = MI->getOperand(1).getSubReg();
     MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
     MI->getOperand(2).ChangeToRegister(Reg, false);
+    MI->getOperand(2).setSubReg(SubReg);
   } else {
     MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
   }

Modified: llvm/trunk/test/CodeGen/X86/cmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmov.ll?rev=197465&r1=197464&r2=197465&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmov.ll Mon Dec 16 22:50:45 2013
@@ -41,8 +41,8 @@ declare void @bar(i64) nounwind
 
 define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
 ; CHECK-LABEL: test3:
-; CHECK:      cmovnel %edi, %esi
-; CHECK-NEXT: movl    %esi, %edi
+; CHECK:      cmov{{n?}}el %[[R1:e..]], %[[R2:e..]]
+; CHECK-NEXT: movl    %[[R2]], %[[R2]]
 
   %c = trunc i64 %a to i32
   %d = trunc i64 %b to i32

Added: llvm/trunk/test/CodeGen/X86/cse-add-with-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cse-add-with-overflow.ll?rev=197465&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cse-add-with-overflow.ll (added)
+++ llvm/trunk/test/CodeGen/X86/cse-add-with-overflow.ll Mon Dec 16 22:50:45 2013
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=generic | FileCheck %s
+; rdar:15661073 simple example of redundant adds
+;
+; MachineCSE should coalesce trivial subregister copies.
+;
+; The extra movl+addl should be removed during MachineCSE.
+; CHECK-LABEL: redundantadd
+; CHECK: cmpq
+; CHECK: movq
+; CHECK-NOT: movl
+; CHECK: addl
+; CHECK-NOT: addl
+; CHECK: ret
+
+define i64 @redundantadd(i64* %a0, i64* %a1) {
+entry:
+  %tmp8 = load i64* %a0, align 8
+  %tmp12 = load i64* %a1, align 8
+  %tmp13 = icmp ult i64 %tmp12, -281474976710656
+  br i1 %tmp13, label %exit1, label %body
+
+exit1:
+  unreachable
+
+body:
+  %tmp14 = trunc i64 %tmp8 to i32
+  %tmp15 = trunc i64 %tmp12 to i32
+  %tmp16 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp14, i32 %tmp15)
+  %tmp17 = extractvalue { i32, i1 } %tmp16, 1
+  br i1 %tmp17, label %exit2, label %return
+
+exit2:
+  unreachable
+
+return:
+  %tmp18 = add i64 %tmp12, %tmp8
+  %tmp19 = and i64 %tmp18, 4294967295
+  %tmp20 = or i64 %tmp19, -281474976710656
+  ret i64 %tmp20
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)





More information about the llvm-commits mailing list