[llvm-commits] [llvm] r144559 - in /llvm/trunk: lib/CodeGen/TwoAddressInstructionPass.cpp test/CodeGen/X86/iv-users-in-other-loops.ll test/CodeGen/X86/lsr-reuse-trunc.ll test/CodeGen/X86/sse3.ll

Evan Cheng evan.cheng at apple.com
Mon Nov 14 11:48:55 PST 2011


Author: evancheng
Date: Mon Nov 14 13:48:55 2011
New Revision: 144559

URL: http://llvm.org/viewvc/llvm-project?rev=144559&view=rev
Log:
Teach the two-address pass to re-schedule two-address instructions (or the kill
instructions of the two-address operands) in order to avoid inserting copies.
This fixes the few regressions introduced when the two-address hack was
disabled (without regressing the improvements).
rdar://10422688
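
As an informal sketch of the transformation (register choices here are just for
illustration; they mirror the t15 change in test/CodeGen/X86/sse3.ll below): when
the tied source operand is still live past the two-address instruction, a copy has
to be inserted to preserve its old value:

        movdqa     %xmm0, %xmm2         # copy inserted so the old %xmm0 survives
        punpcklqdq %xmm1, %xmm0         # two-address def clobbers %xmm0
        pshuflw    $-128, %xmm0, %xmm0
        pextrw     $7, %xmm2, %eax      # last (killing) use of the old value

If the killing use can be re-scheduled above the two-address instruction (or the
two-address instruction sunk below the kill), the copy is no longer needed:

        pextrw     $7, %xmm0, %eax      # killing use moved up
        punpcklqdq %xmm1, %xmm0         # now free to clobber %xmm0 in place
        pshuflw    $-128, %xmm0, %xmm0

The updated CHECK lines in the sse3.ll, lsr-reuse-trunc.ll, and
iv-users-in-other-loops.ll tests below reflect this effect on the generated code.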

Modified:
    llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
    llvm/trunk/test/CodeGen/X86/iv-users-in-other-loops.ll
    llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll
    llvm/trunk/test/CodeGen/X86/sse3.ll

Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=144559&r1=144558&r2=144559&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Mon Nov 14 13:48:55 2011
@@ -36,6 +36,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -56,11 +57,14 @@
 STATISTIC(Num3AddrSunk,        "Number of 3-address instructions sunk");
 STATISTIC(NumReMats,           "Number of instructions re-materialized");
 STATISTIC(NumDeletes,          "Number of dead instructions deleted");
+STATISTIC(NumReSchedUps,       "Number of instructions re-scheduled up");
+STATISTIC(NumReSchedDowns,     "Number of instructions re-scheduled down");
 
 namespace {
   class TwoAddressInstructionPass : public MachineFunctionPass {
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
+    const InstrItineraryData *InstrItins;
     MachineRegisterInfo *MRI;
     LiveVariables *LV;
     AliasAnalysis *AA;
@@ -120,6 +124,18 @@
                            MachineBasicBlock::iterator &nmi,
                            MachineFunction::iterator &mbbi, unsigned Dist);
 
+    bool isDefTooClose(unsigned Reg, unsigned Dist,
+                       MachineInstr *MI, MachineBasicBlock *MBB);
+
+    bool RescheduleMIBelowKill(MachineBasicBlock *MBB,
+                               MachineBasicBlock::iterator &mi,
+                               MachineBasicBlock::iterator &nmi,
+                               unsigned Reg);
+    bool RescheduleKillAboveMI(MachineBasicBlock *MBB,
+                               MachineBasicBlock::iterator &mi,
+                               MachineBasicBlock::iterator &nmi,
+                               unsigned Reg);
+
     bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
                                  MachineBasicBlock::iterator &nmi,
                                  MachineFunction::iterator &mbbi,
@@ -467,6 +483,32 @@
   return false;
 }
 
+/// findLocalKill - Look for an instruction below MI in the MBB that kills the
+/// specified register. Returns null if there is any other use of Reg between
+/// the instructions.
+static
+MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB,
+                            MachineInstr *MI, MachineRegisterInfo *MRI,
+                            DenseMap<MachineInstr*, unsigned> &DistanceMap) {
+  MachineInstr *KillMI = 0;
+  for (MachineRegisterInfo::use_nodbg_iterator
+         UI = MRI->use_nodbg_begin(Reg),
+         UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    if (UseMI == MI || UseMI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+    if (DI != DistanceMap.end())
+      continue;
+    if (!UI.getOperand().isKill())
+      return 0;
+    assert(!KillMI && "More than one local kill?");
+    KillMI = UseMI;
+  }
+
+  return KillMI;
+}
+
 /// findOnlyInterestingUse - Given a register, if it has a single in-basic-block
 /// use, return the use instruction if it's a copy or a two-address use.
 static
@@ -852,6 +894,285 @@
   return true;
 }
 
+/// RescheduleMIBelowKill - If there is one more local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
+/// instruction in order to eliminate the need for the copy.
+bool
+TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
+                                     MachineBasicBlock::iterator &mi,
+                                     MachineBasicBlock::iterator &nmi,
+                                     unsigned Reg) {
+  MachineInstr *MI = &*mi;
+  DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); 
+  if (DI == DistanceMap.end())
+    // Must be created from unfolded load. Don't waste time trying this.
+    return false;
+
+  MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+  if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+    // Don't mess with copies, they may be coalesced later.
+    return false;
+
+  const MCInstrDesc &MCID = KillMI->getDesc();
+  if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() ||
+      MCID.isTerminator())
+    // Don't move past calls, etc.
+    return false;
+
+  unsigned DstReg;
+  if (isTwoAddrUse(*KillMI, Reg, DstReg))
+    return false;
+
+  bool SeenStore;
+  if (!MI->isSafeToMove(TII, AA, SeenStore))
+    return false;
+
+  if (TII->getInstrLatency(InstrItins, MI) > 1)
+    // FIXME: Needs more sophisticated heuristics.
+    return false;
+
+  SmallSet<unsigned, 2> Uses;
+  SmallSet<unsigned, 2> Defs;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (!MOReg)
+      continue;
+    if (MO.isDef())
+      Defs.insert(MOReg);
+    else
+      Uses.insert(MOReg);
+  }
+
+  // Move the copies connected to MI down as well.
+  MachineBasicBlock::iterator From = MI;
+  MachineBasicBlock::iterator To = llvm::next(From);
+  while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) {
+    Defs.insert(To->getOperand(0).getReg());
+    ++To;
+  }
+
+  // Check that the reschedule will not break dependencies.
+  unsigned NumVisited = 0;
+  MachineBasicBlock::iterator KillPos = KillMI;
+  ++KillPos;
+  for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) {
+    MachineInstr *OtherMI = I;
+    // DBG_VALUE cannot be counted against the limit.
+    if (OtherMI->isDebugValue())
+      continue;
+    if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
+      return false;
+    ++NumVisited;
+    const MCInstrDesc &OMCID = OtherMI->getDesc();
+    if (OMCID.hasUnmodeledSideEffects() || OMCID.isCall() || OMCID.isBranch() ||
+        OMCID.isTerminator())
+      // Don't move past calls, etc.
+      return false;
+    for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = OtherMI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (!MOReg)
+        continue;
+      if (MO.isDef()) {
+        if (Uses.count(MOReg))
+          // Physical register use would be clobbered.
+          return false;
+        if (!MO.isDead() && Defs.count(MOReg))
+          // May clobber a physical register def.
+          // FIXME: This may be too conservative. It's ok if the instruction
+          // is sunk completely below the use.
+          return false;
+      } else {
+        if (Defs.count(MOReg))
+          return false;
+        if (MOReg != Reg && MO.isKill() && Uses.count(MOReg))
+          // Don't want to extend other live ranges and update kills.
+          return false;
+      }
+    }
+  }
+
+  // Move debug info as well.
+  if (From != MBB->begin()) {
+    while (llvm::prior(From)->isDebugValue())
+      --From;
+  }
+
+  // Copies following MI may have been moved as well.
+  nmi = To;
+  MBB->splice(KillPos, MBB, From, To);
+  DistanceMap.erase(DI);
+
+  if (LV) {
+    // Update live variables
+    LV->removeVirtualRegisterKilled(Reg, KillMI);
+    LV->addVirtualRegisterKilled(Reg, MI);
+  } else {
+    for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = KillMI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+        continue;
+      MO.setIsKill(false);
+    }
+    MI->addRegisterKilled(Reg, 0);
+  }
+
+  return true;
+}
+
+/// isDefTooClose - Return true if the re-scheduling will put the given
+/// instruction too close to the defs of its register dependencies.
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+                                              MachineInstr *MI,
+                                              MachineBasicBlock *MBB) {
+  for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+         DE = MRI->def_end(); DI != DE; ++DI) {
+    MachineInstr *DefMI = &*DI;
+    if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike())
+      continue;
+    if (DefMI == MI)
+      return true; // MI is defining something KillMI uses
+    DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI);
+    if (DDI == DistanceMap.end())
+      return true;  // Below MI
+    unsigned DefDist = DDI->second;
+    assert(Dist > DefDist && "Visited def already?");
+    if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+      return true;
+  }
+  return false;
+}
+
+/// RescheduleKillAboveMI - If there is one more local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the kill instruction above the
+/// current two-address instruction in order to eliminate the need for the
+/// copy.
+bool
+TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
+                                     MachineBasicBlock::iterator &mi,
+                                     MachineBasicBlock::iterator &nmi,
+                                     unsigned Reg) {
+  MachineInstr *MI = &*mi;
+  DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+  if (DI == DistanceMap.end())
+    // Must be created from unfolded load. Don't waste time trying this.
+    return false;
+
+  MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+  if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+    // Don't mess with copies, they may be coalesced later.
+    return false;
+
+  unsigned DstReg;
+  if (isTwoAddrUse(*KillMI, Reg, DstReg))
+    return false;
+
+  bool SeenStore;
+  if (!KillMI->isSafeToMove(TII, AA, SeenStore))
+    return false;
+
+  SmallSet<unsigned, 2> Uses;
+  SmallSet<unsigned, 2> Kills;
+  SmallSet<unsigned, 2> Defs;
+  SmallSet<unsigned, 2> LiveDefs;
+  for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = KillMI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (MO.isUse()) {
+      if (!MOReg)
+        continue;
+      if (isDefTooClose(MOReg, DI->second, MI, MBB))
+        return false;
+      Uses.insert(MOReg);
+      if (MO.isKill() && MOReg != Reg)
+        Kills.insert(MOReg);
+    } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+      Defs.insert(MOReg);
+      if (!MO.isDead())
+        LiveDefs.insert(MOReg);
+    }
+  }
+
+  // Check that the reschedule will not break dependencies.
+  unsigned NumVisited = 0;
+  MachineBasicBlock::iterator KillPos = KillMI;
+  for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) {
+    MachineInstr *OtherMI = I;
+    // DBG_VALUE cannot be counted against the limit.
+    if (OtherMI->isDebugValue())
+      continue;
+    if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
+      return false;
+    ++NumVisited;
+    const MCInstrDesc &MCID = OtherMI->getDesc();
+    if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() ||
+        MCID.isTerminator())
+      // Don't move past calls, etc.
+      return false;
+    for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = OtherMI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (!MOReg)
+        continue;
+      if (MO.isUse()) {
+        if (Defs.count(MOReg))
+          // Moving KillMI can clobber the physical register if the def has
+          // not been seen.
+          return false;
+        if (Kills.count(MOReg))
+          // Don't want to extend other live ranges and update kills.
+          return false;
+      } else {
+        if (Uses.count(MOReg))
+          return false;
+        if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+            LiveDefs.count(MOReg))
+          return false;
+        // Physical register def is seen.
+        Defs.erase(MOReg);
+      }
+    }
+  }
+
+  // Move the old kill above MI; don't forget to move debug info as well.
+  MachineBasicBlock::iterator InsertPos = mi;
+  if (InsertPos != MBB->begin())
+    while (llvm::prior(InsertPos)->isDebugValue())
+      --InsertPos;
+  MachineBasicBlock::iterator From = KillMI;
+  MachineBasicBlock::iterator To = llvm::next(From);
+  while (llvm::prior(From)->isDebugValue())
+    --From;
+  MBB->splice(InsertPos, MBB, From, To);
+
+  nmi = llvm::prior(mi); // Backtrack so we process the moved instruction.
+  DistanceMap.erase(DI);
+
+  if (LV) {
+    // Update live variables
+    LV->removeVirtualRegisterKilled(Reg, KillMI);
+    LV->addVirtualRegisterKilled(Reg, MI);
+  } else {
+    for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = KillMI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+        continue;
+      MO.setIsKill(false);
+    }
+    MI->addRegisterKilled(Reg, 0);
+  }
+  return true;
+}
+
 /// TryInstructionTransform - For the case where an instruction has a single
 /// pair of tied register operands, attempt some transformations that may
 /// either eliminate the tied operands or improve the opportunities for
@@ -863,17 +1184,18 @@
                         MachineFunction::iterator &mbbi,
                         unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
                         SmallPtrSet<MachineInstr*, 8> &Processed) {
-  const MCInstrDesc &MCID = mi->getDesc();
-  unsigned regA = mi->getOperand(DstIdx).getReg();
-  unsigned regB = mi->getOperand(SrcIdx).getReg();
+  MachineInstr &MI = *mi;
+  const MCInstrDesc &MCID = MI.getDesc();
+  unsigned regA = MI.getOperand(DstIdx).getReg();
+  unsigned regB = MI.getOperand(SrcIdx).getReg();
 
   assert(TargetRegisterInfo::isVirtualRegister(regB) &&
          "cannot make instruction into two-address form");
 
   // If regA is dead and the instruction can be deleted, just delete
   // it so it doesn't clobber regB.
-  bool regBKilled = isKilled(*mi, regB, MRI, TII);
-  if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
+  bool regBKilled = isKilled(MI, regB, MRI, TII);
+  if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
       DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
     ++NumDeletes;
     return true; // Done with this instruction.
@@ -885,20 +1207,20 @@
   unsigned regCIdx = ~0U;
   bool TryCommute = false;
   bool AggressiveCommute = false;
-  if (MCID.isCommutable() && mi->getNumOperands() >= 3 &&
-      TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
+  if (MCID.isCommutable() && MI.getNumOperands() >= 3 &&
+      TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
     if (SrcIdx == SrcOp1)
       regCIdx = SrcOp2;
     else if (SrcIdx == SrcOp2)
       regCIdx = SrcOp1;
 
     if (regCIdx != ~0U) {
-      regC = mi->getOperand(regCIdx).getReg();
-      if (!regBKilled && isKilled(*mi, regC, MRI, TII))
+      regC = MI.getOperand(regCIdx).getReg();
+      if (!regBKilled && isKilled(MI, regC, MRI, TII))
         // If C dies but B does not, swap the B and C operands.
         // This makes the live ranges of A and C joinable.
         TryCommute = true;
-      else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) {
+      else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) {
         TryCommute = true;
         AggressiveCommute = true;
       }
@@ -913,6 +1235,13 @@
     return false;
   }
 
+  // If there is one more use of regB later in the same MBB, consider
+  // re-scheduling this MI below it.
+  if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) {
+    ++NumReSchedDowns;
+    return true;
+  }
+
   if (TargetRegisterInfo::isVirtualRegister(regA))
     ScanUses(regA, &*mbbi, Processed);
 
@@ -928,6 +1257,13 @@
     }
   }
 
+  // If there is one more use of regB later in the same MBB, consider
+  // re-scheduling it before this MI if it's legal.
+  if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) {
+    ++NumReSchedUps;
+    return true;
+  }
+
   // If this is an instruction with a load folded into it, try unfolding
   // the load, e.g. avoid this:
   //   movq %rdx, %rcx
@@ -940,7 +1276,7 @@
     // Determine if a load can be unfolded.
     unsigned LoadRegIndex;
     unsigned NewOpc =
-      TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+      TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
                                       /*UnfoldLoad=*/true,
                                       /*UnfoldStore=*/false,
                                       &LoadRegIndex);
@@ -950,12 +1286,12 @@
         MachineFunction &MF = *mbbi->getParent();
 
         // Unfold the load.
-        DEBUG(dbgs() << "2addr:   UNFOLDING: " << *mi);
+        DEBUG(dbgs() << "2addr:   UNFOLDING: " << MI);
         const TargetRegisterClass *RC =
           TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
         unsigned Reg = MRI->createVirtualRegister(RC);
         SmallVector<MachineInstr *, 2> NewMIs;
-        if (!TII->unfoldMemoryOperand(MF, mi, Reg,
+        if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
                                       /*UnfoldLoad=*/true,/*UnfoldStore=*/false,
                                       NewMIs)) {
           DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
@@ -986,21 +1322,21 @@
           // Success, or at least we made an improvement. Keep the unfolded
           // instructions and discard the original.
           if (LV) {
-            for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
-              MachineOperand &MO = mi->getOperand(i);
+            for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+              MachineOperand &MO = MI.getOperand(i);
               if (MO.isReg() && 
                   TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
                 if (MO.isUse()) {
                   if (MO.isKill()) {
                     if (NewMIs[0]->killsRegister(MO.getReg()))
-                      LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+                      LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]);
                     else {
                       assert(NewMIs[1]->killsRegister(MO.getReg()) &&
                              "Kill missing after load unfold!");
-                      LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]);
+                      LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]);
                     }
                   }
-                } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) {
+                } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) {
                   if (NewMIs[1]->registerDefIsDead(MO.getReg()))
                     LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
                   else {
@@ -1013,7 +1349,7 @@
             }
             LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
           }
-          mi->eraseFromParent();
+          MI.eraseFromParent();
           mi = NewMIs[1];
           if (TransformSuccess)
             return true;
@@ -1040,6 +1376,7 @@
   MRI = &MF.getRegInfo();
   TII = TM.getInstrInfo();
   TRI = TM.getRegisterInfo();
+  InstrItins = TM.getInstrItineraryData();
   LV = getAnalysisIfAvailable<LiveVariables>();
   AA = &getAnalysis<AliasAnalysis>();
 

Modified: llvm/trunk/test/CodeGen/X86/iv-users-in-other-loops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/iv-users-in-other-loops.ll?rev=144559&r1=144558&r2=144559&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/iv-users-in-other-loops.ll (original)
+++ llvm/trunk/test/CodeGen/X86/iv-users-in-other-loops.ll Mon Nov 14 13:48:55 2011
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t
 ; RUN: not grep inc %t
 ; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 10
+; RUN: grep addq %t | count 12
 ; RUN: not grep addb %t
 ; RUN: not grep leal %t
 ; RUN: not grep movq %t

Modified: llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll?rev=144559&r1=144558&r2=144559&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-reuse-trunc.ll Mon Nov 14 13:48:55 2011
@@ -4,14 +4,13 @@
 ; Full strength reduction wouldn't reduce register pressure, so LSR should
 ; stick with indexing here.
 
-; FIXME: This is worse off from disabling of scheduler 2-address hack.
 ; CHECK: movaps        (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
-; CHECK: leaq  4(%rax), %{{rcx|r9}}
 ; CHECK: cvtdq2ps
 ; CHECK: orps          {{%xmm[0-9]+}}, [[X4:%xmm[0-9]+]]
 ; CHECK: movaps        [[X4]], (%{{rdi|rcx}},%rax,4)
-; CHECK: cmpl  %{{ecx|r9d}}, (%{{rdx|r8}})
-; CHECK: jg
+; CHECK: addq  $4, %rax
+; CHECK: cmpl  %eax, (%{{rdx|r8}})
+; CHECK-NEXT: jg
 
 define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
 entry:

Modified: llvm/trunk/test/CodeGen/X86/sse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3.ll?rev=144559&r1=144558&r2=144559&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3.ll Mon Nov 14 13:48:55 2011
@@ -164,12 +164,12 @@
         store <4 x i16> %6, <4 x i16>* @g2, align 8
         ret void
 ; X64: 	t10:
-; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %eax
-; X64: 		movlhps [[X1:%xmm[0-9]+]]
-; X64: 		pshuflw	$8, [[X1]], [[X2:%xmm[0-9]+]]
-; X64: 		pinsrw	$2, %eax, [[X2]]
+; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %ecx
 ; X64: 		pextrw	$6, [[X0]], %eax
-; X64: 		pinsrw	$3, %eax, [[X2]]
+; X64: 		movlhps [[X0]], [[X0]]
+; X64: 		pshuflw	$8, [[X0]], [[X0]]
+; X64: 		pinsrw	$2, %ecx, [[X0]]
+; X64: 		pinsrw	$3, %eax, [[X0]]
 }
 
 
@@ -232,10 +232,9 @@
         %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
         ret <8 x i16> %tmp8
 ; X64: 	t15:
-; X64:          movdqa %xmm0, %xmm2
+; X64: 		pextrw	$7, %xmm0, %eax
 ; X64: 		punpcklqdq	%xmm1, %xmm0
 ; X64: 		pshuflw	$-128, %xmm0, %xmm0
-; X64: 		pextrw	$7, %xmm2, %eax
 ; X64: 		pinsrw	$2, %eax, %xmm0
 ; X64: 		ret
 }
@@ -248,12 +247,11 @@
         %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
         ret <16 x i8> %tmp9
 ; X64: 	t16:
-; X64: 		movdqa	%xmm1, %xmm2
-; X64: 		pslldq	$2, %xmm2
-; X64: 		movd	%xmm2, %eax
-; X64: 		pinsrw	$0, %eax, %xmm0
-; X64: 		pextrw	$8, %xmm1, %eax
-; X64: 		pextrw	$1, %xmm2, %ecx
+; X64: 		pextrw	$8, %xmm0, %eax
+; X64: 		pslldq	$2, %xmm0
+; X64: 		movd	%xmm0, %ecx
+; X64: 		pextrw	$1, %xmm0, %edx
+; X64: 		pinsrw	$0, %ecx, %xmm0
 ; X64: 		ret
 }
 




