[llvm-commits] [llvm] r64405 - in /llvm/branches/Apple/Dib: lib/CodeGen/ScheduleDAGPrinter.cpp lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp lib/CodeGen/VirtRegMap.cpp test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll test/CodeGen/X86/2008-02-22-ReMatBug.ll test/CodeGen/X86/2008-08-05-SpillerBug.ll test/CodeGen/X86/pmul.ll test/CodeGen/X86/pr3495-2.ll test/CodeGen/X86/pr3495.ll test/CodeGen/X86/sse_reload_fold.ll
Bill Wendling
isanbard at gmail.com
Thu Feb 12 14:12:13 PST 2009
Author: void
Date: Thu Feb 12 16:12:12 2009
New Revision: 64405
URL: http://llvm.org/viewvc/llvm-project?rev=64405&view=rev
Log:
Pulled revs 64184 64210 64298 64319 64327 64328 64339 64340 64369 64372 64373
into Dib:
--- Merging (from foreign repository) r64184 into '.':
U lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
--- Merging (from foreign repository) r64210 into '.':
A test/CodeGen/X86/pr3457.ll
Skipped 'test/CodeGen/X86/pr3495.ll'
--- Merging (from foreign repository) r64298 into '.':
U lib/CodeGen/VirtRegMap.cpp
--- Merging (from foreign repository) r64319 into '.':
U lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
--- Merging (from foreign repository) r64327 into '.':
U test/CodeGen/X86/2008-08-05-SpillerBug.ll
U test/CodeGen/X86/pmul.ll
U test/CodeGen/X86/sse_reload_fold.ll
U test/CodeGen/X86/2008-02-22-ReMatBug.ll
G lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
--- Merging (from foreign repository) r64328 into '.':
G lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
--- Merging (from foreign repository) r64339 into '.':
A test/CodeGen/X86/pr3495-2.ll
G lib/CodeGen/VirtRegMap.cpp
--- Merging (from foreign repository) r64340 into '.':
U lib/CodeGen/ScheduleDAGPrinter.cpp
--- Merging (from foreign repository) r64369 into '.':
U test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
G test/CodeGen/X86/2008-08-05-SpillerBug.ll
G lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
--- Merging (from foreign repository) r64372 into '.':
G lib/CodeGen/VirtRegMap.cpp
--- Merging (from foreign repository) r64373 into '.':
G lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
Added:
llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495-2.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll
Modified:
llvm/branches/Apple/Dib/lib/CodeGen/ScheduleDAGPrinter.cpp
llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
llvm/branches/Apple/Dib/lib/CodeGen/VirtRegMap.cpp
llvm/branches/Apple/Dib/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/2008-02-22-ReMatBug.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/2008-08-05-SpillerBug.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/pmul.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/sse_reload_fold.ll
Modified: llvm/branches/Apple/Dib/lib/CodeGen/ScheduleDAGPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/CodeGen/ScheduleDAGPrinter.cpp?rev=64405&r1=64404&r2=64405&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/CodeGen/ScheduleDAGPrinter.cpp (original)
+++ llvm/branches/Apple/Dib/lib/CodeGen/ScheduleDAGPrinter.cpp Thu Feb 12 16:12:12 2009
@@ -83,9 +83,13 @@
void ScheduleDAG::viewGraph() {
// This code is only for debugging!
#ifndef NDEBUG
- ViewGraph(this, "dag." + MF.getFunction()->getName(),
- "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' +
- BB->getBasicBlock()->getName());
+ if (BB->getBasicBlock())
+ ViewGraph(this, "dag." + MF.getFunction()->getName(),
+ "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' +
+ BB->getBasicBlock()->getName());
+ else
+ ViewGraph(this, "dag." + MF.getFunction()->getName(),
+ "Scheduling-Units Graph for " + MF.getFunction()->getName());
#else
cerr << "ScheduleDAG::viewGraph is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
Modified: llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp?rev=64405&r1=64404&r2=64405&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (original)
+++ llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Thu Feb 12 16:12:12 2009
@@ -879,12 +879,6 @@
};
} // end anonymous namespace
-static inline bool isCopyFromLiveIn(const SUnit *SU) {
- SDNode *N = SU->getNode();
- return N && N->getOpcode() == ISD::CopyFromReg &&
- N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
-}
-
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
@@ -902,7 +896,7 @@
if (PredSethiUllman > SethiUllmanNumber) {
SethiUllmanNumber = PredSethiUllman;
Extra = 0;
- } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl())
+ } else if (PredSethiUllman == SethiUllmanNumber)
++Extra;
}
@@ -966,11 +960,6 @@
unsigned getNodePriority(const SUnit *SU) const {
assert(SU->NodeNum < SethiUllmanNumbers.size());
unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
- if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
- // CopyFromReg should be close to its def because it restricts
- // allocation choices. But if it is a livein then perhaps we want it
- // closer to its uses so it can be coalesced.
- return 0xffff;
if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
// CopyToReg should be close to its uses to facilitate coalescing and
// avoid spilling.
@@ -980,15 +969,16 @@
// EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to
// facilitate coalescing.
return 0;
- if (SU->NumSuccs == 0)
- // If SU does not have a use, i.e. it doesn't produce a value that would
- // be consumed (e.g. store), then it terminates a chain of computation.
- // Give it a large SethiUllman number so it will be scheduled right
- // before its predecessors that it doesn't lengthen their live ranges.
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors that it doesn't lengthen
+ // their live ranges.
return 0xffff;
- if (SU->NumPreds == 0)
- // If SU does not have a def, schedule it close to its uses because it
- // does not lengthen any live ranges.
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
return 0;
return SethiUllmanNumbers[SU->NodeNum];
}
@@ -1060,23 +1050,13 @@
}
/// calcMaxScratches - Returns an cost estimate of the worse case requirement
-/// for scratch registers. Live-in operands and live-out results don't count
-/// since they are "fixed".
+/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
unsigned Scratches = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
- if (!I->getSUnit()->getNode() ||
- I->getSUnit()->getNode()->getOpcode() != ISD::CopyFromReg)
- Scratches++;
- }
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain succs
- if (!I->getSUnit()->getNode() ||
- I->getSUnit()->getNode()->getOpcode() != ISD::CopyToReg)
- Scratches += 10;
+ Scratches++;
}
return Scratches;
}
@@ -1110,10 +1090,7 @@
if (LDist != RDist)
return LDist < RDist;
- // Intuitively, it's good to push down instructions whose results are
- // liveout so their long live ranges won't conflict with other values
- // which are needed inside the BB. Further prioritize liveout instructions
- // by the number of operands which are calculated within the BB.
+ // How many registers becomes live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
if (LScratch != RScratch)
@@ -1242,8 +1219,8 @@
if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
continue;
}
- // Don't constraint extract_subreg / insert_subreg these may be
- // coalesced away. We don't them close to their uses.
+ // Don't constrain extract_subreg / insert_subreg; these may be
+ // coalesced away. We want them close to their uses.
unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
SuccOpc == TargetInstrInfo::INSERT_SUBREG)
Modified: llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp?rev=64405&r1=64404&r2=64405&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp (original)
+++ llvm/branches/Apple/Dib/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp Thu Feb 12 16:12:12 2009
@@ -125,10 +125,11 @@
} else {
// Create the reg, emit the copy.
VRBase = MRI.createVirtualRegister(DstRC);
- bool Emitted =
- TII->copyRegToReg(*BB, End, VRBase, SrcReg, DstRC, SrcRC);
- Emitted = Emitted; // Silence compiler warning.
- assert(Emitted && "Unable to issue a copy instruction!");
+ bool Emitted = TII->copyRegToReg(*BB, End, VRBase, SrcReg, DstRC, SrcRC);
+ if (!Emitted) {
+ cerr << "Unable to issue a copy instruction!\n";
+ abort();
+ }
}
SDValue Op(Node, ResNo);
@@ -529,7 +530,11 @@
else
DstTRC = TRI->getPhysicalRegisterRegClass(DestReg,
Node->getOperand(1).getValueType());
- TII->copyRegToReg(*BB, End, DestReg, SrcReg, DstTRC, SrcTRC);
+ bool Emitted = TII->copyRegToReg(*BB, End, DestReg, SrcReg, DstTRC, SrcTRC);
+ if (!Emitted) {
+ cerr << "Unable to issue a copy instruction!\n";
+ abort();
+ }
break;
}
case ISD::CopyFromReg: {
Modified: llvm/branches/Apple/Dib/lib/CodeGen/VirtRegMap.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/CodeGen/VirtRegMap.cpp?rev=64405&r1=64404&r2=64405&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/CodeGen/VirtRegMap.cpp (original)
+++ llvm/branches/Apple/Dib/lib/CodeGen/VirtRegMap.cpp Thu Feb 12 16:12:12 2009
@@ -26,10 +26,11 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -47,6 +48,8 @@
STATISTIC(NumDCE , "Number of copies elided");
STATISTIC(NumDSS , "Number of dead spill slots removed");
STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumOmitted , "Number of reloads omited");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
namespace {
enum SpillerName { simple, local };
@@ -308,79 +311,6 @@
// Local Spiller Implementation
//===----------------------------------------------------------------------===//
-namespace {
- class AvailableSpills;
-
- /// LocalSpiller - This spiller does a simple pass over the machine basic
- /// block to attempt to keep spills in registers as much as possible for
- /// blocks that have low register pressure (the vreg may be spilled due to
- /// register pressure in other blocks).
- class VISIBILITY_HIDDEN LocalSpiller : public Spiller {
- MachineRegisterInfo *RegInfo;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- DenseMap<MachineInstr*, unsigned> DistanceMap;
- public:
- bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
- RegInfo = &MF.getRegInfo();
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
- DOUT << "\n**** Local spiller rewriting function '"
- << MF.getFunction()->getName() << "':\n";
- DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)"
- " ****\n";
- DEBUG(MF.dump());
-
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB)
- RewriteMBB(*MBB, VRM);
-
- // Mark unused spill slots.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- int SS = VRM.getLowSpillSlot();
- if (SS != VirtRegMap::NO_STACK_SLOT)
- for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS)
- if (!VRM.isSpillSlotUsed(SS)) {
- MFI->RemoveStackObject(SS);
- ++NumDSS;
- }
-
- DOUT << "**** Post Machine Instrs ****\n";
- DEBUG(MF.dump());
-
- return true;
- }
- private:
- void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
- unsigned Reg, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
- bool PrepForUnfoldOpti(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM);
- bool CommuteToFoldReload(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MII,
- unsigned VirtReg, unsigned SrcReg, int SS,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- const TargetRegisterInfo *TRI,
- VirtRegMap &VRM);
- void SpillRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MII,
- int Idx, unsigned PhysReg, int StackSlot,
- const TargetRegisterClass *RC,
- bool isAvailable, MachineInstr *&LastStore,
- AvailableSpills &Spills,
- SmallSet<MachineInstr*, 4> &ReMatDefs,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM);
- void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM);
- };
-}
-
/// AvailableSpills - As the local spiller is scanning and rewriting an MBB from
/// top down, keep track of which spills slots or remat are available in each
/// register.
@@ -415,6 +345,12 @@
AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
: TRI(tri), TII(tii) {
}
+
+ /// clear - Reset the state.
+ void clear() {
+ SpillSlotsOrReMatsAvailable.clear();
+ PhysRegsAvailable.clear();
+ }
const TargetRegisterInfo *getRegInfo() const { return TRI; }
@@ -433,8 +369,7 @@
/// addAvailable - Mark that the specified stack slot / remat is available in
/// the specified physreg. If CanClobber is true, the physreg can be modified
/// at any time without changing the semantics of the program.
- void addAvailable(int SlotOrReMat, MachineInstr *MI, unsigned Reg,
- bool CanClobber = true) {
+ void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
// If this stack slot is thought to be available in some other physreg,
// remove its record.
ModifyStackSlotOrReMat(SlotOrReMat);
@@ -472,6 +407,8 @@
/// slot changes. This removes information about which register the previous
/// value for this slot lives in (as the previous value is dead now).
void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+ void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB);
};
}
@@ -551,7 +488,142 @@
PhysRegsAvailable.erase(I);
}
+/// AddAvailableRegsToLiveIn - Availability information is being carried
+/// into the specified MBB. Add available physical registers as live-in's
+/// so the register scavenger and post-allocation scheduler are happy.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB) {
+ for (std::multimap<unsigned, int>::iterator
+ I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
+ I != E; ++I) {
+ unsigned Reg = (*I).first;
+ if (!MBB.isLiveIn(Reg))
+ MBB.addLiveIn(Reg);
+ }
+}
+/// findSinglePredSuccessor - Return via reference a vector of machine basic
+/// blocks each of which is a successor of the specified BB and has no other
+/// predecessor.
+static void findSinglePredSuccessor(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &Succs) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->pred_size() == 1)
+ Succs.push_back(SuccMBB);
+ }
+}
+
+namespace {
+ /// LocalSpiller - This spiller does a simple pass over the machine basic
+ /// block to attempt to keep spills in registers as much as possible for
+ /// blocks that have low register pressure (the vreg may be spilled due to
+ /// register pressure in other blocks).
+ class VISIBILITY_HIDDEN LocalSpiller : public Spiller {
+ MachineRegisterInfo *RegInfo;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+ public:
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
+ RegInfo = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+ DOUT << "\n**** Local spiller rewriting function '"
+ << MF.getFunction()->getName() << "':\n";
+ DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)"
+ " ****\n";
+ DEBUG(MF.dump());
+
+ // Spills - Keep track of which spilled values are available in physregs
+ // so that we can choose to reuse the physregs instead of emitting
+ // reloads. This is usually refreshed per basic block.
+ AvailableSpills Spills(TRI, TII);
+
+ // SingleEntrySuccs - Successor blocks which have a single predecessor.
+ SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+ SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+ // Traverse the basic blocks depth first.
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*,
+ SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+ if (!EarlyVisited.count(MBB))
+ RewriteMBB(*MBB, VRM, Spills);
+
+      // If this MBB is the only predecessor of a successor, keep the
+      // availability information and visit it next.
+ do {
+ // Keep visiting single predecessor successor as long as possible.
+ SinglePredSuccs.clear();
+ findSinglePredSuccessor(MBB, SinglePredSuccs);
+ if (SinglePredSuccs.empty())
+ MBB = 0;
+ else {
+          // FIXME: There may be more than one successor, each of which has
+          // MBB as its only predecessor.
+ MBB = SinglePredSuccs[0];
+ if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+ Spills.AddAvailableRegsToLiveIn(*MBB);
+ RewriteMBB(*MBB, VRM, Spills);
+ }
+ }
+ } while (MBB);
+
+ // Clear the availability info.
+ Spills.clear();
+ }
+
+ DOUT << "**** Post Machine Instrs ****\n";
+ DEBUG(MF.dump());
+
+ // Mark unused spill slots.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int SS = VRM.getLowSpillSlot();
+ if (SS != VirtRegMap::NO_STACK_SLOT)
+ for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS)
+ if (!VRM.isSpillSlotUsed(SS)) {
+ MFI->RemoveStackObject(SS);
+ ++NumDSS;
+ }
+
+ return true;
+ }
+ private:
+ void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
+ unsigned Reg, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+ bool PrepForUnfoldOpti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM);
+ bool CommuteToFoldReload(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned VirtReg, unsigned SrcReg, int SS,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM);
+ void SpillRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ int Idx, unsigned PhysReg, int StackSlot,
+ const TargetRegisterClass *RC,
+ bool isAvailable, MachineInstr *&LastStore,
+ AvailableSpills &Spills,
+ SmallSet<MachineInstr*, 4> &ReMatDefs,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM);
+ void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+ AvailableSpills &Spills);
+ };
+}
/// InvalidateKills - MI is going to be deleted. If any of its operands are
/// marked kill, then invalidate the information.
@@ -843,7 +915,7 @@
unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg;
MI->getOperand(NewOp.Operand).setReg(RReg);
- Spills.addAvailable(NewOp.StackSlotOrReMat, MI, NewPhysReg);
+ Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
--MII;
UpdateKills(*MII, RegKills, KillOps, TRI);
DOUT << '\t' << *MII;
@@ -1152,7 +1224,7 @@
// in PhysReg.
Spills.ModifyStackSlotOrReMat(StackSlot);
Spills.ClobberPhysReg(PhysReg);
- Spills.addAvailable(StackSlot, LastStore, PhysReg, isAvailable);
+ Spills.addAvailable(StackSlot, PhysReg, isAvailable);
++NumStores;
}
@@ -1201,15 +1273,13 @@
/// rewriteMBB - Keep track of which spills are available even after the
/// register allocator is done with them. If possible, avid reloading vregs.
-void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
- DOUT << MBB.getBasicBlock()->getName() << ":\n";
+void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+ AvailableSpills &Spills) {
+ DOUT << "\n**** Local spiller rewriting MBB '"
+ << MBB.getBasicBlock()->getName() << ":\n";
MachineFunction &MF = *MBB.getParent();
- // Spills - Keep track of which spilled values are available in physregs so
- // that we can choose to reuse the physregs instead of emitting reloads.
- AvailableSpills Spills(TRI, TII);
-
// MaybeDeadStores - When we need to write a value back into a stack slot,
// keep track of the inserted store. If the stack slot value is never read
// (because the value was used from some available register, for example), and
@@ -1277,18 +1347,80 @@
continue; // Split interval spilled again.
unsigned Phys = VRM.getPhys(VirtReg);
RegInfo->setPhysRegUsed(Phys);
+
+        // Check if the value being restored is available. If so, it must be
+ // from a predecessor BB that fallthrough into this BB. We do not
+ // expect:
+ // BB1:
+ // r1 = load fi#1
+ // ...
+ // = r1<kill>
+ // ... # r1 not clobbered
+ // ...
+ // = load fi#1
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
+ unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+ if (InReg == Phys) {
+ // If the value is already available in the expected register, save
+ // a reload / remat.
+ if (SSorRMId)
+ DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << SSorRMId;
+ DOUT << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(Phys) << "\n";
+ ++NumOmitted;
+ continue;
+ } else if (InReg && InReg != Phys) {
+ if (SSorRMId)
+ DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << SSorRMId;
+ DOUT << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" by copying it into physreg "
+ << TRI->getName(Phys) << "\n";
+
+ // If the reloaded / remat value is available in another register,
+ // copy it to the desired register.
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC);
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+          // Mark it killed.
+ MachineInstr *CopyMI = prior(MII);
+ MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
+ KillOpnd->setIsKill();
+ UpdateKills(*CopyMI, RegKills, KillOps, TRI);
+
+ DOUT << '\t' << *CopyMI;
+ ++NumCopified;
+ continue;
+ }
+
if (VRM.isReMaterialized(VirtReg)) {
ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM);
} else {
const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
- int SS = VRM.getStackSlot(VirtReg);
- TII->loadRegFromStackSlot(MBB, &MI, Phys, SS, RC);
+ TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC);
MachineInstr *LoadMI = prior(MII);
- VRM.addSpillSlotUse(SS, LoadMI);
+ VRM.addSpillSlotUse(SSorRMId, LoadMI);
++NumLoads;
}
+
// This invalidates Phys.
Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
UpdateKills(*prior(MII), RegKills, KillOps, TRI);
DOUT << '\t' << *prior(MII);
}
@@ -1510,7 +1642,7 @@
// This invalidates DesignatedReg.
Spills.ClobberPhysReg(DesignatedReg);
- Spills.addAvailable(ReuseSlot, &MI, DesignatedReg);
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
unsigned RReg =
SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
MI.getOperand(i).setReg(RReg);
@@ -1548,7 +1680,7 @@
// Any stores to this stack slot are not dead anymore.
if (!DoReMat)
MaybeDeadStores[SSorRMId] = NULL;
- Spills.addAvailable(SSorRMId, &MI, PhysReg);
+ Spills.addAvailable(SSorRMId, PhysReg);
// Assumes this is the last use. IsKill will be unset if reg is reused
// unless it's a two-address operand.
if (TID.getOperandConstraint(i, TOI::TIED_TO) == -1)
@@ -1738,7 +1870,7 @@
// If the stack slot value was previously available in some other
// register, change it now. Otherwise, make the register
// available in PhysReg.
- Spills.addAvailable(StackSlot, &MI, SrcReg, false/*!clobber*/);
+ Spills.addAvailable(StackSlot, SrcReg, false/*!clobber*/);
}
}
}
@@ -1788,7 +1920,7 @@
// If it is a folded reference, then it's not safe to clobber.
bool Folded = FoldedSS.count(FrameIdx);
// Otherwise, if it wasn't available, remember that it is now!
- Spills.addAvailable(FrameIdx, &MI, DestReg, !Folded);
+ Spills.addAvailable(FrameIdx, DestReg, !Folded);
goto ProcessNextInst;
}
@@ -1863,6 +1995,7 @@
}
MII = NextMII;
}
+
}
llvm::Spiller* llvm::createSpiller() {
Modified: llvm/branches/Apple/Dib/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll?rev=64405&r1=64404&r2=64405&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll Thu Feb 12 16:12:12 2009
@@ -1,4 +1,5 @@
; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 186
%"struct.Adv5::Ekin<3>" = type <{ i8 }>
%"struct.Adv5::X::Energyflux<3>" = type { double }
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/2008-02-22-ReMatBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/2008-02-22-ReMatBug.ll?rev=64405&r1=64404&r2=64405&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/2008-02-22-ReMatBug.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/2008-02-22-ReMatBug.ll Thu Feb 12 16:12:12 2009
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 4
+; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 3
; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of dead spill slots removed}
; rdar://5761454
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/2008-08-05-SpillerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/2008-08-05-SpillerBug.ll?rev=64405&r1=64404&r2=64405&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/2008-08-05-SpillerBug.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/2008-08-05-SpillerBug.ll Thu Feb 12 16:12:12 2009
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats -info-output-file - | not grep {Number of dead stores elided}
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 57
; PR2568
@g_3 = external global i16 ; <i16*> [#uses=1]
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/pmul.ll?rev=64405&r1=64404&r2=64405&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/pmul.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/pmul.ll Thu Feb 12 16:12:12 2009
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -stack-alignment=16 > %t
; RUN: grep pmul %t | count 12
-; RUN: grep mov %t | count 15
+; RUN: grep mov %t | count 12
define <4 x i32> @a(<4 x i32> %i) nounwind {
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
Added: llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495-2.ll?rev=64405&view=auto
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495-2.ll (added)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495-2.ll Thu Feb 12 16:12:12 2009
@@ -0,0 +1,49 @@
+; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
+
+target triple = "i386-apple-darwin9.6"
+ %struct.constraintVCGType = type { i32, i32, i32, i32 }
+ %struct.nodeVCGType = type { %struct.constraintVCGType*, i32, i32, i32, %struct.constraintVCGType*, i32, i32, i32 }
+
+define fastcc void @SCC_DFSBelowVCG(%struct.nodeVCGType* %VCG, i32 %net, i32 %label) nounwind {
+entry:
+ %0 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 5 ; <i32*> [#uses=2]
+ %1 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %2 = icmp eq i32 %1, 0 ; <i1> [#uses=1]
+ br i1 %2, label %bb5, label %bb.nph3
+
+bb.nph3: ; preds = %entry
+ %3 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 4 ; <%struct.constraintVCGType**> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb3, %bb.nph3
+ %s.02 = phi i32 [ 0, %bb.nph3 ], [ %12, %bb3 ] ; <i32> [#uses=2]
+ %4 = load %struct.constraintVCGType** %3, align 4 ; <%struct.constraintVCGType*> [#uses=1]
+ %5 = icmp eq i32 0, 0 ; <i1> [#uses=1]
+ br i1 %5, label %bb1, label %bb3
+
+bb1: ; preds = %bb
+ %6 = getelementptr %struct.constraintVCGType* %4, i32 %s.02, i32 0 ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 4 ; <i32> [#uses=2]
+ %8 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 7 ; <i32*> [#uses=1]
+ %9 = load i32* %8, align 4 ; <i32> [#uses=1]
+ %10 = icmp eq i32 %9, 0 ; <i1> [#uses=1]
+ br i1 %10, label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+ %11 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 4 ; <%struct.constraintVCGType**> [#uses=0]
+ br label %bb.i
+
+bb.i: ; preds = %bb.i, %bb2
+ br label %bb.i
+
+bb3: ; preds = %bb1, %bb
+ %12 = add i32 %s.02, 1 ; <i32> [#uses=2]
+ %13 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %14 = icmp ugt i32 %13, %12 ; <i1> [#uses=1]
+ br i1 %14, label %bb, label %bb5
+
+bb5: ; preds = %bb3, %entry
+ %15 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 6 ; <i32*> [#uses=1]
+ store i32 %label, i32* %15, align 4
+ ret void
+}
Added: llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll?rev=64405&view=auto
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll (added)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll Thu Feb 12 16:12:12 2009
@@ -0,0 +1,78 @@
+; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited}
+; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of available reloads turned into copies}
+; PR3495
+
+target triple = "i386-pc-linux-gnu"
+@x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1]
+@rows = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=2]
+@up = external global [15 x i32], align 32 ; <[15 x i32]*> [#uses=2]
+@down = external global [15 x i32], align 32 ; <[15 x i32]*> [#uses=1]
+
+define i32 @queens(i32 %c) nounwind {
+entry:
+ %tmp91 = add i32 %c, 1 ; <i32> [#uses=3]
+ %tmp135 = getelementptr [8 x i32]* @x, i32 0, i32 %tmp91 ; <i32*> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb569, %entry
+ %r25.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %indvar.next715, %bb569 ] ; <i32> [#uses=4]
+ %tmp27 = getelementptr [8 x i32]* @rows, i32 0, i32 %r25.0.reg2mem.0 ; <i32*> [#uses=1]
+ %tmp28 = load i32* %tmp27, align 4 ; <i32> [#uses=1]
+ %tmp29 = icmp eq i32 %tmp28, 0 ; <i1> [#uses=1]
+ br i1 %tmp29, label %bb569, label %bb31
+
+bb31: ; preds = %bb
+ %tmp35 = sub i32 %r25.0.reg2mem.0, 0 ; <i32> [#uses=1]
+ %tmp36 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp35 ; <i32*> [#uses=1]
+ %tmp37 = load i32* %tmp36, align 4 ; <i32> [#uses=1]
+ %tmp38 = icmp eq i32 %tmp37, 0 ; <i1> [#uses=1]
+ br i1 %tmp38, label %bb569, label %bb41
+
+bb41: ; preds = %bb31
+ %tmp54 = sub i32 %r25.0.reg2mem.0, %c ; <i32> [#uses=1]
+ %tmp55 = add i32 %tmp54, 7 ; <i32> [#uses=1]
+ %tmp62 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp55 ; <i32*> [#uses=2]
+ store i32 0, i32* %tmp62, align 4
+ br label %bb92
+
+bb92: ; preds = %bb545, %bb41
+ %r20.0.reg2mem.0 = phi i32 [ 0, %bb41 ], [ %indvar.next711, %bb545 ] ; <i32> [#uses=5]
+ %tmp94 = getelementptr [8 x i32]* @rows, i32 0, i32 %r20.0.reg2mem.0 ; <i32*> [#uses=1]
+ %tmp95 = load i32* %tmp94, align 4 ; <i32> [#uses=0]
+ %tmp112 = add i32 %r20.0.reg2mem.0, %tmp91 ; <i32> [#uses=1]
+ %tmp113 = getelementptr [15 x i32]* @down, i32 0, i32 %tmp112 ; <i32*> [#uses=2]
+ %tmp114 = load i32* %tmp113, align 4 ; <i32> [#uses=1]
+ %tmp115 = icmp eq i32 %tmp114, 0 ; <i1> [#uses=1]
+ br i1 %tmp115, label %bb545, label %bb118
+
+bb118: ; preds = %bb92
+ %tmp122 = sub i32 %r20.0.reg2mem.0, %tmp91 ; <i32> [#uses=0]
+ store i32 0, i32* %tmp113, align 4
+ store i32 %r20.0.reg2mem.0, i32* %tmp135, align 4
+ br label %bb142
+
+bb142: ; preds = %bb142, %bb118
+ %k18.0.reg2mem.0 = phi i32 [ 0, %bb118 ], [ %indvar.next709, %bb142 ] ; <i32> [#uses=1]
+ %indvar.next709 = add i32 %k18.0.reg2mem.0, 1 ; <i32> [#uses=2]
+ %exitcond710 = icmp eq i32 %indvar.next709, 8 ; <i1> [#uses=1]
+ br i1 %exitcond710, label %bb155, label %bb142
+
+bb155: ; preds = %bb142
+ %tmp156 = tail call i32 @putchar(i32 10) nounwind ; <i32> [#uses=0]
+ br label %bb545
+
+bb545: ; preds = %bb155, %bb92
+ %indvar.next711 = add i32 %r20.0.reg2mem.0, 1 ; <i32> [#uses=2]
+ %exitcond712 = icmp eq i32 %indvar.next711, 8 ; <i1> [#uses=1]
+ br i1 %exitcond712, label %bb553, label %bb92
+
+bb553: ; preds = %bb545
+ store i32 1, i32* %tmp62, align 4
+ br label %bb569
+
+bb569: ; preds = %bb553, %bb31, %bb
+ %indvar.next715 = add i32 %r25.0.reg2mem.0, 1 ; <i32> [#uses=1]
+ br label %bb
+}
+
+declare i32 @putchar(i32)
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/sse_reload_fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/sse_reload_fold.ll?rev=64405&r1=64404&r2=64405&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/sse_reload_fold.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/sse_reload_fold.ll Thu Feb 12 16:12:12 2009
@@ -2,9 +2,10 @@
; RUN: llc -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
; RUN: grep fail | count 1
-declare void @bar()
-declare <4 x float> @qux()
-declare <2 x double> @pin()
+declare float @test_f(float %f)
+declare double @test_d(double %f)
+declare <4 x float> @test_vf(<4 x float> %f)
+declare <2 x double> @test_vd(<2 x double> %f)
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)
@@ -26,99 +27,99 @@
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)
define float @foo(float %f) {
- call void @bar()
+ %a = call float @test_f(float %f)
%t = call float @llvm.sqrt.f32(float %f)
ret float %t
}
define double @doo(double %f) {
- call void @bar()
+ %a = call double @test_d(double %f)
%t = call double @llvm.sqrt.f64(double %f)
ret double %t
}
define <4 x float> @a0(<4 x float> %f) {
- call void @bar()
+ %a = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)
ret <4 x float> %t
}
define <4 x float> @a1(<4 x float> %f) {
- call void @bar()
+ %a = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)
ret <4 x float> %t
}
define <4 x float> @a2(<4 x float> %f) {
- call void @bar()
+ %a = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)
ret <4 x float> %t
}
define <4 x float> @b3(<4 x float> %f) {
- %y = call <4 x float> @qux()
+ %y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
define <4 x float> @b4(<4 x float> %f) {
- %y = call <4 x float> @qux()
+ %y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
define <4 x float> @b5(<4 x float> %f) {
- %y = call <4 x float> @qux()
+ %y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7)
ret <4 x float> %t
}
define <4 x float> @b6(<4 x float> %f) {
- %y = call <4 x float> @qux()
+ %y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
define <4 x float> @b7(<4 x float> %f) {
- %y = call <4 x float> @qux()
+ %y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
define <4 x float> @b8(<4 x float> %f) {
- %y = call <4 x float> @qux()
+ %y = call <4 x float> @test_vf(<4 x float> %f)
%t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f)
ret <4 x float> %t
}
define <2 x double> @c1(<2 x double> %f) {
- call void @bar()
+ %a = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)
ret <2 x double> %t
}
define <2 x double> @d3(<2 x double> %f) {
- %y = call <2 x double> @pin()
+ %y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
define <2 x double> @d4(<2 x double> %f) {
- %y = call <2 x double> @pin()
+ %y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
define <2 x double> @d5(<2 x double> %f) {
- %y = call <2 x double> @pin()
+ %y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7)
ret <2 x double> %t
}
define <2 x double> @d6(<2 x double> %f) {
- %y = call <2 x double> @pin()
+ %y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
define <2 x double> @d7(<2 x double> %f) {
- %y = call <2 x double> @pin()
+ %y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
define <2 x double> @d8(<2 x double> %f) {
- %y = call <2 x double> @pin()
+ %y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f)
ret <2 x double> %t
}
; This one should fail to fuse.
define <2 x double> @z0(<2 x double> %f) {
- %y = call <2 x double> @pin()
+ %y = call <2 x double> @test_vd(<2 x double> %f)
%t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
ret <2 x double> %t
}
More information about the llvm-commits
mailing list