[llvm-branch-commits] [llvm-branch] r99976 - in /llvm/branches/Apple/Morbo: ./ lib/Target/X86/ lib/Target/X86/AsmPrinter/ lib/Transforms/IPO/ test/CodeGen/X86/
Jakob Stoklund Olesen
stoklund at 2pi.dk
Tue Mar 30 17:49:27 PDT 2010
Author: stoklund
Date: Tue Mar 30 19:49:27 2010
New Revision: 99976
URL: http://llvm.org/viewvc/llvm-project?rev=99976&view=rev
Log:
Merge SSEDomainFix pass from trunk.
Revisions 99524 99539 99540 99848 99855 99916 99952 99953 99954 99959 99974 99975
Added:
llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp
- copied, changed from r99524, llvm/trunk/lib/Target/X86/SSEDomainFix.cpp
Modified:
llvm/branches/Apple/Morbo/ (props changed)
llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt
llvm/branches/Apple/Morbo/lib/Target/X86/X86.h
llvm/branches/Apple/Morbo/lib/Target/X86/X86.td
llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td
llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp
llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h
llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td
llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp
llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h
llvm/branches/Apple/Morbo/lib/Transforms/IPO/FunctionAttrs.cpp (props changed)
llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll
llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll
Propchange: llvm/branches/Apple/Morbo/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Mar 30 19:49:27 2010
@@ -1,2 +1,2 @@
/llvm/branches/Apple/Hermes:96832,96835,96858,96870,96876,96879
-/llvm/trunk:98602,98604,98612,98615-98616,98675,98686,98743-98744,98773,98778,98780,98810,98835,98839,98845,98855,98862,98881,98920,98977,99032-99033,99043,99223,99263,99282-99284,99306,99319-99321,99324,99336,99378,99418,99423,99429,99455,99463,99465,99469,99484,99490,99492-99494,99537,99539,99544,99570,99575,99629-99630,99671,99692,99695,99697,99699,99722,99846,99850,99910,99957
+/llvm/trunk:98602,98604,98612,98615-98616,98675,98686,98743-98744,98773,98778,98780,98810,98835,98839,98845,98855,98862,98881,98920,98977,99032-99033,99043,99223,99263,99282-99284,99306,99319-99321,99324,99336,99378,99418,99423,99429,99455,99463,99465,99469,99484,99490,99492-99494,99524,99537,99539-99540,99544,99570,99575,99629-99630,99671,99692,99695,99697,99699,99722,99846,99848,99850,99855,99910,99916,99952-99954,99957,99959,99974-99975
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp Tue Mar 30 19:49:27 2010
@@ -287,7 +287,9 @@
LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SET0: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::MOV16r0:
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt Tue Mar 30 19:49:27 2010
@@ -15,6 +15,7 @@
tablegen(X86GenSubtarget.inc -gen-subtarget)
set(sources
+ SSEDomainFix.cpp
X86AsmBackend.cpp
X86CodeEmitter.cpp
X86COFFMachineModuleInfo.cpp
Copied: llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp (from r99524, llvm/trunk/lib/Target/X86/SSEDomainFix.cpp)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp?p2=llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp&p1=llvm/trunk/lib/Target/X86/SSEDomainFix.cpp&r1=99524&r2=99976&rev=99976&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/SSEDomainFix.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp Tue Mar 30 19:49:27 2010
@@ -29,12 +29,116 @@
using namespace llvm;
namespace {
+
+/// Allocate objects from a pool, allow objects to be recycled, and provide a
+/// way of deleting everything.
+template<typename T, unsigned PageSize = 64>
+class PoolAllocator {
+ std::vector<T*> Pages, Avail;
+public:
+ ~PoolAllocator() { Clear(); }
+
+ T* Alloc() {
+ if (Avail.empty()) {
+ T *p = new T[PageSize];
+ Pages.push_back(p);
+ Avail.reserve(PageSize);
+ for (unsigned n = 0; n != PageSize; ++n)
+ Avail.push_back(p+n);
+ }
+ T *p = Avail.back();
+ Avail.pop_back();
+ return p;
+ }
+
+ // Allow object to be reallocated. It won't be reconstructed.
+ void Recycle(T *p) {
+ p->clear();
+ Avail.push_back(p);
+ }
+
+ // Destroy all objects, make sure there are no external pointers to them.
+ void Clear() {
+ Avail.clear();
+ while (!Pages.empty()) {
+ delete[] Pages.back();
+ Pages.pop_back();
+ }
+ }
+};
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it laso keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one of more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact the the register is now available in multiple
+/// domains.
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+ // Available domains. For an open DomainValue, it is the still possible
+ // domains for collapsing. For a collapsed DomainValue it is the domains where
+ // the register is available for free.
+ unsigned Mask;
+
+ // Position of the last defining instruction.
+ unsigned Dist;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+ // Collapsed DomainValue have no instructions to twiddle - it simply keeps
+ // track of the domains where the registers are already available.
+ bool collapsed() const { return Instrs.empty(); }
+
+ // Is any domain in mask available?
+ bool compat(unsigned mask) const {
+ return Mask & mask;
+ }
+
+ // Mark domain as available.
+ void add(unsigned domain) {
+ Mask |= 1u << domain;
+ }
+
+ // First domain available in mask.
+ unsigned firstDomain() const {
+ return CountTrailingZeros_32(Mask);
+ }
+
+ DomainValue() { clear(); }
+
+ void clear() {
+ Refs = Mask = Dist = 0;
+ Instrs.clear();
+ }
+};
+
+static const unsigned NumRegs = 16;
+
class SSEDomainFixPass : public MachineFunctionPass {
static char ID;
- const X86InstrInfo *TII;
+ PoolAllocator<DomainValue> Pool;
MachineFunction *MF;
+ const X86InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
MachineBasicBlock *MBB;
+ DomainValue **LiveRegs;
+ typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveOutMap LiveOuts;
+ unsigned Distance;
+
public:
SSEDomainFixPass() : MachineFunctionPass(&ID) {}
@@ -50,46 +154,337 @@
}
private:
- void enterBasicBlock(MachineBasicBlock *MBB);
+ // Register mapping.
+ int RegIndex(unsigned Reg);
+
+ // LiveRegs manipulations.
+ void SetLiveReg(int rx, DomainValue *DV);
+ void Kill(int rx);
+ void Force(int rx, unsigned domain);
+ void Collapse(DomainValue *dv, unsigned domain);
+ bool Merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock();
+ void visitGenericInstr(MachineInstr*);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+
};
}
char SSEDomainFixPass::ID = 0;
-void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
- MBB = mbb;
- DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n");
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
+int SSEDomainFixPass::RegIndex(unsigned reg) {
+ // Registers are sorted lexicographically.
+ // We just need them to be consecutive, ordering doesn't matter.
+ assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+ reg -= X86::XMM0;
+ return reg < NumRegs ? reg : -1;
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs)
+ LiveRegs = (DomainValue**)calloc(sizeof(DomainValue*), NumRegs);
+
+ if (LiveRegs[rx] == dv)
+ return;
+ if (LiveRegs[rx]) {
+ assert(LiveRegs[rx]->Refs && "Bad refcount");
+ if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]);
+ }
+ LiveRegs[rx] = dv;
+ if (dv) ++dv->Refs;
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void SSEDomainFixPass::Kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs || !LiveRegs[rx]) return;
+
+ // Before killing the last reference to an open DomainValue, collapse it to
+ // the first available domain.
+ if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed())
+ Collapse(LiveRegs[rx], LiveRegs[rx]->firstDomain());
+ else
+ SetLiveReg(rx, 0);
+}
+
+/// Force register rx into domain.
+void SSEDomainFixPass::Force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ DomainValue *dv;
+ if (LiveRegs && (dv = LiveRegs[rx])) {
+ if (dv->collapsed())
+ dv->add(domain);
+ else
+ Collapse(dv, domain);
+ } else {
+ // Set up basic collapsed DomainValue.
+ dv = Pool.Alloc();
+ dv->Dist = Distance;
+ dv->add(domain);
+ SetLiveReg(rx, dv);
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->compat(1u << domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty()) {
+ MachineInstr *mi = dv->Instrs.back();
+ TII->SetSSEDomain(mi, domain);
+ dv->Instrs.pop_back();
+ }
+ dv->Mask = 1u << domain;
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (LiveRegs && dv->Refs > 1) {
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == dv) {
+ DomainValue *dv2 = Pool.Alloc();
+ dv2->Dist = Distance;
+ dv2->add(domain);
+ SetLiveReg(rx, dv2);
+ }
+ }
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
+bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+ assert(!A->collapsed() && "Cannot merge into collapsed");
+ assert(!B->collapsed() && "Cannot merge from collapsed");
+ if (!A->compat(B->Mask))
+ return false;
+ A->Mask &= B->Mask;
+ A->Dist = std::max(A->Dist, B->Dist);
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == B)
+ SetLiveReg(rx, A);
+ return true;
+}
+
+void SSEDomainFixPass::enterBasicBlock() {
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(),
+ e = MBB->livein_end(); i != e; ++i) {
+ int rx = RegIndex(*i);
+ if (rx < 0) continue;
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) continue;
+ DomainValue *pdv = fi->second[rx];
+ if (!pdv) continue;
+ if (!LiveRegs || !LiveRegs[rx])
+ SetLiveReg(rx, pdv);
+ else {
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx]->collapsed()) {
+ // We are already collapsed, but predecessor is not. Force him.
+ if (!pdv->collapsed())
+ Collapse(pdv, LiveRegs[rx]->firstDomain());
+ } else {
+ // Currently open, merge in predecessor.
+ if (!pdv->collapsed())
+ Merge(LiveRegs[rx], pdv);
+ else
+ Collapse(LiveRegs[rx], pdv->firstDomain());
+ }
+ }
+ }
+ }
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Force(rx, domain);
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ Force(rx, domain);
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Scan the explicit use operands for incoming domains.
+ unsigned collmask = mask;
+ SmallVector<int, 4> used;
+ if (LiveRegs)
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (DomainValue *dv = LiveRegs[rx]) {
+ // Is it possible to use this collapsed register for free?
+ if (dv->collapsed()) {
+ if (unsigned m = collmask & dv->Mask)
+ collmask = m;
+ } else if (dv->compat(collmask))
+ used.push_back(rx);
+ else
+ Kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(collmask)) {
+ unsigned domain = CountTrailingZeros_32(collmask);
+ TII->SetSSEDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match collmask, and build a list of
+ // incoming DomainValue that we want to merge.
+ SmallVector<DomainValue*,4> doms;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ DomainValue *dv = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!dv->compat(collmask)) {
+ Kill(*i);
+ continue;
+ }
+ // sorted, uniqued insert.
+ bool inserted = false;
+ for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
+ i != e && !inserted; ++i) {
+ if (dv == *i)
+ inserted = true;
+ else if (dv->Dist < (*i)->Dist) {
+ inserted = true;
+ doms.insert(i, dv);
+ }
+ }
+ if (!inserted)
+ doms.push_back(dv);
+ }
+
+ // doms are now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!doms.empty()) {
+ if (!dv)
+ dv = doms.back();
+ else if (!Merge(dv, doms.back()))
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i!=e; ++i)
+ if (LiveRegs[*i] == doms.back())
+ Kill(*i);
+ doms.pop_back();
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv)
+ dv = Pool.Alloc();
+ dv->Dist = Distance;
+ dv->Mask = collmask;
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv.
+ for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+ Kill(rx);
+ SetLiveReg(rx, dv);
+ }
+ }
+}
+
+void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
+ // Process explicit defs, kill any XMM registers redefined.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ }
}
bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+ TRI = MF->getTarget().getRegisterInfo();
+ MBB = 0;
+ LiveRegs = 0;
+ Distance = 0;
+ assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
// If no XMM registers are used in the function, we can skip it completely.
- bool XMMIsUsed = false;
+ bool anyregs = false;
for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
E = X86::VR128RegClass.end(); I != E; ++I)
if (MF->getRegInfo().isPhysRegUsed(*I)) {
- XMMIsUsed = true;
+ anyregs = true;
break;
}
- if (!XMMIsUsed) return false;
+ if (!anyregs) return false;
MachineBasicBlock *Entry = MF->begin();
SmallPtrSet<MachineBasicBlock*, 16> Visited;
- for (df_ext_iterator<MachineBasicBlock*,
- SmallPtrSet<MachineBasicBlock*, 16> >
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
- DFI != DFE; ++DFI) {
- enterBasicBlock(*DFI);
+ DFI != DFE; ++DFI) {
+ MBB = *DFI;
+ enterBasicBlock();
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I) {
- MachineInstr *MI = I;
- const unsigned *equiv = 0;
- X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv);
- DEBUG(dbgs() << "-isd"[domain] << (equiv ? "* " : " ") << *MI);
+ MachineInstr *mi = I;
+ if (mi->isDebugValue()) continue;
+ ++Distance;
+ std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+ if (domp.first)
+ if (domp.second)
+ visitSoftInstr(mi, domp.second);
+ else
+ visitHardInstr(mi, domp.first);
+ else if (LiveRegs)
+ visitGenericInstr(mi);
}
+
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ if (LiveRegs)
+ LiveOuts.insert(std::make_pair(MBB, LiveRegs));
+ LiveRegs = 0;
}
+
+ // Clear the LiveOuts vectors. Should we also collapse any remaining
+ // DomainValues?
+ for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
+ i != e; ++i)
+ free(i->second);
+ LiveOuts.clear();
+ Pool.Clear();
+
return false;
}
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86.h?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86.h (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86.h Tue Mar 30 19:49:27 2010
@@ -41,6 +41,10 @@
///
FunctionPass *createX86FloatingPointStackifierPass();
+/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
+/// crossings.
+FunctionPass *createSSEDomainFixPass();
+
/// createX87FPRegKillInserterPass - This function returns a pass which
/// inserts FP_REG_KILL instructions where needed.
///
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86.td?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86.td (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86.td Tue Mar 30 19:49:27 2010
@@ -164,6 +164,7 @@
"FPForm.Value",
"hasLockPrefix",
"SegOvrBits",
+ "ExeDomain.Value",
"Opcode"];
let TSFlagsShifts = [0,
6,
@@ -174,6 +175,7 @@
16,
19,
20,
+ 22,
24];
}
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td Tue Mar 30 19:49:27 2010
@@ -68,6 +68,16 @@
def CondMovFP : FPFormat<6>;
def SpecialFP : FPFormat<7>;
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Keep in sync with tables in X86InstrInfo.cpp.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def SSEPackedSingle : Domain<1>;
+def SSEPackedDouble : Domain<2>;
+def SSEPackedInt : Domain<3>;
+
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
@@ -93,7 +103,7 @@
class TF { bits<4> Prefix = 15; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
- string AsmStr>
+ string AsmStr, Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
@@ -117,16 +127,18 @@
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
+ Domain ExeDomain = d;
}
-class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
- : X86Inst<o, f, NoImm, outs, ins, asm> {
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm8 , outs, ins, asm> {
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -194,14 +206,16 @@
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
-class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
// SSE2 Instruction Templates:
//
@@ -220,10 +234,12 @@
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
//
@@ -233,12 +249,15 @@
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ Requires<[HasSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ Requires<[HasSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE3]>;
// SSSE3 Instruction Templates:
@@ -252,10 +271,12 @@
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
//
@@ -264,17 +285,20 @@
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE42]>;
// SS42FI - SSE 4.2 instructions with TF prefix.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -284,7 +308,8 @@
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE42]>;
// X86-64 Instruction templates...
//
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp Tue Mar 30 19:49:27 2010
@@ -2514,7 +2514,9 @@
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
Alignment = 16;
break;
@@ -2544,11 +2546,13 @@
SmallVector<MachineOperand,X86AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
- // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
// Medium and large mode can't fold loads this way.
@@ -3657,3 +3661,51 @@
X86FI->setGlobalBaseReg(GlobalBaseReg);
return GlobalBaseReg;
}
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+ //PackedInt PackedSingle PackedDouble
+ { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
+ { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
+ { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
+ { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
+ { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
+ { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
+ { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
+ { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
+ { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
+ { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
+ { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
+ { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
+ { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opcode)
+ return ReplaceableInstrs[i];
+ return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+ uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ return std::make_pair(domain,
+ domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+}
+
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+ assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+ const unsigned *table = lookup(MI->getOpcode(), dom);
+ assert(table && "Cannot change domain");
+ MI->setDesc(get(table[Domain-1]));
+}
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h Tue Mar 30 19:49:27 2010
@@ -398,7 +398,10 @@
FS = 1 << SegOvrShift,
GS = 2 << SegOvrShift,
- // Bits 22 -> 23 are unused
+ // Execution domain for SSE instructions in bits 22, 23.
+ // 0 in bits 22-23 means normal, non-SSE instruction.
+ SSEDomainShift = 22,
+
OpcodeShift = 24,
OpcodeMask = 0xFF << OpcodeShift
};
@@ -486,7 +489,7 @@
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
-
+
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -716,6 +719,13 @@
///
unsigned getGlobalBaseReg(MachineFunction *MF) const;
+ /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
+ /// and a bitmask of possible arguments to SetSSEDomain ase the second.
+ std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+
+ /// SetSSEDomain - Set the SSEDomain of MI.
+ void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td Tue Mar 30 19:49:27 2010
@@ -1115,15 +1115,19 @@
// load of an all-zeros value if folding it would be beneficial.
// FIXME: Change encoding to pseudo!
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in
-def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
-def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
-def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
@@ -1937,6 +1941,7 @@
//===---------------------------------------------------------------------===//
// SSE integer instructions
+let ExeDomain = SSEPackedInt in {
// Move Instructions
let neverHasSideEffects = 1 in
@@ -2045,6 +2050,7 @@
}
} // Constraints = "$src1 = $dst"
+} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
@@ -2107,7 +2113,8 @@
int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
// 128-bit logical shifts.
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1,
+ ExeDomain = SSEPackedInt in {
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pslldq\t{$src2, $dst|$dst, $src2}", []>;
@@ -2141,7 +2148,7 @@
defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in {
def PANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"pandn\t{$src2, $dst|$dst, $src2}",
@@ -2195,6 +2202,8 @@
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+let ExeDomain = SSEPackedInt in {
+
// Shuffle and unpack instructions
let AddedComplexity = 5 in {
def PSHUFDri : PDIi8<0x70, MRMSrcReg,
@@ -2371,10 +2380,13 @@
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+} // ExeDomain = SSEPackedInt
+
// Non-temporal stores
def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+let ExeDomain = SSEPackedInt in
def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
@@ -2388,6 +2400,7 @@
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+let ExeDomain = SSEPackedInt in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -2416,7 +2429,7 @@
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
// FIXME: Change encoding to pseudo.
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
@@ -3018,14 +3031,14 @@
let AddedComplexity = 15 in {
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
+ (MOVSSrr (v4f32 (V_SET0PS)),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
+ (MOVSSrr (v4i32 (V_SET0PI)),
(EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
}
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp Tue Mar 30 19:49:27 2010
@@ -17,6 +17,7 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
@@ -169,6 +170,15 @@
return true; // -print-machineinstr should print after this.
}
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+ PM.add(createSSEDomainFixPass());
+ return true;
+ }
+ return false;
+}
+
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h Tue Mar 30 19:49:27 2010
@@ -66,6 +66,7 @@
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE);
};
Propchange: llvm/branches/Apple/Morbo/lib/Transforms/IPO/FunctionAttrs.cpp
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Mar 30 19:49:27 2010
@@ -1 +1 @@
-/llvm/trunk/lib/Transforms/IPO/FunctionAttrs.cpp:99492,99539,99699,99836,99846,99850,99910,99957
+/llvm/trunk/lib/Transforms/IPO/FunctionAttrs.cpp:99492,99524,99539-99540,99699,99836,99846,99848,99850,99855,99910,99916,99952-99954,99957,99959,99974-99975
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 2
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
+; RUN: grep movss %t | count 2
+; RUN: grep movaps %t | count 2
+; RUN: grep movdqa %t | count 2
define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
newFuncRoot:
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll Tue Mar 30 19:49:27 2010
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx -o %t
-; RUN: grep unpcklpd %t | count 1
-; RUN: grep movapd %t | count 1
-; RUN: grep movaps %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
; Shows a dag combine bug that will generate an illegal build vector
; with v2i64 build_vector i32, i32.
+; CHECK: test:
+; CHECK: unpcklpd
+; CHECK: movapd
define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
entry:
%tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
@@ -13,6 +13,8 @@
ret void
}
+; CHECK: test2:
+; CHECK: movdqa
define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
entry:
%tmp1 = load <4 x i16>* %src
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll Tue Mar 30 19:49:27 2010
@@ -4,7 +4,7 @@
; bounce the vector off of cache rather than shuffling each individual
; element out of the index vector.
-; CHECK: pand (%rdx), %xmm0
+; CHECK: andps (%rdx), %xmm0
; CHECK: movaps %xmm0, -24(%rsp)
; CHECK: movslq -24(%rsp), %rax
; CHECK: movsd (%rdi,%rax,8), %xmm0
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll Tue Mar 30 19:49:27 2010
@@ -1,10 +1,8 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep unpck %t | count 2
-; RUN: grep shuf %t | count 2
-; RUN: grep ps %t | count 4
-; RUN: grep pd %t | count 4
-; RUN: grep movup %t | count 4
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK: a:
+; CHECK: movdqu
+; CHECK: pshufd
define <4 x float> @a(<4 x float>* %y) nounwind {
%x = load <4 x float>* %y, align 4
%a = extractelement <4 x float> %x, i32 0
@@ -17,6 +15,10 @@
%s = insertelement <4 x float> %r, float %a, i32 3
ret <4 x float> %s
}
+
+; CHECK: b:
+; CHECK: movups
+; CHECK: unpckhps
define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
%x = load <4 x float>* %y, align 4
%a = extractelement <4 x float> %x, i32 2
@@ -29,6 +31,10 @@
%s = insertelement <4 x float> %r, float %b, i32 3
ret <4 x float> %s
}
+
+; CHECK: c:
+; CHECK: movupd
+; CHECK: shufpd
define <2 x double> @c(<2 x double>* %y) nounwind {
%x = load <2 x double>* %y, align 8
%a = extractelement <2 x double> %x, i32 0
@@ -37,6 +43,10 @@
%r = insertelement <2 x double> %p, double %a, i32 1
ret <2 x double> %r
}
+
+; CHECK: d:
+; CHECK: movupd
+; CHECK: unpckhpd
define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
%x = load <2 x double>* %y, align 8
%a = extractelement <2 x double> %x, i32 1
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll Tue Mar 30 19:49:27 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movdqu | count 1
define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
%t = load <2 x i64>* %p, align 8
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll Tue Mar 30 19:49:27 2010
@@ -20,7 +20,7 @@
; X64: pshuflw $0, %xmm0, %xmm0
; X64: xorl %eax, %eax
; X64: pinsrw $0, %eax, %xmm0
-; X64: movaps %xmm0, (%rdi)
+; X64: movdqa %xmm0, (%rdi)
; X64: ret
}
@@ -32,7 +32,7 @@
; X64: t1:
; X64: movl (%rsi), %eax
-; X64: movaps (%rdi), %xmm0
+; X64: movdqa (%rdi), %xmm0
; X64: pinsrw $0, %eax, %xmm0
; X64: ret
}
@@ -66,7 +66,7 @@
; X64: pshufhw $100, %xmm0, %xmm2
; X64: pinsrw $1, %eax, %xmm2
; X64: pextrw $1, %xmm0, %eax
-; X64: movaps %xmm2, %xmm0
+; X64: movdqa %xmm2, %xmm0
; X64: pinsrw $4, %eax, %xmm0
; X64: ret
}
@@ -122,7 +122,7 @@
; X64: t8:
; X64: pshuflw $-58, (%rsi), %xmm0
; X64: pshufhw $-58, %xmm0, %xmm0
-; X64: movaps %xmm0, (%rdi)
+; X64: movdqa %xmm0, (%rdi)
; X64: ret
}
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll Tue Mar 30 19:49:27 2010
@@ -15,7 +15,7 @@
; CHECK: test2:
; CHECK: pcmp
; CHECK: pcmp
-; CHECK: xorps
+; CHECK: pxor
; CHECK: ret
%C = icmp sge <4 x i32> %A, %B
%D = sext <4 x i1> %C to <4 x i32>
@@ -25,7 +25,7 @@
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK: test3:
; CHECK: pcmpgtd
-; CHECK: movaps
+; CHECK: movdqa
; CHECK: ret
%C = icmp slt <4 x i32> %A, %B
%D = sext <4 x i1> %C to <4 x i32>
@@ -34,7 +34,7 @@
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK: test4:
-; CHECK: movaps
+; CHECK: movdqa
; CHECK: pcmpgtd
; CHECK: ret
%C = icmp ugt <4 x i32> %A, %B
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep xorps %t | count 1
+; RUN: grep pxor %t | count 1
; RUN: grep movaps %t | count 1
; RUN: not grep shuf %t
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep xorps %t | count 1
+; RUN: grep pxor %t | count 1
; RUN: not grep shufps %t
define void @test() {
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll Tue Mar 30 19:49:27 2010
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
define <4 x i32> @test(i8** %ptr) {
-; CHECK: xorps
+; CHECK: pxor
; CHECK: punpcklbw
; CHECK: punpcklwd
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xorps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; CHECK: xorps
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
%S = fadd <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1]
@@ -7,6 +8,7 @@
ret void
}
+; CHECK: pxor
define void @bar(<4 x i32>* %P) {
%T = load <4 x i32>* %P ; <<4 x i32>> [#uses=1]
%S = add <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1]
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,4 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 2
; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
@M1 = external global <1 x i64>
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
-; CHECK: movaps
+; CHECK: movdqa
; CHECK: pmulld
; CHECK: psubd
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll Tue Mar 30 19:49:27 2010
@@ -2,7 +2,7 @@
; CHECK: pextrd
; CHECK: pextrd
; CHECK: movd
-; CHECK: movaps
+; CHECK: movdqa
; bitcast v14i16 to v7i32
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll Tue Mar 30 19:49:27 2010
@@ -5,7 +5,7 @@
%i32vec3 = type <3 x i32>
define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
-; CHECK: movaps
+; CHECK: movdqa
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
@@ -33,13 +33,13 @@
%i32vec7 = type <7 x i32>
define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
; CHECK: paddd
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
-; CHECK: movaps
+; CHECK: movdqa
%a = load %i32vec7* %ap, align 16
%b = load %i32vec7* %bp, align 16
%x = add %i32vec7 %a, %b
@@ -49,15 +49,15 @@
%i32vec12 = type <12 x i32>
define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
-; CHECK: movaps
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
+; CHECK: movdqa
; CHECK: paddd
; CHECK: paddd
; CHECK: paddd
-; CHECK: movaps
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
+; CHECK: movdqa
%a = load %i32vec12* %ap, align 16
%b = load %i32vec12* %bp, align 16
%x = add %i32vec12 %a, %b
@@ -68,7 +68,7 @@
%i16vec3 = type <3 x i16>
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: movaps
+; CHECK: movdqa
; CHECK: paddw
; CHECK: movd
; CHECK: pextrw
@@ -81,7 +81,7 @@
%i16vec4 = type <4 x i16>
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: movaps
+; CHECK: movdqa
; CHECK: paddw
; CHECK: movq
%a = load %i16vec4* %ap, align 16
@@ -93,12 +93,12 @@
%i16vec12 = type <12 x i16>
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
; CHECK: paddw
; CHECK: paddw
; CHECK: movq
-; CHECK: movaps
+; CHECK: movdqa
%a = load %i16vec12* %ap, align 16
%b = load %i16vec12* %bp, align 16
%x = add %i16vec12 %a, %b
@@ -108,15 +108,15 @@
%i16vec18 = type <18 x i16>
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
-; CHECK: movaps
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
+; CHECK: movdqa
; CHECK: paddw
; CHECK: paddw
; CHECK: paddw
; CHECK: movd
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
%a = load %i16vec18* %ap, align 16
%b = load %i16vec18* %bp, align 16
%x = add %i16vec18 %a, %b
@@ -127,7 +127,7 @@
%i8vec3 = type <3 x i8>
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: movaps
+; CHECK: movdqa
; CHECK: paddb
; CHECK: pextrb
; CHECK: movb
@@ -140,8 +140,8 @@
%i8vec31 = type <31 x i8>
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
; CHECK: paddb
; CHECK: paddb
; CHECK: movq
Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll Tue Mar 30 19:49:27 2010
@@ -7,7 +7,7 @@
ret <4 x i32> %tmp
; X32: test1:
-; X32: xorps %xmm0, %xmm0
+; X32: pxor %xmm0, %xmm0
; X32: ret
}
More information about the llvm-branch-commits
mailing list