[llvm-branch-commits] [llvm-branch] r99976 - in /llvm/branches/Apple/Morbo: ./ lib/Target/X86/ lib/Target/X86/AsmPrinter/ lib/Transforms/IPO/ test/CodeGen/X86/

Jakob Stoklund Olesen stoklund at 2pi.dk
Tue Mar 30 17:49:27 PDT 2010


Author: stoklund
Date: Tue Mar 30 19:49:27 2010
New Revision: 99976

URL: http://llvm.org/viewvc/llvm-project?rev=99976&view=rev
Log:
Merge SSEDomainFix pass from trunk.
Revisions 99524 99539 99540 99848 99855 99916 99952 99953 99954 99959 99974 99975
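
As a quick illustration of what the pass does: it looks an instruction's opcode up in a per-domain equivalence table and swaps it for the equivalent opcode in the desired execution domain. The following toy, self-contained C++ sketch shows that table-driven switch. It is not part of the patch; the opcode names are illustrative stand-ins for the X86:: enums, and the domain numbering follows the patch (1 = PackedSingle, 2 = PackedDouble, 3 = PackedInt).

// Toy sketch of the table-driven domain switch; not part of the patch.
// Opcode names are illustrative stand-ins for the X86:: enums.
#include <cstdio>

enum Opcode { XORPSrr, XORPDrr, PXORrr, MOVAPSrr, MOVAPDrr, MOVDQArr };

// One row per family of equivalent opcodes:
//   column 0 = PackedSingle, column 1 = PackedDouble, column 2 = PackedInt.
static const Opcode Replaceable[][3] = {
  { XORPSrr,  XORPDrr,  PXORrr   },
  { MOVAPSrr, MOVAPDrr, MOVDQArr },
};

// Return the row containing Op in the column for Domain (1..3), or 0 if the
// opcode has no equivalents, i.e. it cannot change domain for free.
static const Opcode *lookupRow(unsigned Op, unsigned Domain) {
  for (unsigned i = 0, e = sizeof(Replaceable)/sizeof(Replaceable[0]); i != e; ++i)
    if (Replaceable[i][Domain - 1] == Op)
      return Replaceable[i];
  return 0;
}

int main() {
  // A zeroing XORPS whose result only feeds integer instructions can be
  // rewritten as PXOR, avoiding a domain-crossing penalty.
  if (const Opcode *Row = lookupRow(XORPSrr, /*Domain=*/1))
    std::printf("XORPSrr -> opcode %d (PXORrr) in the PackedInt domain\n",
                (int)Row[3 - 1]);
  return 0;
}

This mirrors lookup()/SetSSEDomain in X86InstrInfo.cpp in the diff below; the real pass additionally tracks which domains each live register is already available in, so the swap is only made when it is free.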

Added:
    llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp
      - copied, changed from r99524, llvm/trunk/lib/Target/X86/SSEDomainFix.cpp
Modified:
    llvm/branches/Apple/Morbo/   (props changed)
    llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
    llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt
    llvm/branches/Apple/Morbo/lib/Target/X86/X86.h
    llvm/branches/Apple/Morbo/lib/Target/X86/X86.td
    llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td
    llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp
    llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h
    llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td
    llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp
    llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h
    llvm/branches/Apple/Morbo/lib/Transforms/IPO/FunctionAttrs.cpp   (props changed)
    llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll

Propchange: llvm/branches/Apple/Morbo/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Mar 30 19:49:27 2010
@@ -1,2 +1,2 @@
 /llvm/branches/Apple/Hermes:96832,96835,96858,96870,96876,96879
-/llvm/trunk:98602,98604,98612,98615-98616,98675,98686,98743-98744,98773,98778,98780,98810,98835,98839,98845,98855,98862,98881,98920,98977,99032-99033,99043,99223,99263,99282-99284,99306,99319-99321,99324,99336,99378,99418,99423,99429,99455,99463,99465,99469,99484,99490,99492-99494,99537,99539,99544,99570,99575,99629-99630,99671,99692,99695,99697,99699,99722,99846,99850,99910,99957
+/llvm/trunk:98602,98604,98612,98615-98616,98675,98686,98743-98744,98773,98778,98780,98810,98835,98839,98845,98855,98862,98881,98920,98977,99032-99033,99043,99223,99263,99282-99284,99306,99319-99321,99324,99336,99378,99418,99423,99429,99455,99463,99465,99469,99484,99490,99492-99494,99524,99537,99539-99540,99544,99570,99575,99629-99630,99671,99692,99695,99697,99699,99722,99846,99848,99850,99855,99910,99916,99952-99954,99957,99959,99974-99975

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp Tue Mar 30 19:49:27 2010
@@ -287,7 +287,9 @@
     LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
   case X86::FsFLD0SS:     LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
   case X86::FsFLD0SD:     LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
-  case X86::V_SET0:       LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+  case X86::V_SET0PS:     LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+  case X86::V_SET0PD:     LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+  case X86::V_SET0PI:     LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
   case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
 
   case X86::MOV16r0:

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/CMakeLists.txt Tue Mar 30 19:49:27 2010
@@ -15,6 +15,7 @@
 tablegen(X86GenSubtarget.inc -gen-subtarget)
 
 set(sources
+  SSEDomainFix.cpp
   X86AsmBackend.cpp
   X86CodeEmitter.cpp
   X86COFFMachineModuleInfo.cpp

Copied: llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp (from r99524, llvm/trunk/lib/Target/X86/SSEDomainFix.cpp)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp?p2=llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp&p1=llvm/trunk/lib/Target/X86/SSEDomainFix.cpp&r1=99524&r2=99976&rev=99976&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/SSEDomainFix.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/SSEDomainFix.cpp Tue Mar 30 19:49:27 2010
@@ -29,12 +29,116 @@
 using namespace llvm;
 
 namespace {
+
+/// Allocate objects from a pool, allow objects to be recycled, and provide a
+/// way of deleting everything.
+template<typename T, unsigned PageSize = 64>
+class PoolAllocator {
+  std::vector<T*> Pages, Avail;
+public:
+  ~PoolAllocator() { Clear(); }
+
+  T* Alloc() {
+    if (Avail.empty()) {
+      T *p = new T[PageSize];
+      Pages.push_back(p);
+      Avail.reserve(PageSize);
+      for (unsigned n = 0; n != PageSize; ++n)
+        Avail.push_back(p+n);
+    }
+    T *p = Avail.back();
+    Avail.pop_back();
+    return p;
+  }
+
+  // Allow object to be reallocated. It won't be reconstructed.
+  void Recycle(T *p) {
+    p->clear();
+    Avail.push_back(p);
+  }
+
+  // Destroy all objects, make sure there are no external pointers to them.
+  void Clear() {
+    Avail.clear();
+    while (!Pages.empty()) {
+      delete[] Pages.back();
+      Pages.pop_back();
+    }
+  }
+};
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+struct DomainValue {
+  // Basic reference counting.
+  unsigned Refs;
+
+  // Available domains. For an open DomainValue, this is the set of domains
+  // it can still be collapsed to. For a collapsed DomainValue, it is the
+  // domains where the register is available for free.
+  unsigned Mask;
+
+  // Position of the last defining instruction.
+  unsigned Dist;
+
+  // Twiddleable instructions using or defining these registers.
+  SmallVector<MachineInstr*, 8> Instrs;
+
+  // A collapsed DomainValue has no instructions to twiddle - it simply keeps
+  // track of the domains where the register is already available.
+  bool collapsed() const { return Instrs.empty(); }
+
+  // Is any domain in mask available?
+  bool compat(unsigned mask) const {
+    return Mask & mask;
+  }
+
+  // Mark domain as available.
+  void add(unsigned domain) {
+    Mask |= 1u << domain;
+  }
+
+  // First domain available in mask.
+  unsigned firstDomain() const {
+    return CountTrailingZeros_32(Mask);
+  }
+
+  DomainValue() { clear(); }
+
+  void clear() {
+    Refs = Mask = Dist = 0;
+    Instrs.clear();
+  }
+};
+
+static const unsigned NumRegs = 16;
+
 class SSEDomainFixPass : public MachineFunctionPass {
   static char ID;
-  const X86InstrInfo *TII;
+  PoolAllocator<DomainValue> Pool;
 
   MachineFunction *MF;
+  const X86InstrInfo *TII;
+  const TargetRegisterInfo *TRI;
   MachineBasicBlock *MBB;
+  DomainValue **LiveRegs;
+  typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+  LiveOutMap LiveOuts;
+  unsigned Distance;
+
 public:
   SSEDomainFixPass() : MachineFunctionPass(&ID) {}
 
@@ -50,46 +154,337 @@
   }
 
 private:
-  void enterBasicBlock(MachineBasicBlock *MBB);
+  // Register mapping.
+  int RegIndex(unsigned Reg);
+
+  // LiveRegs manipulations.
+  void SetLiveReg(int rx, DomainValue *DV);
+  void Kill(int rx);
+  void Force(int rx, unsigned domain);
+  void Collapse(DomainValue *dv, unsigned domain);
+  bool Merge(DomainValue *A, DomainValue *B);
+
+  void enterBasicBlock();
+  void visitGenericInstr(MachineInstr*);
+  void visitSoftInstr(MachineInstr*, unsigned mask);
+  void visitHardInstr(MachineInstr*, unsigned domain);
+
 };
 }
 
 char SSEDomainFixPass::ID = 0;
 
-void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
-  MBB = mbb;
-  DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n");
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
+int SSEDomainFixPass::RegIndex(unsigned reg) {
+  // Registers are sorted lexicographically.
+  // We just need them to be consecutive; the ordering doesn't matter.
+  assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+  reg -= X86::XMM0;
+  return reg < NumRegs ? reg : -1;
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+  assert(unsigned(rx) < NumRegs && "Invalid index");
+  if (!LiveRegs)
+    LiveRegs = (DomainValue**)calloc(sizeof(DomainValue*), NumRegs);
+
+  if (LiveRegs[rx] == dv)
+    return;
+  if (LiveRegs[rx]) {
+    assert(LiveRegs[rx]->Refs && "Bad refcount");
+    if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]);
+  }
+  LiveRegs[rx] = dv;
+  if (dv) ++dv->Refs;
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void SSEDomainFixPass::Kill(int rx) {
+  assert(unsigned(rx) < NumRegs && "Invalid index");
+  if (!LiveRegs || !LiveRegs[rx]) return;
+
+  // Before killing the last reference to an open DomainValue, collapse it to
+  // the first available domain.
+  if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed())
+    Collapse(LiveRegs[rx], LiveRegs[rx]->firstDomain());
+  else
+    SetLiveReg(rx, 0);
+}
+
+/// Force register rx into domain.
+void SSEDomainFixPass::Force(int rx, unsigned domain) {
+  assert(unsigned(rx) < NumRegs && "Invalid index");
+  DomainValue *dv;
+  if (LiveRegs && (dv = LiveRegs[rx])) {
+    if (dv->collapsed())
+      dv->add(domain);
+    else
+      Collapse(dv, domain);
+  } else {
+    // Set up basic collapsed DomainValue.
+    dv = Pool.Alloc();
+    dv->Dist = Distance;
+    dv->add(domain);
+    SetLiveReg(rx, dv);
+  }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+  assert(dv->compat(1u << domain) && "Cannot collapse");
+
+  // Collapse all the instructions.
+  while (!dv->Instrs.empty()) {
+    MachineInstr *mi = dv->Instrs.back();
+    TII->SetSSEDomain(mi, domain);
+    dv->Instrs.pop_back();
+  }
+  dv->Mask = 1u << domain;
+
+  // If there are multiple users, give them new, unique DomainValues.
+  if (LiveRegs && dv->Refs > 1) {
+    for (unsigned rx = 0; rx != NumRegs; ++rx)
+      if (LiveRegs[rx] == dv) {
+        DomainValue *dv2 = Pool.Alloc();
+        dv2->Dist = Distance;
+        dv2->add(domain);
+        SetLiveReg(rx, dv2);
+      }
+  }
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
+bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+  assert(!A->collapsed() && "Cannot merge into collapsed");
+  assert(!B->collapsed() && "Cannot merge from collapsed");
+  if (!A->compat(B->Mask))
+    return false;
+  A->Mask &= B->Mask;
+  A->Dist = std::max(A->Dist, B->Dist);
+  A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+  for (unsigned rx = 0; rx != NumRegs; ++rx)
+    if (LiveRegs[rx] == B)
+      SetLiveReg(rx, A);
+  return true;
+}
+
+void SSEDomainFixPass::enterBasicBlock() {
+  // Try to coalesce live-out registers from predecessors.
+  for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(),
+         e = MBB->livein_end(); i != e; ++i) {
+    int rx = RegIndex(*i);
+    if (rx < 0) continue;
+    for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+           pe = MBB->pred_end(); pi != pe; ++pi) {
+      LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+      if (fi == LiveOuts.end()) continue;
+      DomainValue *pdv = fi->second[rx];
+      if (!pdv) continue;
+      if (!LiveRegs || !LiveRegs[rx])
+        SetLiveReg(rx, pdv);
+      else {
+        // We have a live DomainValue from more than one predecessor.
+        if (LiveRegs[rx]->collapsed()) {
+          // We are already collapsed, but the predecessor is not. Force it.
+          if (!pdv->collapsed())
+            Collapse(pdv, LiveRegs[rx]->firstDomain());
+        } else {
+          // Currently open, merge in predecessor.
+          if (!pdv->collapsed())
+            Merge(LiveRegs[rx], pdv);
+          else
+            Collapse(LiveRegs[rx], pdv->firstDomain());
+        }
+      }
+    }
+  }
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+  // Collapse all uses.
+  for (unsigned i = mi->getDesc().getNumDefs(),
+                e = mi->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    Force(rx, domain);
+  }
+
+  // Kill all defs and force them.
+  for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    Kill(rx);
+    Force(rx, domain);
+  }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+  // Scan the explicit use operands for incoming domains.
+  unsigned collmask = mask;
+  SmallVector<int, 4> used;
+  if (LiveRegs)
+    for (unsigned i = mi->getDesc().getNumDefs(),
+                  e = mi->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    if (DomainValue *dv = LiveRegs[rx]) {
+      // Is it possible to use this collapsed register for free?
+      if (dv->collapsed()) {
+        if (unsigned m = collmask & dv->Mask)
+          collmask = m;
+      } else if (dv->compat(collmask))
+        used.push_back(rx);
+      else
+        Kill(rx);
+    }
+  }
+
+  // If the collapsed operands force a single domain, propagate the collapse.
+  if (isPowerOf2_32(collmask)) {
+    unsigned domain = CountTrailingZeros_32(collmask);
+    TII->SetSSEDomain(mi, domain);
+    visitHardInstr(mi, domain);
+    return;
+  }
+
+  // Kill off any remaining uses that don't match collmask, and build a list of
+  // incoming DomainValues that we want to merge.
+  SmallVector<DomainValue*,4> doms;
+  for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+    int rx = *i;
+    DomainValue *dv = LiveRegs[rx];
+    // This useless DomainValue could have been missed above.
+    if (!dv->compat(collmask)) {
+      Kill(*i);
+      continue;
+    }
+    // sorted, uniqued insert.
+    bool inserted = false;
+    for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
+           i != e && !inserted; ++i) {
+      if (dv == *i)
+        inserted = true;
+      else if (dv->Dist < (*i)->Dist) {
+        inserted = true;
+        doms.insert(i, dv);
+      }
+    }
+    if (!inserted)
+      doms.push_back(dv);
+  }
+
+  //  doms are now sorted in order of appearance. Try to merge them all, giving
+  //  priority to the latest ones.
+  DomainValue *dv = 0;
+  while (!doms.empty()) {
+    if (!dv)
+      dv = doms.back();
+    else if (!Merge(dv, doms.back()))
+      for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i!=e; ++i)
+        if (LiveRegs[*i] == doms.back())
+          Kill(*i);
+    doms.pop_back();
+  }
+
+  // dv is the DomainValue we are going to use for this instruction.
+  if (!dv)
+    dv = Pool.Alloc();
+  dv->Dist = Distance;
+  dv->Mask = collmask;
+  dv->Instrs.push_back(mi);
+
+  // Finally set all defs and non-collapsed uses to dv.
+  for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+      Kill(rx);
+      SetLiveReg(rx, dv);
+    }
+  }
+}
+
+void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
+  // Process explicit defs, kill any XMM registers redefined.
+  for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    Kill(rx);
+  }
 }
 
 bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
   MF = &mf;
   TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+  TRI = MF->getTarget().getRegisterInfo();
+  MBB = 0;
+  LiveRegs = 0;
+  Distance = 0;
+  assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
 
   // If no XMM registers are used in the function, we can skip it completely.
-  bool XMMIsUsed = false;
+  bool anyregs = false;
   for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
          E = X86::VR128RegClass.end(); I != E; ++I)
     if (MF->getRegInfo().isPhysRegUsed(*I)) {
-      XMMIsUsed = true;
+      anyregs = true;
       break;
     }
-  if (!XMMIsUsed) return false;
+  if (!anyregs) return false;
 
   MachineBasicBlock *Entry = MF->begin();
   SmallPtrSet<MachineBasicBlock*, 16> Visited;
-  for (df_ext_iterator<MachineBasicBlock*,
-         SmallPtrSet<MachineBasicBlock*, 16> >
+  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
          DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
-       DFI != DFE; ++DFI) {
-    enterBasicBlock(*DFI);
+         DFI != DFE; ++DFI) {
+    MBB = *DFI;
+    enterBasicBlock();
     for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
         ++I) {
-      MachineInstr *MI = I;
-      const unsigned *equiv = 0;
-      X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv);
-      DEBUG(dbgs() << "-isd"[domain] << (equiv ? "* " : "  ") << *MI);
+      MachineInstr *mi = I;
+      if (mi->isDebugValue()) continue;
+      ++Distance;
+      std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+      if (domp.first)
+        if (domp.second)
+          visitSoftInstr(mi, domp.second);
+        else
+          visitHardInstr(mi, domp.first);
+      else if (LiveRegs)
+        visitGenericInstr(mi);
     }
+
+    // Save live registers at end of MBB - used by enterBasicBlock().
+    if (LiveRegs)
+      LiveOuts.insert(std::make_pair(MBB, LiveRegs));
+    LiveRegs = 0;
   }
+
+  // Clear the LiveOuts vectors. Should we also collapse any remaining
+  // DomainValues?
+  for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
+         i != e; ++i)
+    free(i->second);
+  LiveOuts.clear();
+  Pool.Clear();
+
   return false;
 }
 

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86.h?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86.h (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86.h Tue Mar 30 19:49:27 2010
@@ -41,6 +41,10 @@
 ///
 FunctionPass *createX86FloatingPointStackifierPass();
 
+/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
+/// crossings.
+FunctionPass *createSSEDomainFixPass();
+
 /// createX87FPRegKillInserterPass - This function returns a pass which
 /// inserts FP_REG_KILL instructions where needed.
 ///

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86.td?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86.td (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86.td Tue Mar 30 19:49:27 2010
@@ -164,6 +164,7 @@
                        "FPForm.Value",
                        "hasLockPrefix",
                        "SegOvrBits",
+                       "ExeDomain.Value",
                        "Opcode"];
   let TSFlagsShifts = [0,
                        6,
@@ -174,6 +175,7 @@
                        16,
                        19,
                        20,
+                       22,
                        24];
 }
 

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrFormats.td Tue Mar 30 19:49:27 2010
@@ -68,6 +68,16 @@
 def CondMovFP  : FPFormat<6>;
 def SpecialFP  : FPFormat<7>;
 
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Keep in sync with tables in X86InstrInfo.cpp.
+class Domain<bits<2> val> {
+  bits<2> Value = val;
+}
+def GenericDomain   : Domain<0>;
+def SSEPackedSingle : Domain<1>;
+def SSEPackedDouble : Domain<2>;
+def SSEPackedInt    : Domain<3>;
+
 // Prefix byte classes which are used to indicate to the ad-hoc machine code
 // emitter that various prefix bytes are required.
 class OpSize { bit hasOpSizePrefix = 1; }
@@ -93,7 +103,7 @@
 class TF     { bits<4> Prefix = 15; }
 
 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
-              string AsmStr>
+              string AsmStr, Domain d = GenericDomain>
   : Instruction {
   let Namespace = "X86";
 
@@ -117,16 +127,18 @@
   FPFormat FPForm = NotFP;  // What flavor of FP instruction is this?
   bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
   bits<2> SegOvrBits = 0;   // Segment override prefix.
+  Domain ExeDomain = d;
 }
 
-class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
-  : X86Inst<o, f, NoImm, outs, ins, asm> {
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+        list<dag> pattern, Domain d = GenericDomain>
+  : X86Inst<o, f, NoImm, outs, ins, asm, d> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, 
-           list<dag> pattern>
-  : X86Inst<o, f, Imm8 , outs, ins, asm> {
+           list<dag> pattern, Domain d = GenericDomain>
+  : X86Inst<o, f, Imm8, outs, ins, asm, d> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
@@ -194,14 +206,16 @@
 
 class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
       : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
-class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, 
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
 class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+      : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+        Requires<[HasSSE1]>;
 class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
-      : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+      : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+        Requires<[HasSSE1]>;
 
 // SSE2 Instruction Templates:
 // 
@@ -220,10 +234,12 @@
              list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
 class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+      : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+        Requires<[HasSSE2]>;
 class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
-      : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+      : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+        Requires<[HasSSE2]>;
 
 // SSE3 Instruction Templates:
 // 
@@ -233,12 +249,15 @@
 
 class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, 
            list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>;
+      : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+        Requires<[HasSSE3]>;
 class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, 
            list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>;
+      : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+        Requires<[HasSSE3]>;
 class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+      : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+        Requires<[HasSSE3]>;
 
 
 // SSSE3 Instruction Templates:
@@ -252,10 +271,12 @@
 
 class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
-      : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;
+      : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+        Requires<[HasSSSE3]>;
 class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
-      : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
+      : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+        Requires<[HasSSSE3]>;
 
 // SSE4.1 Instruction Templates:
 // 
@@ -264,17 +285,20 @@
 //
 class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+      : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+        Requires<[HasSSE41]>;
 class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
-      : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+      : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+        Requires<[HasSSE41]>;
 
 // SSE4.2 Instruction Templates:
 // 
 //   SS428I - SSE 4.2 instructions with T8 prefix.
 class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+      : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+        Requires<[HasSSE42]>;
 
 //   SS42FI - SSE 4.2 instructions with TF prefix.
 class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -284,7 +308,8 @@
 //   SS42AI = SSE 4.2 instructions with TA prefix
 class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
+      : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+        Requires<[HasSSE42]>;
 
 // X86-64 Instruction templates...
 //

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.cpp Tue Mar 30 19:49:27 2010
@@ -2514,7 +2514,9 @@
     Alignment = (*LoadMI->memoperands_begin())->getAlignment();
   else
     switch (LoadMI->getOpcode()) {
-    case X86::V_SET0:
+    case X86::V_SET0PS:
+    case X86::V_SET0PD:
+    case X86::V_SET0PI:
     case X86::V_SETALLONES:
       Alignment = 16;
       break;
@@ -2544,11 +2546,13 @@
 
   SmallVector<MachineOperand,X86AddrNumOperands> MOs;
   switch (LoadMI->getOpcode()) {
-  case X86::V_SET0:
+  case X86::V_SET0PS:
+  case X86::V_SET0PD:
+  case X86::V_SET0PI:
   case X86::V_SETALLONES:
   case X86::FsFLD0SD:
   case X86::FsFLD0SS: {
-    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+    // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
     // Create a constant-pool entry and operands to load from it.
 
     // Medium and large mode can't fold loads this way.
@@ -3657,3 +3661,51 @@
   X86FI->setGlobalBaseReg(GlobalBaseReg);
   return GlobalBaseReg;
 }
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+  //PackedSingle     PackedDouble    PackedInt
+  { X86::MOVAPSmr,   X86::MOVAPDmr,  X86::MOVDQAmr  },
+  { X86::MOVAPSrm,   X86::MOVAPDrm,  X86::MOVDQArm  },
+  { X86::MOVAPSrr,   X86::MOVAPDrr,  X86::MOVDQArr  },
+  { X86::MOVUPSmr,   X86::MOVUPDmr,  X86::MOVDQUmr  },
+  { X86::MOVUPSrm,   X86::MOVUPDrm,  X86::MOVDQUrm  },
+  { X86::MOVNTPSmr,  X86::MOVNTPDmr, X86::MOVNTDQmr },
+  { X86::ANDNPSrm,   X86::ANDNPDrm,  X86::PANDNrm   },
+  { X86::ANDNPSrr,   X86::ANDNPDrr,  X86::PANDNrr   },
+  { X86::ANDPSrm,    X86::ANDPDrm,   X86::PANDrm    },
+  { X86::ANDPSrr,    X86::ANDPDrr,   X86::PANDrr    },
+  { X86::ORPSrm,     X86::ORPDrm,    X86::PORrm     },
+  { X86::ORPSrr,     X86::ORPDrr,    X86::PORrr     },
+  { X86::V_SET0PS,   X86::V_SET0PD,  X86::V_SET0PI  },
+  { X86::XORPSrm,    X86::XORPDrm,   X86::PXORrm    },
+  { X86::XORPSrr,    X86::XORPDrr,   X86::PXORrr    },
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
+  for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+    if (ReplaceableInstrs[i][domain-1] == opcode)
+      return ReplaceableInstrs[i];
+  return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+  uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+  return std::make_pair(domain,
+                        domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+}
+
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+  assert(Domain>0 && Domain<4 && "Invalid execution domain");
+  uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+  assert(dom && "Not an SSE instruction");
+  const unsigned *table = lookup(MI->getOpcode(), dom);
+  assert(table && "Cannot change domain");
+  MI->setDesc(get(table[Domain-1]));
+}

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrInfo.h Tue Mar 30 19:49:27 2010
@@ -398,7 +398,10 @@
     FS          = 1 << SegOvrShift,
     GS          = 2 << SegOvrShift,
 
-    // Bits 22 -> 23 are unused
+    // Execution domain for SSE instructions in bits 22, 23.
+    // 0 in bits 22-23 means normal, non-SSE instruction.
+    SSEDomainShift = 22,
+
     OpcodeShift   = 24,
     OpcodeMask    = 0xFF << OpcodeShift
   };
@@ -486,7 +489,7 @@
   /// MemOp2RegOpTable - Load / store unfolding opcode map.
   ///
   DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
-  
+
 public:
   explicit X86InstrInfo(X86TargetMachine &tm);
 
@@ -716,6 +719,13 @@
   ///
   unsigned getGlobalBaseReg(MachineFunction *MF) const;
 
+  /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
+  /// and a bitmask of possible arguments to SetSSEDomain as the second.
+  std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+
+  /// SetSSEDomain - Set the SSEDomain of MI.
+  void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
+
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86InstrSSE.td Tue Mar 30 19:49:27 2010
@@ -1115,15 +1115,19 @@
 // load of an all-zeros value if folding it would be beneficial.
 // FIXME: Change encoding to pseudo!
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1 in
-def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+    isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+                 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
                  [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
 
-def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
-def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
 
 def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
           (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
@@ -1937,6 +1941,7 @@
 
 //===---------------------------------------------------------------------===//
 // SSE integer instructions
+let ExeDomain = SSEPackedInt in {
 
 // Move Instructions
 let neverHasSideEffects = 1 in
@@ -2045,6 +2050,7 @@
 }
 
 } // Constraints = "$src1 = $dst"
+} // ExeDomain = SSEPackedInt
 
 // 128-bit Integer Arithmetic
 
@@ -2107,7 +2113,8 @@
                                int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
 
 // 128-bit logical shifts.
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1,
+    ExeDomain = SSEPackedInt in {
   def PSLLDQri : PDIi8<0x73, MRM7r,
                        (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                        "pslldq\t{$src2, $dst|$dst, $src2}", []>;
@@ -2141,7 +2148,7 @@
 defm POR  : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
 defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
 
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in {
   def PANDNrr : PDI<0xDF, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     "pandn\t{$src2, $dst|$dst, $src2}",
@@ -2195,6 +2202,8 @@
 defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
 defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
 
+let ExeDomain = SSEPackedInt in {
+
 // Shuffle and unpack instructions
 let AddedComplexity = 5 in {
 def PSHUFDri : PDIi8<0x70, MRMSrcReg,
@@ -2371,10 +2380,13 @@
                      "maskmovdqu\t{$mask, $src|$src, $mask}",
                      [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
 
+} // ExeDomain = SSEPackedInt
+
 // Non-temporal stores
 def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                         "movntpd\t{$src, $dst|$dst, $src}",
                         [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+let ExeDomain = SSEPackedInt in
 def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                         "movntdq\t{$src, $dst|$dst, $src}",
                         [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
@@ -2388,6 +2400,7 @@
                     "movntpd\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
 
+let ExeDomain = SSEPackedInt in
 def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movntdq\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -2416,7 +2429,7 @@
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-ones value if folding it would be beneficial.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1 in
+    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
   // FIXME: Change encoding to pseudo.
   def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                          [(set VR128:$dst, (v4i32 immAllOnesV))]>;
@@ -3018,14 +3031,14 @@
 let AddedComplexity = 15 in {
 // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
-          (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+          (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
-          (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+          (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-          (MOVSSrr (v4f32 (V_SET0)),
+          (MOVSSrr (v4f32 (V_SET0PS)),
                    (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-          (MOVSSrr (v4i32 (V_SET0)),
+          (MOVSSrr (v4i32 (V_SET0PI)),
                    (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
 }
 

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.cpp Tue Mar 30 19:49:27 2010
@@ -17,6 +17,7 @@
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -169,6 +170,15 @@
   return true;  // -print-machineinstr should print after this.
 }
 
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel) {
+  if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+    PM.add(createSSEDomainFixPass());
+    return true;
+  }
+  return false;
+}
+
 bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel,
                                       JITCodeEmitter &JCE) {

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86TargetMachine.h Tue Mar 30 19:49:27 2010
@@ -66,6 +66,7 @@
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
                               JITCodeEmitter &JCE);
 };

Propchange: llvm/branches/Apple/Morbo/lib/Transforms/IPO/FunctionAttrs.cpp
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Mar 30 19:49:27 2010
@@ -1 +1 @@
-/llvm/trunk/lib/Transforms/IPO/FunctionAttrs.cpp:99492,99539,99699,99836,99846,99850,99910,99957
+/llvm/trunk/lib/Transforms/IPO/FunctionAttrs.cpp:99492,99524,99539-99540,99699,99836,99846,99848,99850,99855,99910,99916,99952-99954,99957,99959,99974-99975

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-02-05-CoalescerBug.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss  | count 2
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
+; RUN: grep movss %t | count 2
+; RUN: grep movaps %t | count 2
+; RUN: grep movdqa %t | count 2
 
 define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
 newFuncRoot:

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/dagcombine-buildvector.ll Tue Mar 30 19:49:27 2010
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx -o %t
-; RUN: grep unpcklpd %t | count 1
-; RUN: grep movapd %t | count 1
-; RUN: grep movaps %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
 
 ; Shows a dag combine bug that will generate an illegal build vector
 ; with v2i64 build_vector i32, i32.
 
+; CHECK: test:
+; CHECK: unpcklpd
+; CHECK: movapd
 define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
 entry:
         %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
@@ -13,6 +13,8 @@
         ret void
 }
 
+; CHECK: test2:
+; CHECK: movdqa
 define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
 entry:
         %tmp1 = load <4 x i16>* %src

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/gather-addresses.ll Tue Mar 30 19:49:27 2010
@@ -4,7 +4,7 @@
 ; bounce the vector off of cache rather than shuffling each individual
 ; element out of the index vector.
 
-; CHECK: pand     (%rdx), %xmm0
+; CHECK: andps    (%rdx), %xmm0
 ; CHECK: movaps   %xmm0, -24(%rsp)
 ; CHECK: movslq   -24(%rsp), %rax
 ; CHECK: movsd    (%rdi,%rax,8), %xmm0

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-12.ll Tue Mar 30 19:49:27 2010
@@ -1,10 +1,8 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep unpck %t | count 2
-; RUN: grep shuf %t | count 2
-; RUN: grep ps %t | count 4
-; RUN: grep pd %t | count 4
-; RUN: grep movup %t | count 4
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
+; CHECK: a:
+; CHECK: movdqu
+; CHECK: pshufd
 define <4 x float> @a(<4 x float>* %y) nounwind {
   %x = load <4 x float>* %y, align 4
   %a = extractelement <4 x float> %x, i32 0
@@ -17,6 +15,10 @@
   %s = insertelement <4 x float> %r, float %a, i32 3
   ret <4 x float> %s
 }
+
+; CHECK: b:
+; CHECK: movups
+; CHECK: unpckhps
 define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
   %x = load <4 x float>* %y, align 4
   %a = extractelement <4 x float> %x, i32 2
@@ -29,6 +31,10 @@
   %s = insertelement <4 x float> %r, float %b, i32 3
   ret <4 x float> %s
 }
+
+; CHECK: c:
+; CHECK: movupd
+; CHECK: shufpd
 define <2 x double> @c(<2 x double>* %y) nounwind {
   %x = load <2 x double>* %y, align 8
   %a = extractelement <2 x double> %x, i32 0
@@ -37,6 +43,10 @@
   %r = insertelement <2 x double> %p, double %a, i32 1
   ret <2 x double> %r
 }
+
+; CHECK: d:
+; CHECK: movupd
+; CHECK: unpckhpd
 define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
   %x = load <2 x double>* %y, align 8
   %a = extractelement <2 x double> %x, i32 1

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/sse-align-6.ll Tue Mar 30 19:49:27 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep movups | count 1
+; RUN: llc < %s -march=x86-64 | grep movdqu | count 1
 
 define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
   %t = load <2 x i64>* %p, align 8

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/sse3.ll Tue Mar 30 19:49:27 2010
@@ -20,7 +20,7 @@
 ; X64:  pshuflw	$0, %xmm0, %xmm0
 ; X64:	xorl	%eax, %eax
 ; X64:	pinsrw	$0, %eax, %xmm0
-; X64:	movaps	%xmm0, (%rdi)
+; X64:	movdqa	%xmm0, (%rdi)
 ; X64:	ret
 }
 
@@ -32,7 +32,7 @@
         
 ; X64: t1:
 ; X64: 	movl	(%rsi), %eax
-; X64: 	movaps	(%rdi), %xmm0
+; X64: 	movdqa	(%rdi), %xmm0
 ; X64: 	pinsrw	$0, %eax, %xmm0
 ; X64: 	ret
 }
@@ -66,7 +66,7 @@
 ; X64: 	pshufhw	$100, %xmm0, %xmm2
 ; X64: 	pinsrw	$1, %eax, %xmm2
 ; X64: 	pextrw	$1, %xmm0, %eax
-; X64: 	movaps	%xmm2, %xmm0
+; X64: 	movdqa	%xmm2, %xmm0
 ; X64: 	pinsrw	$4, %eax, %xmm0
 ; X64: 	ret
 }
@@ -122,7 +122,7 @@
 ; X64: 	t8:
 ; X64: 		pshuflw	$-58, (%rsi), %xmm0
 ; X64: 		pshufhw	$-58, %xmm0, %xmm0
-; X64: 		movaps	%xmm0, (%rdi)
+; X64: 		movdqa	%xmm0, (%rdi)
 ; X64: 		ret
 }
 

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_compare.ll Tue Mar 30 19:49:27 2010
@@ -15,7 +15,7 @@
 ; CHECK: test2:
 ; CHECK: pcmp
 ; CHECK: pcmp
-; CHECK: xorps
+; CHECK: pxor
 ; CHECK: ret
 	%C = icmp sge <4 x i32> %A, %B
         %D = sext <4 x i1> %C to <4 x i32>
@@ -25,7 +25,7 @@
 define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK: test3:
 ; CHECK: pcmpgtd
-; CHECK: movaps
+; CHECK: movdqa
 ; CHECK: ret
 	%C = icmp slt <4 x i32> %A, %B
         %D = sext <4 x i1> %C to <4 x i32>
@@ -34,7 +34,7 @@
 
 define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK: test4:
-; CHECK: movaps
+; CHECK: movdqa
 ; CHECK: pcmpgtd
 ; CHECK: ret
 	%C = icmp ugt <4 x i32> %A, %B

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_return.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep xorps %t | count 1
+; RUN: grep pxor %t | count 1
 ; RUN: grep movaps %t | count 1
 ; RUN: not grep shuf %t
 

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-7.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep xorps %t | count 1
+; RUN: grep pxor %t | count 1
 ; RUN: not grep shufps %t
 
 define void @test() {

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_shuffle-9.ll Tue Mar 30 19:49:27 2010
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 define <4 x i32> @test(i8** %ptr) {
-; CHECK: xorps
+; CHECK: pxor
 ; CHECK: punpcklbw
 ; CHECK: punpcklwd
 

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xorps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
+; CHECK: xorps
 define void @foo(<4 x float>* %P) {
         %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
         %S = fadd <4 x float> zeroinitializer, %T                ; <<4 x float>> [#uses=1]
@@ -7,6 +8,7 @@
         ret void
 }
 
+; CHECK: pxor
 define void @bar(<4 x i32>* %P) {
         %T = load <4 x i32>* %P         ; <<4 x i32>> [#uses=1]
         %S = add <4 x i32> zeroinitializer, %T          ; <<4 x i32>> [#uses=1]

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/vec_zero_cse.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,4 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 2
 ; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
 
 @M1 = external global <1 x i64>

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_arith-5.ll Tue Mar 30 19:49:27 2010
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx  | FileCheck %s
-; CHECK: movaps
+; CHECK: movdqa
 ; CHECK: pmulld
 ; CHECK: psubd
 

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_cast-2.ll Tue Mar 30 19:49:27 2010
@@ -2,7 +2,7 @@
 ; CHECK: pextrd
 ; CHECK: pextrd
 ; CHECK: movd
-; CHECK: movaps
+; CHECK: movdqa
 
 
 ; bitcast v14i16 to v7i32

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/widen_load-2.ll Tue Mar 30 19:49:27 2010
@@ -5,7 +5,7 @@
 
 %i32vec3 = type <3 x i32>
 define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
-; CHECK: movaps
+; CHECK: movdqa
 ; CHECK: paddd
 ; CHECK: pextrd
 ; CHECK: movq
@@ -33,13 +33,13 @@
 
 %i32vec7 = type <7 x i32>
 define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
 ; CHECK: paddd
 ; CHECK: paddd
 ; CHECK: pextrd
 ; CHECK: movq
-; CHECK: movaps
+; CHECK: movdqa
 	%a = load %i32vec7* %ap, align 16
 	%b = load %i32vec7* %bp, align 16
 	%x = add %i32vec7 %a, %b
@@ -49,15 +49,15 @@
 
 %i32vec12 = type <12 x i32>
 define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
-; CHECK: movaps
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
+; CHECK: movdqa
 ; CHECK: paddd
 ; CHECK: paddd
 ; CHECK: paddd
-; CHECK: movaps
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
+; CHECK: movdqa
 	%a = load %i32vec12* %ap, align 16
 	%b = load %i32vec12* %bp, align 16
 	%x = add %i32vec12 %a, %b
@@ -68,7 +68,7 @@
 
 %i16vec3 = type <3 x i16>
 define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: movaps
+; CHECK: movdqa
 ; CHECK: paddw
 ; CHECK: movd
 ; CHECK: pextrw
@@ -81,7 +81,7 @@
 
 %i16vec4 = type <4 x i16>
 define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: movaps
+; CHECK: movdqa
 ; CHECK: paddw
 ; CHECK: movq
 	%a = load %i16vec4* %ap, align 16
@@ -93,12 +93,12 @@
 
 %i16vec12 = type <12 x i16>
 define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
 ; CHECK: paddw
 ; CHECK: paddw
 ; CHECK: movq
-; CHECK: movaps
+; CHECK: movdqa
 	%a = load %i16vec12* %ap, align 16
 	%b = load %i16vec12* %bp, align 16
 	%x = add %i16vec12 %a, %b
@@ -108,15 +108,15 @@
 
 %i16vec18 = type <18 x i16>
 define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
-; CHECK: movaps
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
+; CHECK: movdqa
 ; CHECK: paddw
 ; CHECK: paddw
 ; CHECK: paddw
 ; CHECK: movd
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
 	%a = load %i16vec18* %ap, align 16
 	%b = load %i16vec18* %bp, align 16
 	%x = add %i16vec18 %a, %b
@@ -127,7 +127,7 @@
 
 %i8vec3 = type <3 x i8>
 define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: movaps
+; CHECK: movdqa
 ; CHECK: paddb
 ; CHECK: pextrb
 ; CHECK: movb
@@ -140,8 +140,8 @@
 
 %i8vec31 = type <31 x i8>
 define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
-; CHECK: movaps
-; CHECK: movaps
+; CHECK: movdqa
+; CHECK: movdqa
 ; CHECK: paddb
 ; CHECK: paddb
 ; CHECK: movq

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll?rev=99976&r1=99975&r2=99976&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/xor.ll Tue Mar 30 19:49:27 2010
@@ -7,7 +7,7 @@
 	ret <4 x i32> %tmp
         
 ; X32: test1:
-; X32:	xorps	%xmm0, %xmm0
+; X32:	pxor	%xmm0, %xmm0
 ; X32:	ret
 }
 




