[llvm-branch-commits] [llvm-branch] r107465 [2/2] - in /llvm/branches/wendling/eh: ./ autoconf/ bindings/ada/llvm/ cmake/ docs/ docs/tutorial/ examples/Kaleidoscope/Chapter3/ examples/Kaleidoscope/Chapter5/ examples/Kaleidoscope/Chapter6/ examples/Kaleidoscope/Chapter7/ include/llvm-c/ include/llvm/ include/llvm/ADT/ include/llvm/Analysis/ include/llvm/CodeGen/ include/llvm/Config/ include/llvm/MC/ include/llvm/MC/MCParser/ include/llvm/Support/ include/llvm/System/ include/llvm/Target/ include/llvm/Transforms/ include/llvm/T...

Bill Wendling isanbard at gmail.com
Fri Jul 2 02:57:15 PDT 2010


Modified: llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp Fri Jul  2 04:57:13 2010
@@ -138,7 +138,7 @@
         // MOVPC32r is basically a call plus a pop instruction.
         if (Desc.getOpcode() == X86::MOVPC32r)
           emitInstruction(*I, &II->get(X86::POP32r));
-        NumEmitted++;  // Keep track of the # of mi's emitted
+        ++NumEmitted;  // Keep track of the # of mi's emitted
       }
     }
   } while (MCE.finishFunction(MF));

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp Fri Jul  2 04:57:13 2010
@@ -342,6 +342,12 @@
   const User *U = NULL;
   unsigned Opcode = Instruction::UserOp1;
   if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    // Don't walk into other basic blocks; it's possible we haven't
+    // visited them yet, so the instructions may not yet be assigned
+    // virtual registers.
+    if (MBBMap[I->getParent()] != MBB)
+      return false;
+
     Opcode = I->getOpcode();
     U = I;
   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
@@ -351,7 +357,8 @@
 
   if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
     if (Ty->getAddressSpace() > 255)
-      // Fast instruction selection doesn't support pointers through %fs or %gs
+      // Fast instruction selection doesn't support the special
+      // address spaces.
       return false;
 
   switch (Opcode) {
@@ -416,20 +423,33 @@
         Disp += SL->getElementOffset(Idx);
       } else {
         uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
-        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
-          // Constant-offset addressing.
-          Disp += CI->getSExtValue() * S;
-        } else if (IndexReg == 0 &&
-                   (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
-                   (S == 1 || S == 2 || S == 4 || S == 8)) {
-          // Scaled-index addressing.
-          Scale = S;
-          IndexReg = getRegForGEPIndex(Op).first;
-          if (IndexReg == 0)
-            return false;
-        } else
-          // Unsupported.
-          goto unsupported_gep;
+        SmallVector<const Value *, 4> Worklist;
+        Worklist.push_back(Op);
+        do {
+          Op = Worklist.pop_back_val();
+          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+            // Constant-offset addressing.
+            Disp += CI->getSExtValue() * S;
+          } else if (isa<AddOperator>(Op) &&
+                     isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+            // An add with a constant operand. Fold the constant.
+            ConstantInt *CI =
+              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+            Disp += CI->getSExtValue() * S;
+            // Add the other operand back to the work list.
+            Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
+          } else if (IndexReg == 0 &&
+                     (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+                     (S == 1 || S == 2 || S == 4 || S == 8)) {
+            // Scaled-index addressing.
+            Scale = S;
+            IndexReg = getRegForGEPIndex(Op).first;
+            if (IndexReg == 0)
+              return false;
+          } else
+            // Unsupported.
+            goto unsupported_gep;
+        } while (!Worklist.empty());
       }
     }
     // Check for displacement overflow.
@@ -915,7 +935,7 @@
       if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
           CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
         const MachineInstr *SetMI = 0;
-        unsigned Reg = lookUpRegForValue(EI);
+        unsigned Reg = getRegForValue(EI);
 
         for (MachineBasicBlock::const_reverse_iterator
                RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
@@ -1179,8 +1199,8 @@
    // Emit inline code to store the stack guard onto the stack.
     EVT PtrTy = TLI.getPointerTy();
 
-    const Value *Op1 = I.getOperand(1); // The guard's value.
-    const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+    const Value *Op1 = I.getArgOperand(0); // The guard's value.
+    const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
 
     // Grab the frame index.
     X86AddressMode AM;
@@ -1191,7 +1211,7 @@
     return true;
   }
   case Intrinsic::objectsize: {
-    ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
     const Type *Ty = I.getCalledFunction()->getReturnType();
     
     assert(CI && "Non-constant type in Intrinsic::objectsize?");
@@ -1246,8 +1266,8 @@
     if (!isTypeLegal(RetTy, VT))
       return false;
 
-    const Value *Op1 = I.getOperand(1);
-    const Value *Op2 = I.getOperand(2);
+    const Value *Op1 = I.getArgOperand(0);
+    const Value *Op2 = I.getArgOperand(1);
     unsigned Reg1 = getRegForValue(Op1);
     unsigned Reg2 = getRegForValue(Op2);
 
@@ -1290,7 +1310,7 @@
 
 bool X86FastISel::X86SelectCall(const Instruction *I) {
   const CallInst *CI = cast<CallInst>(I);
-  const Value *Callee = I->getOperand(0);
+  const Value *Callee = CI->getCalledValue();
 
   // Can't handle inline asm yet.
   if (isa<InlineAsm>(Callee))
@@ -1548,6 +1568,7 @@
   BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
 
   // Now handle call return value (if any).
+  SmallVector<unsigned, 4> UsedRegs;
   if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
     SmallVector<CCValAssign, 16> RVLocs;
     CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
@@ -1575,6 +1596,8 @@
                                     RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
     assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
     Emitted = true;
+    UsedRegs.push_back(RVLocs[0].getLocReg());
+
     if (CopyVT != RVLocs[0].getValVT()) {
      // Round the F80 to the right size, which also moves to the appropriate xmm
       // register. This is accomplished by storing the F80 value in memory and
@@ -1602,6 +1625,9 @@
     UpdateValueMap(I, ResultReg);
   }
 
+  // Set all unused physreg defs as dead.
+  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
   return true;
 }
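The X86FastISel GEP hunk above replaces a single dyn_cast with a small worklist, so an index of the form (add x, C) gets its constant folded into the displacement and x is pushed back for another look. Distilled into a standalone sketch (hypothetical helper name and signature; the patch itself updates an X86AddressMode in place and falls back to a scaled index register where this returns false):

    // Assumes the usual LLVM headers (Constants.h, Operator.h, SmallVector.h).
    // Accumulate constant index contributions into Disp.
    static bool foldConstIndex(const Value *Op, uint64_t S, int64_t &Disp) {
      SmallVector<const Value *, 4> Worklist;
      Worklist.push_back(Op);
      do {
        Op = Worklist.pop_back_val();
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          Disp += CI->getSExtValue() * S;   // constant-offset addressing
        } else if (isa<AddOperator>(Op) &&
                   isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
          // x + C: fold C into the displacement now, revisit x later.
          Disp += cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))
                      ->getSExtValue() * S;
          Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
        } else {
          return false;                     // non-constant index remains
        }
      } while (!Worklist.empty());
      return true;
    }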
 

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp Fri Jul  2 04:57:13 2010
@@ -133,7 +133,7 @@
 
       // Emit an fxch to update the runtime processors version of the state.
       BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
-      NumFXCH++;
+      ++NumFXCH;
     }
 
     void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
@@ -1021,7 +1021,7 @@
     // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
     if (StackTop == 1) {
       BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
-      NumFXCH++;
+      ++NumFXCH;
       StackTop = 0;
       break;
     }
@@ -1058,7 +1058,7 @@
       // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
       if (StackTop == 1) {
         BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
-        NumFXCH++;
+        ++NumFXCH;
         StackTop = 0;
         break;
       }

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Jul  2 04:57:13 2010
@@ -1646,6 +1646,26 @@
         SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
     }
 
+    // Prevent use of AH in a REX instruction by referencing AX instead.
+    if (HiReg == X86::AH && Subtarget->is64Bit() &&
+        !SDValue(Node, 1).use_empty()) {
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              X86::AX, MVT::i16, InFlag);
+      InFlag = Result.getValue(2);
+      // Get the low part if needed. Don't use getCopyFromReg for aliasing
+      // registers.
+      if (!SDValue(Node, 0).use_empty())
+        ReplaceUses(SDValue(Node, 0),
+          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+      // Shift AX down 8 bits.
+      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+                                              Result,
+                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
+      // Then truncate it down to i8.
+      ReplaceUses(SDValue(Node, 1),
+        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+    }
     // Copy the low half of the result, if it is needed.
     if (!SDValue(Node, 0).use_empty()) {
       SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1656,24 +1676,9 @@
     }
     // Copy the high half of the result, if it is needed.
     if (!SDValue(Node, 1).use_empty()) {
-      SDValue Result;
-      if (HiReg == X86::AH && Subtarget->is64Bit()) {
-        // Prevent use of AH in a REX instruction by referencing AX instead.
-        // Shift it down 8 bits.
-        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                        X86::AX, MVT::i16, InFlag);
-        InFlag = Result.getValue(2);
-        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
-                                                Result,
-                                   CurDAG->getTargetConstant(8, MVT::i8)), 0);
-        // Then truncate it down to i8.
-        Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
-                                                MVT::i8, Result);
-      } else {
-        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                        HiReg, NVT, InFlag);
-        InFlag = Result.getValue(2);
-      }
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              HiReg, NVT, InFlag);
+      InFlag = Result.getValue(2);
       ReplaceUses(SDValue(Node, 1), Result);
       DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
     }
@@ -1786,6 +1791,29 @@
         SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
     }
 
+    // Prevent use of AH in a REX instruction by referencing AX instead.
+    // Shift it down 8 bits.
+    if (HiReg == X86::AH && Subtarget->is64Bit() &&
+        !SDValue(Node, 1).use_empty()) {
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              X86::AX, MVT::i16, InFlag);
+      InFlag = Result.getValue(2);
+
+      // If we also need AL (the quotient), get it by extracting a subreg from
+      // Result. The fast register allocator does not like multiple CopyFromReg
+      // nodes using aliasing registers.
+      if (!SDValue(Node, 0).use_empty())
+        ReplaceUses(SDValue(Node, 0),
+          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+      // Shift AX right by 8 bits instead of using AH.
+      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+                                         Result,
+                                         CurDAG->getTargetConstant(8, MVT::i8)),
+                       0);
+      ReplaceUses(SDValue(Node, 1),
+        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+    }
     // Copy the division (low) result, if it is needed.
     if (!SDValue(Node, 0).use_empty()) {
       SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1796,25 +1824,9 @@
     }
     // Copy the remainder (high) result, if it is needed.
     if (!SDValue(Node, 1).use_empty()) {
-      SDValue Result;
-      if (HiReg == X86::AH && Subtarget->is64Bit()) {
-        // Prevent use of AH in a REX instruction by referencing AX instead.
-        // Shift it down 8 bits.
-        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                        X86::AX, MVT::i16, InFlag);
-        InFlag = Result.getValue(2);
-        Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
-                                      Result,
-                                      CurDAG->getTargetConstant(8, MVT::i8)),
-                         0);
-        // Then truncate it down to i8.
-        Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
-                                                MVT::i8, Result);
-      } else {
-        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                        HiReg, NVT, InFlag);
-        InFlag = Result.getValue(2);
-      }
+      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+                                              HiReg, NVT, InFlag);
+      InFlag = Result.getValue(2);
       ReplaceUses(SDValue(Node, 1), Result);
       DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
     }
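Both X86ISelDAGToDAG.cpp hunks above apply the same workaround: in 64-bit mode an instruction carrying a REX prefix cannot encode AH (or BH/CH/DH), so the high byte of an 8-bit multiply or divide result is obtained by copying all of AX and shifting, never by copying AH directly. As a plain C++ analogy of what the SHR16ri plus sub_8bit extract sequence computes (illustration only, not selector code):

    uint16_t ax = 0x1234;            // AL = 0x34 (low byte), AH = 0x12 (high byte)
    uint8_t  lo = uint8_t(ax);       // sub_8bit extract of the AX copy
    uint8_t  hi = uint8_t(ax >> 8);  // shift right by 8, then truncate to i8

Hoisting this block ahead of the generic low/high copies also lets the low byte come out of the same AX copy as a subregister extract; as the comment in the second hunk notes, the fast register allocator does not cope well with multiple CopyFromReg nodes on aliasing registers.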

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp Fri Jul  2 04:57:13 2010
@@ -347,6 +347,12 @@
 
   if (!Subtarget->hasSSE2())
     setOperationAction(ISD::MEMBARRIER    , MVT::Other, Expand);
+  // On X86 and X86-64, atomic operations are lowered to locked instructions.
+  // Locked instructions, in turn, have implicit fence semantics (all memory
+  // operations are flushed before issuing the locked instruction, and they
+  // are not buffered), so we can fold away the common pattern of
+  // fence-atomic-fence.
+  setShouldFoldAtomicFences(true);
 
   // Expand certain atomics
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
@@ -1012,7 +1018,6 @@
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::STORE);
-  setTargetDAGCombine(ISD::MEMBARRIER);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   if (Subtarget->is64Bit())
     setTargetDAGCombine(ISD::MUL);
@@ -2056,7 +2061,6 @@
                                      FPDiff, dl);
   }
 
-  bool WasGlobalOrExternal = false;
   if (getTargetMachine().getCodeModel() == CodeModel::Large) {
     assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
     // In the 64-bit large code model, we have to make all calls
@@ -2064,7 +2068,6 @@
     // pc-relative offset may not be large enough to hold the whole
     // address.
   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    WasGlobalOrExternal = true;
     // If the callee is a GlobalAddress node (quite common, every direct call
     // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
     // it.
@@ -2096,7 +2099,6 @@
                                           G->getOffset(), OpFlags);
     }
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    WasGlobalOrExternal = true;
     unsigned char OpFlags = 0;
 
     // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -2419,7 +2421,6 @@
         ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
         CCValAssign &VA = ArgLocs[i];
-        EVT RegVT = VA.getLocVT();
         SDValue Arg = Outs[i].Val;
         ISD::ArgFlagsTy Flags = Outs[i].Flags;
         if (VA.getLocInfo() == CCValAssign::Indirect)
@@ -4457,7 +4458,6 @@
   unsigned NumElems = VT.getVectorNumElements();
   unsigned NewWidth = (NumElems == 4) ? 2 : 4;
   EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
-  EVT MaskEltVT = MaskVT.getVectorElementType();
   EVT NewVT = MaskVT;
   switch (VT.getSimpleVT().SimpleTy) {
   default: assert(false && "Unexpected!");
@@ -6021,6 +6021,7 @@
   bool NeedCF = false;
   bool NeedOF = false;
   switch (X86CC) {
+  default: break;
   case X86::COND_A: case X86::COND_AE:
   case X86::COND_B: case X86::COND_BE:
     NeedCF = true;
@@ -6030,120 +6031,129 @@
   case X86::COND_O: case X86::COND_NO:
     NeedOF = true;
     break;
-  default: break;
   }
 
   // See if we can use the EFLAGS value from the operand instead of
   // doing a separate TEST. TEST always sets OF and CF to 0, so unless
   // we prove that the arithmetic won't overflow, we can't use OF or CF.
-  if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
-    unsigned Opcode = 0;
-    unsigned NumOperands = 0;
-    switch (Op.getNode()->getOpcode()) {
-    case ISD::ADD:
-      // Due to an isel shortcoming, be conservative if this add is
-      // likely to be selected as part of a load-modify-store
-      // instruction. When the root node in a match is a store, isel
-      // doesn't know how to remap non-chain non-flag uses of other
-      // nodes in the match, such as the ADD in this case. This leads
-      // to the ADD being left around and reselected, with the result
-      // being two adds in the output.  Alas, even if none our users
-      // are stores, that doesn't prove we're O.K.  Ergo, if we have
-      // any parents that aren't CopyToReg or SETCC, eschew INC/DEC.
-      // A better fix seems to require climbing the DAG back to the
-      // root, and it doesn't seem to be worth the effort.
-      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
-             UE = Op.getNode()->use_end(); UI != UE; ++UI)
-        if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
-          goto default_case;
-      if (ConstantSDNode *C =
-            dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
-        // An add of one will be selected as an INC.
-        if (C->getAPIntValue() == 1) {
-          Opcode = X86ISD::INC;
-          NumOperands = 1;
-          break;
-        }
-        // An add of negative one (subtract of one) will be selected as a DEC.
-        if (C->getAPIntValue().isAllOnesValue()) {
-          Opcode = X86ISD::DEC;
-          NumOperands = 1;
-          break;
-        }
+  if (Op.getResNo() != 0 || NeedOF || NeedCF)
+    // Emit a CMP with 0, which is the TEST pattern.
+    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+                       DAG.getConstant(0, Op.getValueType()));
+
+  unsigned Opcode = 0;
+  unsigned NumOperands = 0;
+  switch (Op.getNode()->getOpcode()) {
+  case ISD::ADD:
+    // Due to an isel shortcoming, be conservative if this add is likely to be
+    // selected as part of a load-modify-store instruction. When the root node
+    // in a match is a store, isel doesn't know how to remap non-chain non-flag
+    // uses of other nodes in the match, such as the ADD in this case. This
+    // leads to the ADD being left around and reselected, with the result being
+    // two adds in the output.  Alas, even if none of our users are stores, that
+    // doesn't prove we're O.K.  Ergo, if we have any parents that aren't
+    // CopyToReg or SETCC, eschew INC/DEC.  A better fix seems to require
+    // climbing the DAG back to the root, and it doesn't seem to be worth the
+    // effort.
+    for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+           UE = Op.getNode()->use_end(); UI != UE; ++UI)
+      if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+        goto default_case;
+
+    if (ConstantSDNode *C =
+        dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+      // An add of one will be selected as an INC.
+      if (C->getAPIntValue() == 1) {
+        Opcode = X86ISD::INC;
+        NumOperands = 1;
+        break;
       }
-      // Otherwise use a regular EFLAGS-setting add.
-      Opcode = X86ISD::ADD;
-      NumOperands = 2;
-      break;
-    case ISD::AND: {
-      // If the primary and result isn't used, don't bother using X86ISD::AND,
-      // because a TEST instruction will be better.
-      bool NonFlagUse = false;
-      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
-             UE = Op.getNode()->use_end(); UI != UE; ++UI) {
-        SDNode *User = *UI;
-        unsigned UOpNo = UI.getOperandNo();
-        if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
-          // Look pass truncate.
-          UOpNo = User->use_begin().getOperandNo();
-          User = *User->use_begin();
-        }
-        if (User->getOpcode() != ISD::BRCOND &&
-            User->getOpcode() != ISD::SETCC &&
-            (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
-          NonFlagUse = true;
-          break;
-        }
+
+      // An add of negative one (subtract of one) will be selected as a DEC.
+      if (C->getAPIntValue().isAllOnesValue()) {
+        Opcode = X86ISD::DEC;
+        NumOperands = 1;
+        break;
       }
-      if (!NonFlagUse)
+    }
+
+    // Otherwise use a regular EFLAGS-setting add.
+    Opcode = X86ISD::ADD;
+    NumOperands = 2;
+    break;
+  case ISD::AND: {
+    // If the primary 'and' result isn't used, don't bother using X86ISD::AND,
+    // because a TEST instruction will be better.
+    bool NonFlagUse = false;
+    for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+           UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+      SDNode *User = *UI;
+      unsigned UOpNo = UI.getOperandNo();
+      if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+        // Look past the truncate.
+        UOpNo = User->use_begin().getOperandNo();
+        User = *User->use_begin();
+      }
+
+      if (User->getOpcode() != ISD::BRCOND &&
+          User->getOpcode() != ISD::SETCC &&
+          (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+        NonFlagUse = true;
         break;
+      }
     }
+
+    if (!NonFlagUse)
+      break;
+  }
     // FALL THROUGH
-    case ISD::SUB:
-    case ISD::OR:
-    case ISD::XOR:
-      // Due to the ISEL shortcoming noted above, be conservative if this op is
-      // likely to be selected as part of a load-modify-store instruction.
-      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+  case ISD::SUB:
+  case ISD::OR:
+  case ISD::XOR:
+    // Due to the ISEL shortcoming noted above, be conservative if this op is
+    // likely to be selected as part of a load-modify-store instruction.
+    for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
            UE = Op.getNode()->use_end(); UI != UE; ++UI)
-        if (UI->getOpcode() == ISD::STORE)
-          goto default_case;
-      // Otherwise use a regular EFLAGS-setting instruction.
-      switch (Op.getNode()->getOpcode()) {
-      case ISD::SUB: Opcode = X86ISD::SUB; break;
-      case ISD::OR:  Opcode = X86ISD::OR;  break;
-      case ISD::XOR: Opcode = X86ISD::XOR; break;
-      case ISD::AND: Opcode = X86ISD::AND; break;
-      default: llvm_unreachable("unexpected operator!");
-      }
-      NumOperands = 2;
-      break;
-    case X86ISD::ADD:
-    case X86ISD::SUB:
-    case X86ISD::INC:
-    case X86ISD::DEC:
-    case X86ISD::OR:
-    case X86ISD::XOR:
-    case X86ISD::AND:
-      return SDValue(Op.getNode(), 1);
-    default:
-    default_case:
-      break;
-    }
-    if (Opcode != 0) {
-      SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
-      SmallVector<SDValue, 4> Ops;
-      for (unsigned i = 0; i != NumOperands; ++i)
-        Ops.push_back(Op.getOperand(i));
-      SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
-      DAG.ReplaceAllUsesWith(Op, New);
-      return SDValue(New.getNode(), 1);
+      if (UI->getOpcode() == ISD::STORE)
+        goto default_case;
+
+    // Otherwise use a regular EFLAGS-setting instruction.
+    switch (Op.getNode()->getOpcode()) {
+    default: llvm_unreachable("unexpected operator!");
+    case ISD::SUB: Opcode = X86ISD::SUB; break;
+    case ISD::OR:  Opcode = X86ISD::OR;  break;
+    case ISD::XOR: Opcode = X86ISD::XOR; break;
+    case ISD::AND: Opcode = X86ISD::AND; break;
     }
+
+    NumOperands = 2;
+    break;
+  case X86ISD::ADD:
+  case X86ISD::SUB:
+  case X86ISD::INC:
+  case X86ISD::DEC:
+  case X86ISD::OR:
+  case X86ISD::XOR:
+  case X86ISD::AND:
+    return SDValue(Op.getNode(), 1);
+  default:
+  default_case:
+    break;
   }
 
-  // Otherwise just emit a CMP with 0, which is the TEST pattern.
-  return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
-                     DAG.getConstant(0, Op.getValueType()));
+  if (Opcode == 0)
+    // Emit a CMP with 0, which is the TEST pattern.
+    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+                       DAG.getConstant(0, Op.getValueType()));
+
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+  SmallVector<SDValue, 4> Ops;
+  for (unsigned i = 0; i != NumOperands; ++i)
+    Ops.push_back(Op.getOperand(i));
+
+  SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+  DAG.ReplaceAllUsesWith(Op, New);
+  return SDValue(New.getNode(), 1);
 }
 
 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
@@ -6170,15 +6180,21 @@
     Op1 = Op1.getOperand(0);
 
   SDValue LHS, RHS;
-  if (Op1.getOpcode() == ISD::SHL) {
-    if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0)))
-      if (And10C->getZExtValue() == 1) {
-        LHS = Op0;
-        RHS = Op1.getOperand(1);
-      }
-  } else if (Op0.getOpcode() == ISD::SHL) {
+  if (Op1.getOpcode() == ISD::SHL)
+    std::swap(Op0, Op1);
+  if (Op0.getOpcode() == ISD::SHL) {
     if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
       if (And00C->getZExtValue() == 1) {
+        // If we looked past a truncate, check that it's only truncating away
+        // known zeros.
+        unsigned BitWidth = Op0.getValueSizeInBits();
+        unsigned AndBitWidth = And.getValueSizeInBits();
+        if (BitWidth > AndBitWidth) {
+          APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
+          DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+          if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
+            return SDValue();
+        }
         LHS = Op1;
         RHS = Op0.getOperand(1);
       }
@@ -6618,6 +6634,7 @@
             SDNode *NewBR =
               DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
             assert(NewBR == User);
+            (void)NewBR;
             Dest = FalseBB;
 
             Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -6689,7 +6706,6 @@
 
   SDValue Flag;
 
-  EVT IntPtr = getPointerTy();
   EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
 
   Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
@@ -6769,9 +6785,6 @@
 SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
   // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
   assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
-  SDValue Chain = Op.getOperand(0);
-  SDValue SrcPtr = Op.getOperand(1);
-  SDValue SrcSV = Op.getOperand(2);
 
   report_fatal_error("VAArgInst is not yet implemented for x86-64!");
   return SDValue();
@@ -9840,61 +9853,6 @@
   return SDValue();
 }
 
-// On X86 and X86-64, atomic operations are lowered to locked instructions.
-// Locked instructions, in turn, have implicit fence semantics (all memory
-// operations are flushed before issuing the locked instruction, and the
-// are not buffered), so we can fold away the common pattern of
-// fence-atomic-fence.
-static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
-  SDValue atomic = N->getOperand(0);
-  switch (atomic.getOpcode()) {
-    case ISD::ATOMIC_CMP_SWAP:
-    case ISD::ATOMIC_SWAP:
-    case ISD::ATOMIC_LOAD_ADD:
-    case ISD::ATOMIC_LOAD_SUB:
-    case ISD::ATOMIC_LOAD_AND:
-    case ISD::ATOMIC_LOAD_OR:
-    case ISD::ATOMIC_LOAD_XOR:
-    case ISD::ATOMIC_LOAD_NAND:
-    case ISD::ATOMIC_LOAD_MIN:
-    case ISD::ATOMIC_LOAD_MAX:
-    case ISD::ATOMIC_LOAD_UMIN:
-    case ISD::ATOMIC_LOAD_UMAX:
-      break;
-    default:
-      return SDValue();
-  }
-
-  SDValue fence = atomic.getOperand(0);
-  if (fence.getOpcode() != ISD::MEMBARRIER)
-    return SDValue();
-
-  switch (atomic.getOpcode()) {
-    case ISD::ATOMIC_CMP_SWAP:
-      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
-                                    fence.getOperand(0),
-                                    atomic.getOperand(1), atomic.getOperand(2),
-                                    atomic.getOperand(3)), atomic.getResNo());
-    case ISD::ATOMIC_SWAP:
-    case ISD::ATOMIC_LOAD_ADD:
-    case ISD::ATOMIC_LOAD_SUB:
-    case ISD::ATOMIC_LOAD_AND:
-    case ISD::ATOMIC_LOAD_OR:
-    case ISD::ATOMIC_LOAD_XOR:
-    case ISD::ATOMIC_LOAD_NAND:
-    case ISD::ATOMIC_LOAD_MIN:
-    case ISD::ATOMIC_LOAD_MAX:
-    case ISD::ATOMIC_LOAD_UMIN:
-    case ISD::ATOMIC_LOAD_UMAX:
-      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
-                                    fence.getOperand(0),
-                                    atomic.getOperand(1), atomic.getOperand(2)),
-                     atomic.getResNo());
-    default:
-      return SDValue();
-  }
-}
-
 static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
   // (i32 zext (and (i8  x86isd::setcc_carry), 1)) ->
   //           (and (i32 x86isd::setcc_carry), 1)
@@ -9942,7 +9900,6 @@
   case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
   case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
   case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
-  case ISD::MEMBARRIER:     return PerformMEMBARRIERCombine(N, DAG);
   case ISD::ZERO_EXTEND:    return PerformZExtCombine(N, DAG);
   }
 
@@ -10065,8 +10022,8 @@
   // so don't worry about this.
 
   // Verify this is a simple bswap.
-  if (CI->getNumOperands() != 2 ||
-      CI->getType() != CI->getOperand(1)->getType() ||
+  if (CI->getNumArgOperands() != 1 ||
+      CI->getType() != CI->getArgOperand(0)->getType() ||
       !CI->getType()->isIntegerTy())
     return false;
 
@@ -10079,7 +10036,7 @@
   Module *M = CI->getParent()->getParent()->getParent();
   Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
 
-  Value *Op = CI->getOperand(1);
+  Value *Op = CI->getArgOperand(0);
   Op = CallInst::Create(Int, Op, CI->getName(), CI);
 
   CI->replaceAllUsesWith(Op);
@@ -10212,7 +10169,6 @@
 /// vector.  If it is invalid, don't add anything to Ops.
 void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                      char Constraint,
-                                                     bool hasMemory,
                                                      std::vector<SDValue>&Ops,
                                                      SelectionDAG &DAG) const {
   SDValue Result(0, 0);
@@ -10286,6 +10242,13 @@
       break;
     }
 
+    // In any sort of PIC mode addresses need to be computed at runtime by
+    // adding in a register or some sort of table lookup.  These can't
+    // be used as immediates.
+    if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC() ||
+        Subtarget->isPICStyleRIPRel())
+      return;
+
     // If we are in non-pic codegen mode, we allow the address of a global (with
     // an optional displacement) to be used with 'i'.
     GlobalAddressSDNode *GA = 0;
@@ -10321,11 +10284,7 @@
                                                         getTargetMachine())))
       return;
 
-    if (hasMemory)
-      Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
-    else
-      Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
-    Result = Op;
+    Result = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
     break;
   }
   }
@@ -10334,8 +10293,7 @@
     Ops.push_back(Result);
     return;
   }
-  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
-                                                      Ops, DAG);
+  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
 std::vector<unsigned> X86TargetLowering::
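Taken together, the fence-related X86ISelLowering.cpp hunks above trade the hand-written PerformMEMBARRIERCombine (deleted, along with its setTargetDAGCombine(ISD::MEMBARRIER) registration) for a single setShouldFoldAtomicFences(true), letting target-independent code perform the same fold. The pattern in question arises from source like this (GCC-style builtins; a sketch assuming the frontend emits explicit fences around the atomic):

    int counter;

    void bump(void) {
      __sync_synchronize();               // leading fence
      __sync_fetch_and_add(&counter, 1);  // lowered to a LOCK'ed add on x86
      __sync_synchronize();               // trailing fence
    }

Because the lock-prefixed add is already a full barrier on x86, both fences can be dropped, which is exactly what the deleted combine did by hand for each ATOMIC_* opcode.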

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h Fri Jul  2 04:57:13 2010
@@ -500,7 +500,6 @@
     /// being processed is 'm'.
     virtual void LowerAsmOperandForConstraint(SDValue Op,
                                               char ConstraintLetter,
-                                              bool hasMemory,
                                               std::vector<SDValue> &Ops,
                                               SelectionDAG &DAG) const;
     

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td Fri Jul  2 04:57:13 2010
@@ -1093,7 +1093,7 @@
 //  Logical Instructions...
 //
 
-let isTwoAddress = 1 , AddedComplexity = 15 in
+let Constraints = "$src = $dst" , AddedComplexity = 15 in
 def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
                 [(set GR64:$dst, (not GR64:$src))]>;
 def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
@@ -1103,7 +1103,7 @@
 def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
                      "and{q}\t{$src, %rax|%rax, $src}", []>;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let isCommutable = 1 in
 def AND64rr  : RI<0x21, MRMDestReg, 
                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -1130,7 +1130,7 @@
                        "and{q}\t{$src2, $dst|$dst, $src2}",
                        [(set GR64:$dst, EFLAGS,
                              (X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 def AND64mr  : RI<0x21, MRMDestMem,
                   (outs), (ins i64mem:$dst, GR64:$src),
@@ -1148,7 +1148,7 @@
              [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
               (implicit EFLAGS)]>;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let isCommutable = 1 in
 def OR64rr   : RI<0x09, MRMDestReg, (outs GR64:$dst), 
                   (ins GR64:$src1, GR64:$src2),
@@ -1175,7 +1175,7 @@
                      "or{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, EFLAGS,
                         (X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                 "or{q}\t{$src, $dst|$dst, $src}",
@@ -1193,7 +1193,7 @@
 def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
                     "or{q}\t{$src, %rax|%rax, $src}", []>;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let isCommutable = 1 in
 def XOR64rr  : RI<0x31, MRMDestReg,  (outs GR64:$dst), 
                   (ins GR64:$src1, GR64:$src2), 
@@ -1220,7 +1220,7 @@
                       "xor{q}\t{$src2, $dst|$dst, $src2}",
                       [(set GR64:$dst, EFLAGS,
                             (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 def XOR64mr  : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                   "xor{q}\t{$src, $dst|$dst, $src}",
@@ -1362,7 +1362,7 @@
 } // Defs = [EFLAGS]
 
 // Conditional moves
-let Uses = [EFLAGS], isTwoAddress = 1 in {
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
 let isCommutable = 1 in {
 def CMOVB64rr : RI<0x42, MRMSrcReg,       // if <u, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -1526,7 +1526,7 @@
                    "cmovno{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                      X86_COND_NO, EFLAGS))]>, TB;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 // Use sbb to materialize carry flag into a GPR.
 // FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
@@ -1584,7 +1584,7 @@
                        "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
                            (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
                            "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
@@ -1597,7 +1597,7 @@
                            [(set VR128:$dst,
                              (int_x86_sse2_cvtsi642sd VR128:$src1,
                               (loadi64 addr:$src2)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 // Signed i64 -> f32
 def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
@@ -1607,7 +1607,7 @@
                        "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                        [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
   def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
                               (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
                               "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
@@ -1621,7 +1621,7 @@
                               [(set VR128:$dst,
                                 (int_x86_sse_cvtsi642ss VR128:$src1,
                                  (loadi64 addr:$src2)))]>;
-}
+} // Constraints = "$src1 = $dst"
 
 // f32 -> signed i64
 def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
@@ -1687,6 +1687,7 @@
 // Thread Local Storage Instructions
 //===----------------------------------------------------------------------===//
 
+// ELF TLS Support
 // All calls clobber the non-callee saved registers. RSP is marked as
 // a use to prevent stack-pointer assignments that appear immediately
 // before calls from potentially appearing dead.
@@ -1705,16 +1706,14 @@
                   [(X86tlsaddr tls64addr:$sym)]>,
                   Requires<[In64BitMode]>;
 
-// FIXME: Not true for darwin
-let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-            FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
-            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-    Uses = [RSP],
+// Darwin TLS Support
+// For x86_64, the address of the thunk is passed in %rdi; on return
+// the address of the variable is in %rax.  All other registers are preserved.
+let Defs = [RAX],
+    Uses = [RDI],
     usesCustomInserter = 1 in
 def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
-                  "# Fixme into a call",
+                  "# TLSCall_64",
                   [(X86TLSCall addr:$sym)]>,
                   Requires<[In64BitMode]>;
 
@@ -1891,6 +1890,8 @@
           (MOV64ri tjumptable  :$dst)>, Requires<[FarData]>;
 def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
           (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+          (MOV64ri tglobaltlsaddr :$dst)>, Requires<[FarData]>;
 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
           (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
 def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
@@ -1905,6 +1906,8 @@
           (MOV64ri64i32 tjumptable  :$dst)>, Requires<[SmallCode]>;
 def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
           (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+          (MOV64ri64i32 tglobaltlsaddr :$dst)>, Requires<[SmallCode]>;
 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
           (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
 def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
@@ -1919,6 +1922,8 @@
           (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
           (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+          (MOV64ri32 tglobaltlsaddr :$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
           (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
@@ -1936,6 +1941,9 @@
 def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
           Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaltlsaddr:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tglobaltlsaddr:$src)>,
+          Requires<[NearData, IsStatic]>;
 def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, texternalsym:$src)>,
           Requires<[NearData, IsStatic]>;
@@ -2363,7 +2371,7 @@
 
 defm PEXTRQ      : SS41I_extract64<0x16, "pextrq">;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
   multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> {
     def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
@@ -2380,6 +2388,6 @@
                      (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
                                        imm:$src3)))]>, OpSize, REX_W;
   }
-}
+} // Constraints = "$src1 = $dst"
 
 defm PINSRQ      : SS41I_insert64<0x22, "pinsrq">;

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td Fri Jul  2 04:57:13 2010
@@ -371,7 +371,7 @@
                                         Requires<[HasCMov]>;
 }
 
-let Uses = [EFLAGS], isTwoAddress = 1 in {
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
 defm CMOVB  : FPCMov<X86_COND_B>;
 defm CMOVBE : FPCMov<X86_COND_BE>;
 defm CMOVE  : FPCMov<X86_COND_E>;
@@ -380,7 +380,7 @@
 defm CMOVNBE: FPCMov<X86_COND_A>;
 defm CMOVNE : FPCMov<X86_COND_NE>;
 defm CMOVNP : FPCMov<X86_COND_NP>;
-}
+} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
 
 let Predicates = [HasCMov] in {
 // These are not factored because there's no clean way to pass DA/DB.

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td Fri Jul  2 04:57:13 2010
@@ -83,7 +83,6 @@
 class OpSize { bit hasOpSizePrefix = 1; }
 class AdSize { bit hasAdSizePrefix = 1; }
 class REX_W  { bit hasREX_WPrefix = 1; }
-class VEX_4V { bit hasVEX_4VPrefix = 1; }
 class LOCK   { bit hasLockPrefix = 1; }
 class SegFS  { bits<2> SegOvrBits = 1; }
 class SegGS  { bits<2> SegOvrBits = 2; }
@@ -102,6 +101,9 @@
 class T8     { bits<4> Prefix = 13; }
 class TA     { bits<4> Prefix = 14; }
 class TF     { bits<4> Prefix = 15; }
+class VEX    { bit hasVEXPrefix = 1; }
+class VEX_W  { bit hasVEX_WPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
 
 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
               string AsmStr, Domain d = GenericDomain>
@@ -125,11 +127,13 @@
 
   bits<4> Prefix = 0;       // Which prefix byte does this inst have?
   bit hasREX_WPrefix  = 0;  // Does this inst require the REX.W prefix?
-  bit hasVEX_4VPrefix  = 0;  // Does this inst requires the VEX.VVVV prefix?
   FPFormat FPForm = NotFP;  // What flavor of FP instruction is this?
   bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
   bits<2> SegOvrBits = 0;   // Segment override prefix.
   Domain ExeDomain = d;
+  bit hasVEXPrefix = 0;     // Does this inst require a VEX prefix?
+  bit hasVEX_WPrefix = 0;   // Does this inst set the VEX_W field?
+  bit hasVEX_4VPrefix = 0;  // Does this inst require the VEX.VVVV field?
 
   // TSFlags layout should be kept in sync with X86InstrInfo.h.
   let TSFlags{5-0}   = FormBits;
@@ -143,7 +147,9 @@
   let TSFlags{21-20} = SegOvrBits;
   let TSFlags{23-22} = ExeDomain.Value;
   let TSFlags{31-24} = Opcode;
-  let TSFlags{32}    = hasVEX_4VPrefix;
+  let TSFlags{32}    = hasVEXPrefix;
+  let TSFlags{33}    = hasVEX_WPrefix;
+  let TSFlags{34}    = hasVEX_4VPrefix;
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
@@ -217,9 +223,45 @@
 // SI - SSE 1 & 2 scalar instructions
 class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
       : I<o, F, outs, ins, asm, pattern> {
-  let Predicates = !if(hasVEX_4VPrefix /* VEX_4V */,
+  let Predicates = !if(hasVEXPrefix /* VEX */,
             !if(!eq(Prefix, 11 /* XD */), [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
-            !if(!eq(Prefix, 12 /* XS */), [HasSSE2], [HasSSE1]));
+            !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+  // AVX instructions have a 'v' prefix in the mnemonic
+  let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// SIi8 - SSE 1 & 2 scalar instructions
+class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+      : Ii8<o, F, outs, ins, asm, pattern> {
+  let Predicates = !if(hasVEXPrefix /* VEX */,
+            !if(!eq(Prefix, 11 /* XD */), [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
+            !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+  // AVX instructions have a 'v' prefix in the mnemonic
+  let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PI - SSE 1 & 2 packed instructions
+class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+         Domain d>
+      : I<o, F, outs, ins, asm, pattern, d> {
+  let Predicates = !if(hasVEXPrefix /* VEX */,
+        !if(hasOpSizePrefix /* OpSize */, [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
+        !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+  // AVX instructions have a 'v' prefix in the mnemonic
+  let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PIi8 - SSE 1 & 2 packed instructions with immediate
+class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern, Domain d>
+      : Ii8<o, F, outs, ins, asm, pattern, d> {
+  let Predicates = !if(hasVEX_4VPrefix /* VEX_4V */,
+        !if(hasOpSizePrefix /* OpSize */, [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
+        !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
 
   // AVX instructions have a 'v' prefix in the mnemonic
   let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
@@ -247,12 +289,12 @@
         Requires<[HasSSE1]>;
 class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
-      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, VEX_4V,
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
         Requires<[HasAVX, HasSSE1]>;
 class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
-        VEX_4V, Requires<[HasAVX, HasSSE1]>;
+        Requires<[HasAVX, HasSSE1]>;
 
 // SSE2 Instruction Templates:
 // 
@@ -281,12 +323,12 @@
         Requires<[HasSSE2]>;
 class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
-      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, VEX_4V,
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
         Requires<[HasAVX, HasSSE2]>;
 class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
       : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
-        VEX_4V, OpSize, Requires<[HasAVX, HasSSE2]>;
+        OpSize, Requires<[HasAVX, HasSSE2]>;
 
 // SSE3 Instruction Templates:
 // 

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp Fri Jul  2 04:57:13 2010
@@ -3007,16 +3007,16 @@
 
   EVT VT = Load1->getValueType(0);
   switch (VT.getSimpleVT().SimpleTy) {
-  default: {
+  default:
     // XMM registers. In 64-bit mode we can be a bit more aggressive since we
     // have 16 of them to play with.
     if (TM.getSubtargetImpl()->is64Bit()) {
       if (NumLoads >= 3)
         return false;
-    } else if (NumLoads)
+    } else if (NumLoads) {
       return false;
+    }
     break;
-  }
   case MVT::i8:
   case MVT::i16:
   case MVT::i32:
@@ -3025,6 +3025,7 @@
   case MVT::f64:
     if (NumLoads)
       return false;
+    break;
   }
 
   return true;

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h Fri Jul  2 04:57:13 2010
@@ -424,13 +424,14 @@
   // those enums below are used, TSFlags must be shifted right by 32 first.
   enum {
     //===------------------------------------------------------------------===//
-    // VEX_4V - VEX prefixes are instruction prefixes used in AVX.
+    // VEXPrefix - VEX prefixes are instruction prefixes used in AVX.
     // VEX_4V is used to specify an additional AVX/SSE register. Several 2
     // address instructions in SSE are represented as 3 address ones in AVX
     // and the additional register is encoded in VEX_VVVV prefix.
     //
-    VEXShift    = 0,
-    VEX_4V      = 1 << VEXShift
+    VEX         = 1,
+    VEX_W       = 1 << 1,
+    VEX_4V      = 1 << 2
   };
 
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
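With this change the single VEX_4V bit grows into three flags (VEX, VEX_W, VEX_4V) occupying TSFlags bits 32-34, matching the new TSFlags{32}, TSFlags{33}, and TSFlags{34} assignments in X86InstrFormats.td above. A hypothetical consumer would test them like this (sketch only; assumes the enum lives in its usual X86II namespace, and per the comment above, TSFlags must be shifted right by 32 first):

    static inline bool hasVEX_4VPrefix(uint64_t TSFlags) {
      return ((TSFlags >> 32) & X86II::VEX_4V) != 0;  // bit 34 of TSFlags
    }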

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td Fri Jul  2 04:57:13 2010
@@ -820,7 +820,18 @@
                Requires<[In32BitMode]>;
 }
 
-let isTwoAddress = 1 in                               // GR32 = bswap GR32
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+    mayLoad=1, neverHasSideEffects=1 in {
+def POPA32   : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
+               Requires<[In32BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+    mayStore=1, neverHasSideEffects=1 in {
+def PUSHA32  : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
+               Requires<[In32BitMode]>;
+}
+
+let Uses = [EFLAGS], Constraints = "$src = $dst" in     // GR32 = bswap GR32
   def BSWAP32r : I<0xC8, AddRegFrm,
                    (outs GR32:$dst), (ins GR32:$src),
                    "bswap{l}\t$dst", 
@@ -1244,7 +1255,7 @@
 //===----------------------------------------------------------------------===//
 //  Two address Instructions.
 //
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 
 // Conditional moves
 let Uses = [EFLAGS] in {
@@ -1645,7 +1656,7 @@
 // i8 register pressure. Note that CMOV_GR8 is conservatively considered to
 // clobber EFLAGS, because if one of the operands is zero, the expansion
 // could involve an xor.
-let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in {
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
 def CMOV_GR8 : I<0, Pseudo,
                  (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
                  "#CMOV_GR8 PSEUDO!",
@@ -1664,86 +1675,106 @@
                     [(set GR16:$dst,
                       (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
 def CMOV_RFP32 : I<0, Pseudo,
-                    (outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+                    (outs RFP32:$dst),
+                    (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
                     "#CMOV_RFP32 PSEUDO!",
-                    [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+                    [(set RFP32:$dst,
+                      (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
                                                   EFLAGS))]>;
 def CMOV_RFP64 : I<0, Pseudo,
-                    (outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+                    (outs RFP64:$dst),
+                    (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
                     "#CMOV_RFP64 PSEUDO!",
-                    [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+                    [(set RFP64:$dst,
+                      (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
                                                   EFLAGS))]>;
 def CMOV_RFP80 : I<0, Pseudo,
-                    (outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+                    (outs RFP80:$dst),
+                    (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
                     "#CMOV_RFP80 PSEUDO!",
-                    [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+                    [(set RFP80:$dst,
+                      (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
                                                   EFLAGS))]>;
 } // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] 
+} // usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
 } // Uses = [EFLAGS]
 
 
 // unary instructions
 let CodeSize = 2 in {
 let Defs = [EFLAGS] in {
-def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst",
-               [(set GR8:$dst, (ineg GR8:$src)),
+def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "neg{b}\t$dst",
+               [(set GR8:$dst, (ineg GR8:$src1)),
                 (implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst",
-               [(set GR16:$dst, (ineg GR16:$src)),
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+               "neg{w}\t$dst",
+               [(set GR16:$dst, (ineg GR16:$src1)),
                 (implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst",
-               [(set GR32:$dst, (ineg GR32:$src)),
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+               "neg{l}\t$dst",
+               [(set GR32:$dst, (ineg GR32:$src1)),
                 (implicit EFLAGS)]>;
-let isTwoAddress = 0 in {
-  def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst",
+
+let Constraints = "" in {
+  def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+                 "neg{b}\t$dst",
                  [(store (ineg (loadi8 addr:$dst)), addr:$dst),
                   (implicit EFLAGS)]>;
-  def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst",
+  def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+                 "neg{w}\t$dst",
                  [(store (ineg (loadi16 addr:$dst)), addr:$dst),
                   (implicit EFLAGS)]>, OpSize;
-  def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst",
+  def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+                 "neg{l}\t$dst",
                  [(store (ineg (loadi32 addr:$dst)), addr:$dst),
                   (implicit EFLAGS)]>;
-}
+} // Constraints = ""
 } // Defs = [EFLAGS]
 
 // Match xor -1 to not. Favors these over a move imm + xor to save code size.
 let AddedComplexity = 15 in {
-def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
-               [(set GR8:$dst, (not GR8:$src))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
-               [(set GR16:$dst, (not GR16:$src))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
-               [(set GR32:$dst, (not GR32:$src))]>;
-}
-let isTwoAddress = 0 in {
-  def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
+def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "not{b}\t$dst",
+               [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+               "not{w}\t$dst",
+               [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+               "not{l}\t$dst",
+               [(set GR32:$dst, (not GR32:$src1))]>;
+}
+let Constraints = "" in {
+  def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+                 "not{b}\t$dst",
                  [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
-  def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst",
+  def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+                 "not{w}\t$dst",
                  [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
-  def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst",
+  def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+                 "not{l}\t$dst",
                  [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-}
+} // Constraints = ""
 } // CodeSize
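// How the "favors" above is implemented (assumed semantics): the
// AddedComplexity value is added to each pattern's matching cost, so
// instruction selection tries the NOT forms ahead of the generic
// move-imm-plus-xor expansion whenever both could match, e.g. a hypothetical
//   let AddedComplexity = 15 in
//   def : Pat<(not GR32:$src), (NOT32r GR32:$src)>;
// outranks an otherwise-equal pattern without the wrapper.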
 
 // TODO: inc/dec is slow for P4, but fast for Pentium-M.
 let Defs = [EFLAGS] in {
 let CodeSize = 2 in
-def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
-               [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>;
+def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "inc{b}\t$dst",
+               [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
 
 let isConvertibleToThreeAddress = 1, CodeSize = 1 in {  // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), 
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), 
                "inc{w}\t$dst",
-               [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
+               [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
              OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), 
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), 
                "inc{l}\t$dst",
-               [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
+               [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
              Requires<[In32BitMode]>;
 }
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
   def INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
                [(store (add (loadi8 addr:$dst), 1), addr:$dst),
                 (implicit EFLAGS)]>;
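
Two flags in the inc/dec block above carry most of the meaning.
isConvertibleToThreeAddress = 1 lets the two-address pass trade the tied form
for an untied lea when register allocation cannot reuse the source, and
Requires<[In32BitMode]> is needed because the one-byte 0x40+r / 0x48+r
encodings double as REX prefixes in 64-bit mode. A sketch of the rewrite the
flag enables (assembly shown as comments, assumed behavior):

    // incl %eax            ; INC32r: input and output share %eax
    // leal 1(%eax), %ecx   ; LEA32r: same add, destination free to differ
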
@@ -1755,23 +1786,24 @@
                [(store (add (loadi32 addr:$dst), 1), addr:$dst),
                 (implicit EFLAGS)]>,
                Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
 
 let CodeSize = 2 in
-def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
-               [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>;
+def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "dec{b}\t$dst",
+               [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
 let isConvertibleToThreeAddress = 1, CodeSize = 1 in {   // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), 
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), 
                "dec{w}\t$dst",
-               [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
+               [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
              OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), 
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), 
                "dec{l}\t$dst",
-               [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
+               [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
              Requires<[In32BitMode]>;
-}
+} // CodeSize = 2
 
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
   def DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
                [(store (add (loadi8 addr:$dst), -1), addr:$dst),
                 (implicit EFLAGS)]>;
@@ -1783,7 +1815,7 @@
                [(store (add (loadi32 addr:$dst), -1), addr:$dst),
                 (implicit EFLAGS)]>,
                Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
 } // Defs = [EFLAGS]
 
 // Logical operators...
@@ -1862,7 +1894,7 @@
                    [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
                                                          i32immSExt8:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def AND8mr   : I<0x20, MRMDestMem,
                    (outs), (ins i8mem :$dst, GR8 :$src),
                    "and{b}\t{$src, $dst|$dst, $src}",
@@ -1914,7 +1946,7 @@
   def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
                       "and{l}\t{$src, %eax|%eax, $src}", []>;
 
-}
+} // Constraints = ""
 
 
 let isCommutable = 1 in {   // X = OR Y, Z   --> X = OR Z, Y
@@ -1988,7 +2020,7 @@
                    "or{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
                                                         i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def OR8mr  : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
                  "or{b}\t{$src, $dst|$dst, $src}",
                  [(store (or (load addr:$dst), GR8:$src), addr:$dst),
@@ -2030,7 +2062,7 @@
                       "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
                       "or{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
 
 
 let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
@@ -2107,7 +2139,7 @@
                    [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
                                                          i32immSExt8:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def XOR8mr   : I<0x30, MRMDestMem,
                    (outs), (ins i8mem :$dst, GR8 :$src),
                    "xor{b}\t{$src, $dst|$dst, $src}",
@@ -2158,26 +2190,27 @@
                       "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
                       "xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
 } // Defs = [EFLAGS]
 
 // Shift instructions
 let Defs = [EFLAGS] in {
 let Uses = [CL] in {
-def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "shl{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (shl GR8:$src, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
                  "shl{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
                  "shl{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (shl GR32:$src, CL))]>;
+                 [(set GR32:$dst, (shl GR32:$src1, CL))]>;
 } // Uses = [CL]
 
 def SHL8ri   : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
                    "shl{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
 let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
 def SHL16ri  : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
                    "shl{w}\t{$src2, $dst|$dst, $src2}",
@@ -2198,7 +2231,7 @@
 
 } // isConvertibleToThreeAddress = 1
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def SHL8mCL  : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
                    "shl{b}\t{%cl, $dst|$dst, CL}",
@@ -2232,18 +2265,18 @@
   def SHL32m1  : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
                    "shl{l}\t$dst",
                  [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
 
 let Uses = [CL] in {
-def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "shr{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (srl GR8:$src, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
                  "shr{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
                  "shr{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (srl GR32:$src, CL))]>;
+                 [(set GR32:$dst, (srl GR32:$src1, CL))]>;
 }
 
 def SHR8ri   : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
@@ -2267,7 +2300,7 @@
                  "shr{l}\t$dst",
                  [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def SHR8mCL  : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
                    "shr{b}\t{%cl, $dst|$dst, CL}",
@@ -2301,18 +2334,18 @@
   def SHR32m1  : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
                    "shr{l}\t$dst",
                  [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
 
 let Uses = [CL] in {
-def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
+def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "sar{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (sra GR8:$src, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
                  "sar{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
                  "sar{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (sra GR32:$src, CL))]>;
+                 [(set GR32:$dst, (sra GR32:$src1, CL))]>;
 }
 
 def SAR8ri   : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2337,7 +2370,7 @@
                  "sar{l}\t$dst",
                  [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def SAR8mCL  : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
                    "sar{b}\t{%cl, $dst|$dst, CL}",
@@ -2371,65 +2404,65 @@
   def SAR32m1  : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
                    "sar{l}\t$dst",
                  [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
 
 // Rotate instructions
 
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
                "rcl{b}\t{1, $dst|$dst, 1}", []>;
 let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
                 "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
 }
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
                  "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
   
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
                 "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
 let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
                  "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
 }
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
                   "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
 
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
                 "rcl{l}\t{1, $dst|$dst, 1}", []>;
 let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
                  "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
 }
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
                   "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
                   
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
                "rcr{b}\t{1, $dst|$dst, 1}", []>;
 let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
                 "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
 }
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
                  "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
   
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
                 "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
 let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
                  "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
 }
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
                   "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
 
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
                 "rcr{l}\t{1, $dst|$dst, 1}", []>;
 let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
                  "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
 }
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
                   "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
 def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
                "rcl{b}\t{1, $dst|$dst, 1}", []>;
 def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
@@ -2469,19 +2502,19 @@
 def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
                  "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
 }
-}
+} // Constraints = ""
 
 // FIXME: provide shorter instructions when imm8 == 1
 let Uses = [CL] in {
-def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "rol{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (rotl GR8:$src, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
                  "rol{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
                  "rol{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (rotl GR32:$src, CL))]>;
+                 [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
 }
 
 def ROL8ri   : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2506,7 +2539,7 @@
                  "rol{l}\t$dst",
                  [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def ROL8mCL  : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
                    "rol{b}\t{%cl, $dst|$dst, CL}",
@@ -2540,18 +2573,18 @@
   def ROL32m1  : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
                    "rol{l}\t$dst",
                 [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
 
 let Uses = [CL] in {
-def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "ror{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (rotr GR8:$src, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
                  "ror{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
                  "ror{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (rotr GR32:$src, CL))]>;
+                 [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
 }
 
 def ROR8ri   : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2576,7 +2609,7 @@
                  "ror{l}\t$dst",
                  [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def ROR8mCL  : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
                    "ror{b}\t{%cl, $dst|$dst, CL}",
@@ -2610,8 +2643,7 @@
   def ROR32m1  : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
                    "ror{l}\t$dst",
                 [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
-
+} // Constraints = ""
 
 
 // Double shift instructions (generalizations of rotate)
@@ -2667,7 +2699,7 @@
                      TB, OpSize;
 }
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
                      "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
@@ -2713,7 +2745,7 @@
                       [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
                                         (i8 imm:$src3)), addr:$dst)]>,
                        TB, OpSize;
-}
+} // Constraints = ""
 } // Defs = [EFLAGS]
 
 
@@ -2799,7 +2831,7 @@
                          (X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
 }
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   // Memory-Register Addition
   def ADD8mr   : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                    "add{b}\t{$src2, $dst|$dst, $src2}",
@@ -2843,7 +2875,7 @@
                       "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
                       "add{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
 
 let Uses = [EFLAGS] in {
 let isCommutable = 1 in {  // X = ADC Y, Z --> X = ADC Z, Y
@@ -2905,7 +2937,7 @@
                    "adc{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def ADC8mr   : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                    "adc{b}\t{$src2, $dst|$dst, $src2}",
                    [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -2940,7 +2972,7 @@
                       "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
                       "adc{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
 } // Uses = [EFLAGS]
 
 // Register-Register Subtraction
@@ -3012,7 +3044,7 @@
                    [(set GR32:$dst, EFLAGS,
                          (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   // Memory-Register Subtraction
   def SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
                    "sub{b}\t{$src2, $dst|$dst, $src2}",
@@ -3057,7 +3089,7 @@
                       "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
                       "sub{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
 
 let Uses = [EFLAGS] in {
 def SBB8rr     : I<0x18, MRMDestReg, (outs GR8:$dst),
@@ -3073,7 +3105,7 @@
                   "sbb{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def SBB8mr   : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), 
                    "sbb{b}\t{$src2, $dst|$dst, $src2}",
                    [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -3108,7 +3140,7 @@
                       "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
                       "sbb{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
 
 let isCodeGenOnly = 1 in {
 def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
@@ -3816,6 +3848,7 @@
 // Thread Local Storage Instructions
 //
 
+// ELF TLS Support
 // All calls clobber the non-callee saved registers. ESP is marked as
 // a use to prevent stack-pointer assignments that appear immediately
 // before calls from potentially appearing dead.
@@ -3830,15 +3863,15 @@
                   [(X86tlsaddr tls32addr:$sym)]>,
                   Requires<[In32BitMode]>;
 
-// FIXME: Not true for darwin
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
-            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack; on return,
+// the address of the variable is in %eax.  %ecx is trashed during the
+// function call.  All other registers are preserved.
+let Defs = [EAX, ECX],
     Uses = [ESP],
     usesCustomInserter = 1 in
 def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
-                "# Fixme into a call",
+                "# TLSCall_32",
                 [(X86TLSCall addr:$sym)]>,
                 Requires<[In32BitMode]>;
                 
@@ -4800,14 +4833,14 @@
 // Patterns for nodes that do not produce flags, for instructions that do.
 
 // Increment reg.
-def : Pat<(add GR8:$src ,  1), (INC8r  GR8:$src)>;
-def : Pat<(add GR16:$src,  1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src,  1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 ,  1), (INC8r  GR8:$src1)>;
+def : Pat<(add GR16:$src1,  1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1,  1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
 
 // Decrement reg.
-def : Pat<(add GR8:$src , -1), (DEC8r  GR8:$src)>;
-def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 , -1), (DEC8r  GR8:$src1)>;
+def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
 
 // or reg/reg.
 def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr  GR8 :$src1, GR8 :$src2)>;

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td Fri Jul  2 04:57:13 2010
@@ -368,1073 +368,593 @@
 }
 
 //===----------------------------------------------------------------------===//
-// SSE1 Instructions
+// SSE 1 & 2 Instructions Classes
 //===----------------------------------------------------------------------===//
 
-// Move Instructions. Register-to-register movss is not used for FR32
-// register copies because it's a partial register update; FsMOVAPSrr is
-// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movss for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSSrr : SSI<0x10, MRMSrcReg,
-                  (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
-                  "movss\t{$src2, $dst|$dst, $src2}",
-                  [(set (v4f32 VR128:$dst),
-                        (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
+/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
+multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           RegisterClass RC, X86MemOperand x86memop> {
+  let isCommutable = 1 in {
+    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                OpcodeStr, [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+  }
+  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+              OpcodeStr, [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+}
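// A single defm line stamps out both of the forms above; hypothetical
// instantiation (assumed usage, not a def from this patch):
//
//   defm MYADD : sse12_fp_scalar<0x58,
//                  "myadd\t{$src2, $dst|$dst, $src2}",
//                  fadd, FR32, f32mem>;
//
// expands to MYADDrr (reg, reg) and MYADDrm (reg, mem), the latter folding
// the load into the fadd pattern.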
+
+/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
+multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                               string asm, string SSEVer, string FPSizeStr,
+                               Operand memopr, ComplexPattern mem_cpat> {
+  def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                  asm, [(set RC:$dst, (
+                                !nameconcat<Intrinsic>("int_x86_sse",
+                                !strconcat(SSEVer, !strconcat("_",
+                                !strconcat(OpcodeStr, FPSizeStr))))
+                         RC:$src1, RC:$src2))]>;
+  def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+                  asm, [(set RC:$dst, (
+                                !nameconcat<Intrinsic>("int_x86_sse",
+                                !strconcat(SSEVer, !strconcat("_",
+                                !strconcat(OpcodeStr, FPSizeStr))))
+                         RC:$src1, mem_cpat:$src2))]>;
+}
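// Worked example of the name splicing above (operand values assumed): with
// SSEVer = "2", OpcodeStr = "add" and FPSizeStr = "_sd", the nested
// !strconcat chain builds
//   "int_x86_sse" ++ "2" ++ "_" ++ "add" ++ "_sd" ==> "int_x86_sse2_add_sd"
// and !nameconcat<Intrinsic> resolves that string to the intrinsic record,
// letting one multiclass serve both the SSE1 and SSE2 families.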
+
+/// sse12_fp_packed - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           RegisterClass RC, ValueType vt,
+                           X86MemOperand x86memop, PatFrag mem_frag,
+                           Domain d, bit MayLoad = 0> {
+  let isCommutable = 1 in
+    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                OpcodeStr, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],d>;
+  let mayLoad = MayLoad in
+    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+                OpcodeStr, [(set RC:$dst, (OpNode RC:$src1,
+                                                  (mem_frag addr:$src2)))],d>;
+}
+
+/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
+                                      string OpcodeStr, X86MemOperand x86memop,
+                                      list<dag> pat_rr, list<dag> pat_rm> {
+  let isCommutable = 1 in
+    def rr : PI<opc, MRMSrcReg, (outs RC:$dst),
+                (ins RC:$src1, RC:$src2), OpcodeStr, pat_rr, d>;
+  def rm : PI<opc, MRMSrcMem, (outs RC:$dst),
+                (ins RC:$src1, x86memop:$src2), OpcodeStr, pat_rm, d>;
+}
+
+/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
+multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                               string asm, string SSEVer, string FPSizeStr,
+                               X86MemOperand x86memop, PatFrag mem_frag,
+                               Domain d> {
+  def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                  asm, [(set RC:$dst, (
+                                !nameconcat<Intrinsic>("int_x86_sse",
+                                !strconcat(SSEVer, !strconcat("_",
+                                !strconcat(OpcodeStr, FPSizeStr))))
+                         RC:$src1, RC:$src2))], d>;
+  def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+                  asm, [(set RC:$dst, (
+                                !nameconcat<Intrinsic>("int_x86_sse",
+                                !strconcat(SSEVer, !strconcat("_",
+                                !strconcat(OpcodeStr, FPSizeStr))))
+                         RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Instructions
+//===----------------------------------------------------------------------===//
+
+class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+      SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
+      [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+
+// Loading from memory automatically zeroing upper bits.
+class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+                    PatFrag mem_pat, string OpcodeStr> :
+      SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                        [(set RC:$dst, (mem_pat addr:$src))]>;
+
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and, as just mentioned, we don't use movss/movsd for copies.
+let isAsmParserOnly = 1 in {
+  def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+                  "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+  def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+                  "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+
+  let canFoldAsLoad = 1, isReMaterializable = 1 in {
+    def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+
+    let AddedComplexity = 20 in
+      def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+  }
+}
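// A gloss on the AVX-only block just closed (assumed from the VEX encoding
// rules): VEX_4V marks instructions whose extra source register is carried
// in the VEX.vvvv field, which is what makes the three-operand syntax
// "$src2, $src1, $dst" encodable, e.g.
//   vmovss %xmm2, %xmm1, %xmm0   ; reads %xmm1/%xmm2, writes %xmm0
// and isAsmParserOnly = 1 exposes these defs to the assembler without
// letting instruction selection pick them.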
+
+let Constraints = "$src1 = $dst" in {
+  def MOVSSrr : sse12_move_rr<FR32, v4f32,
+                          "movss\t{$src2, $dst|$dst, $src2}">, XS;
+  def MOVSDrr : sse12_move_rr<FR64, v2f64,
+                          "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+  def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+
+  let AddedComplexity = 20 in
+    def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+}
 
+let AddedComplexity = 15 in {
 // Extract the low 32-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
 def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
           (MOVSSrr (v4f32 VR128:$src1),
                    (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// Extract the low 64-bit value from one vector and insert it into another.
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+          (MOVSDrr (v2f64 VR128:$src1),
+                   (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+}
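// Reading these patterns (register naming assumed): sub_ss and sub_sd are
// the f32 and f64 subregister indices of an XMM register, so the
// EXTRACT_SUBREG pulls the scalar out of $src2 and the MOVSSrr/MOVSDrr it
// feeds merges that scalar into the low lane of $src1, i.e.
//   (v4f32 (movl A, B)) ==> MOVSSrr A, (EXTRACT_SUBREG B, sub_ss)
//   result = [ B[0], A[1], A[2], A[3] ]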
 
 // Implicitly promote a 32-bit scalar to a vector.
 def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
           (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
 
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
-                  "movss\t{$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (loadf32 addr:$src))]>;
-
+let AddedComplexity = 20 in {
 // MOVSSrm zeros the high parts of the register; represent this
 // with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
           (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
           (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
           (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
 }
 
 // Store scalar value to memory.
 def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                   "movss\t{$src, $dst|$dst, $src}",
                   [(store FR32:$src, addr:$dst)]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+                  "movsd\t{$src, $dst|$dst, $src}",
+                  [(store FR64:$src, addr:$dst)]>;
+
+let isAsmParserOnly = 1 in {
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+                  "movss\t{$src, $dst|$dst, $src}",
+                  [(store FR32:$src, addr:$dst)]>, XS, VEX_4V;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+                  "movsd\t{$src, $dst|$dst, $src}",
+                  [(store FR64:$src, addr:$dst)]>, XD, VEX_4V;
+}
 
 // Extract and store.
 def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                  addr:$dst),
           (MOVSSmr addr:$dst,
                    (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+                 addr:$dst),
+          (MOVSDmr addr:$dst,
+                   (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
 
-// Conversion instructions
-def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
-                      "cvttss2si\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
-def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
-                      "cvttss2si\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
-def CVTSI2SSrr  : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
-                      "cvtsi2ss\t{$src, $dst|$dst, $src}",
-                      [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SSrm  : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
-                      "cvtsi2ss\t{$src, $dst|$dst, $src}",
-                      [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
-
-// Match intrinsics which expect XMM operand(s).
-def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
-                    "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>;
-def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
-                    "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>;
-
-def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                         "cvtss2si\t{$src, $dst|$dst, $src}",
-                         [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>;
-def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
-                         "cvtss2si\t{$src, $dst|$dst, $src}",
-                         [(set GR32:$dst, (int_x86_sse_cvtss2si
-                                           (load addr:$src)))]>;
-
-// Match intrinsics which expect MM and XMM operand(s).
-def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                         "cvtps2pi\t{$src, $dst|$dst, $src}",
-                         [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
-def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
-                         "cvtps2pi\t{$src, $dst|$dst, $src}",
-                         [(set VR64:$dst, (int_x86_sse_cvtps2pi
-                                           (load addr:$src)))]>;
-def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                         "cvttps2pi\t{$src, $dst|$dst, $src}",
-                         [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
-def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
-                         "cvttps2pi\t{$src, $dst|$dst, $src}",
-                         [(set VR64:$dst, (int_x86_sse_cvttps2pi
-                                           (load addr:$src)))]>;
-let Constraints = "$src1 = $dst" in {
-  def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
-                           (outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
-                        "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
-                                           VR64:$src2))]>;
-  def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
-                           (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
-                        "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
-                                            (load addr:$src2)))]>;
-}
-
-// Aliases for intrinsics
-def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                          "cvttss2si\t{$src, $dst|$dst, $src}",
-                          [(set GR32:$dst,
-                            (int_x86_sse_cvttss2si VR128:$src))]>;
-def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
-                          "cvttss2si\t{$src, $dst|$dst, $src}",
-                          [(set GR32:$dst,
-                            (int_x86_sse_cvttss2si(load addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
-  def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
-                           (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
-                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
-                           [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
-                                              GR32:$src2))]>;
-  def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
-                           (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
-                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
-                           [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
-                                              (loadi32 addr:$src2)))]>;
+// Move Aligned/Unaligned floating point values
+multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
+                            X86MemOperand x86memop, PatFrag ld_frag,
+                            string asm, Domain d,
+                            bit IsReMaterializable = 1> {
+let neverHasSideEffects = 1 in
+  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
+let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
+  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+                   [(set RC:$dst, (ld_frag addr:$src))], d>;
 }
 
-// Comparison instructions
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
-  def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
-                    (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
-                    "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-  def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
-                    (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
-                    "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+let isAsmParserOnly = 1 in {
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+                              "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+                              "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+                              "movups", SSEPackedSingle>, VEX;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+                              "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+}
+defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+                              "movaps", SSEPackedSingle>, TB;
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+                              "movapd", SSEPackedDouble>, TB, OpSize;
+defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+                              "movups", SSEPackedSingle>, TB;
+defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+                              "movupd", SSEPackedDouble, 0>, TB, OpSize;
 
-  // Accept explicit immediate argument form instead of comparison code.
 let isAsmParserOnly = 1 in {
-  def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg,
-                    (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
-                    "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-let mayLoad = 1 in
-  def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem,
-                    (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
-                    "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-}
+def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movaps\t{$src, $dst|$dst, $src}",
+                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movapd\t{$src, $dst|$dst, $src}",
+                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movups\t{$src, $dst|$dst, $src}",
+                   [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movupd\t{$src, $dst|$dst, $src}",
+                   [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
 }
+def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movaps\t{$src, $dst|$dst, $src}",
+                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movapd\t{$src, $dst|$dst, $src}",
+                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movups\t{$src, $dst|$dst, $src}",
+                   [(store (v4f32 VR128:$src), addr:$dst)]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                   "movupd\t{$src, $dst|$dst, $src}",
+                   [(store (v2f64 VR128:$src), addr:$dst)]>;
 
-let Defs = [EFLAGS] in {
-def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
-                   "ucomiss\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>;
-def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
-                   "ucomiss\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>;
-
-def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                  "comiss\t{$src2, $src1|$src1, $src2}", []>;
-def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
-                  "comiss\t{$src2, $src1|$src1, $src2}", []>;
-
-} // Defs = [EFLAGS]
-
-// Aliases to match intrinsics which expect XMM operand(s).
-let Constraints = "$src1 = $dst" in {
-  def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
-                        (outs VR128:$dst),
-                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
-                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (int_x86_sse_cmp_ss
-                                             VR128:$src1,
-                                             VR128:$src, imm:$cc))]>;
-  def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
-                        (outs VR128:$dst),
-                        (ins VR128:$src1, f32mem:$src, SSECC:$cc),
-                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
-                                           (load addr:$src), imm:$cc))]>;
+// Intrinsic forms of MOVUPS/D load and store
+let isAsmParserOnly = 1 in {
+  let canFoldAsLoad = 1, isReMaterializable = 1 in
+  def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
+             (ins f128mem:$src),
+             "movups\t{$src, $dst|$dst, $src}",
+             [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
+  def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
+             (ins f128mem:$src),
+             "movupd\t{$src, $dst|$dst, $src}",
+             [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
+  def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+             (ins f128mem:$dst, VR128:$src),
+             "movups\t{$src, $dst|$dst, $src}",
+             [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+  def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+             (ins f128mem:$dst, VR128:$src),
+             "movupd\t{$src, $dst|$dst, $src}",
+             [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
 }
-
-let Defs = [EFLAGS] in {
-def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                       "ucomiss\t{$src2, $src1|$src1, $src2}",
-                       [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
-                                               VR128:$src2))]>;
-def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
-                       "ucomiss\t{$src2, $src1|$src1, $src2}",
-                       [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
-                                               (load addr:$src2)))]>;
-
-def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                      "comiss\t{$src2, $src1|$src1, $src2}",
-                      [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
-                                             VR128:$src2))]>;
-def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
-                      "comiss\t{$src2, $src1|$src1, $src2}",
-                      [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
-                                             (load addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases of packed SSE1 instructions for scalar use. These all have names
-// that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
-    canFoldAsLoad = 1 in
-  // FIXME: Set encoding to pseudo!
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
-                 [(set FR32:$dst, fp32imm0)]>,
-                 Requires<[HasSSE1]>, TB, OpSize;
-
-// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
-// disregarded.
-let neverHasSideEffects = 1 in
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
-                     "movaps\t{$src, $dst|$dst, $src}", []>;
-
-// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
-// disregarded.
 let canFoldAsLoad = 1, isReMaterializable = 1 in
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
-                     "movaps\t{$src, $dst|$dst, $src}",
-                     [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
-
-/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
-///
-multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
-                                       SDNode OpNode, int NoPat = 0,
-                                       bit MayLoad = 0, bit Commutable = 1> {
-  def PSrr : PSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
-                      !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-                      !if(NoPat, []<dag>,
-                          [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))])> {
-    let isCommutable = Commutable;
-  }
+def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "movups\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
+def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "movupd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
 
-  def PDrr : PDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
-                      !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-                      !if(NoPat, []<dag>,
-                          [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))])> {
-    let isCommutable = Commutable;
-  }
+def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                       "movups\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                       "movupd\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
 
-  def PSrm : PSI<opc, MRMSrcMem, (outs FR32:$dst),
-                      (ins FR32:$src1, f128mem:$src2),
-                      !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-                      !if(NoPat, []<dag>,
-                          [(set FR32:$dst, (OpNode FR32:$src1,
-                                               (memopfsf32 addr:$src2)))])> {
-    let mayLoad = MayLoad;
-  }
-
-  def PDrm : PDI<opc, MRMSrcMem, (outs FR64:$dst),
-                      (ins FR64:$src1, f128mem:$src2),
-                      !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-                      !if(NoPat, []<dag>,
-                          [(set FR64:$dst, (OpNode FR64:$src1,
-                                               (memopfsf64 addr:$src2)))])> {
-    let mayLoad = MayLoad;
-  }
+// Move Low/High packed floating point values
+multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
+                                 PatFrag mov_frag, string base_opc,
+                                 string asm_opr> {
+  def PSrm : PI<opc, MRMSrcMem,
+         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+         !strconcat(!strconcat(base_opc,"s"), asm_opr),
+     [(set RC:$dst,
+       (mov_frag RC:$src1,
+              (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
+              SSEPackedSingle>, TB;
+
+  def PDrm : PI<opc, MRMSrcMem,
+         (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+         !strconcat(!strconcat(base_opc,"d"), asm_opr),
+     [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+                              (scalar_to_vector (loadf64 addr:$src2)))))],
+              SSEPackedDouble>, TB, OpSize;
+}
+
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+  defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+  defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+  defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+                                   "\t{$src2, $dst|$dst, $src2}">;
+  defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+                                   "\t{$src2, $dst|$dst, $src2}">;
 }
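
For reference, hand-substituting the parameters shows what defm MOVL above
expands to (a sketch by substitution, not patch text):

  def MOVLPSrm : PI<0x12, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
         "movlps\t{$src2, $dst|$dst, $src2}",
         [(set VR128:$dst,
           (movlp VR128:$src1,
                  (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
         SSEPackedSingle>, TB;

plus the analogous MOVLPDrm double-precision record; together these replace
the hand-written MOVLPSrm/MOVHPSrm definitions removed below.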
 
-// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let Constraints = "$src1 = $dst" in {
-  defm FsAND  : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
-  defm FsOR   : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
-  defm FsXOR  : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
-
-  let neverHasSideEffects = 1 in
-    defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, 1, 1, 0>;
+let isAsmParserOnly = 1 in {
+def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movlps\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+                                 (iPTR 0))), addr:$dst)]>, VEX;
+def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movlpd\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract (v2f64 VR128:$src),
+                                 (iPTR 0))), addr:$dst)]>, VEX;
 }
+def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movlps\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+                                 (iPTR 0))), addr:$dst)]>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movlpd\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract (v2f64 VR128:$src),
+                                 (iPTR 0))), addr:$dst)]>;
 
-/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
-multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                           RegisterClass RC, X86MemOperand memop> {
-  let isCommutable = 1 in {
-    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
-                OpcodeStr, [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
-  }
-  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memop:$src2),
-              OpcodeStr, [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0 so the non-store version isn't too horrible.
+let isAsmParserOnly = 1 in {
+def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movhps\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract
+                                 (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+                                         (undef)), (iPTR 0))), addr:$dst)]>,
+                   VEX;
+def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movhpd\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract
+                                 (v2f64 (unpckh VR128:$src, (undef))),
+                                 (iPTR 0))), addr:$dst)]>,
+                   VEX;
 }
+def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movhps\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract
+                                 (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+                                         (undef)), (iPTR 0))), addr:$dst)]>;
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movhpd\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract
+                                 (v2f64 (unpckh VR128:$src, (undef))),
+                                 (iPTR 0))), addr:$dst)]>;
 
-/// basic_sse12_fp_binop_rm - SSE 1 & 2 binops come in both scalar and
-/// vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation.  This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a scalar)
-/// and leaves the top elements unmodified (therefore these cannot be commuted).
-///
-/// These three forms can each be reg+reg or reg+mem, so there are a total of
-/// six "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass basic_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
-                                 SDNode OpNode, bit Commutable = 0> {
-
-  let Constraints = "", isAsmParserOnly = 1, hasVEX_4VPrefix = 1 in {
-    // Scalar operation, reg+reg.
-    let Prefix = 12 /* XS */ in
-      defm V#NAME#SS : sse12_fp_scalar<opc,
-        !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   OpNode, FR32, f32mem>;
-
-    let Prefix = 11 /* XD */ in
-      defm V#NAME#SD : sse12_fp_scalar<opc,
-        !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   OpNode, FR64, f64mem>;
-  }
-
-  let Constraints = "$src1 = $dst" in {
-    // Scalar operation, reg+reg.
-    let Prefix = 12 /* XS */ in
-      defm SS : sse12_fp_scalar<opc,
-                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-                      OpNode, FR32, f32mem>;
-    let Prefix = 11 /* XD */ in
-      defm SD : sse12_fp_scalar<opc,
-                      !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
-                      OpNode, FR64, f64mem>;
-  }
-
-  // Vector operation, reg+reg.
-  def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
-                                 (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
-    let isCommutable = Commutable;
-  }
-
-  def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
-                                 (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
-    let isCommutable = Commutable;
-  }
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
+                                       (ins VR128:$src1, VR128:$src2),
+                      "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set VR128:$dst,
+                        (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+                      VEX_4V;
+  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
+                                       (ins VR128:$src1, VR128:$src2),
+                      "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set VR128:$dst,
+                        (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+                      VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+                                       (ins VR128:$src1, VR128:$src2),
+                      "movlhps\t{$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst,
+                        (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
+  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+                                       (ins VR128:$src1, VR128:$src2),
+                      "movhlps\t{$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst,
+                        (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+}
 
-  def V#NAME#PSrr : VPSI<opc, MRMSrcReg, (outs VR128:$dst),
-               (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr,
-                          "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-               []> {
-    let isCommutable = Commutable;
-    let Constraints = "";
-    let isAsmParserOnly = 1;
-  }
+def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+          (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+let AddedComplexity = 20 in {
+  def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+            (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+  def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+            (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+}
 
-  def V#NAME#PDrr : VPDI<opc, MRMSrcReg, (outs VR128:$dst),
-               (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr,
-                          "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-               []> {
-    let isCommutable = Commutable;
-    let Constraints = "";
-    let isAsmParserOnly = 1;
-  }
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Conversion Instructions
+//===----------------------------------------------------------------------===//
 
-  // Vector operation, reg+mem.
-  def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
-                                 (ins VR128:$src1, f128mem:$src2),
-                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-             [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
-
-  def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
-                                 (ins VR128:$src1, f128mem:$src2),
-                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-             [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
-
-  def V#NAME#PSrm : VPSI<opc, MRMSrcMem, (outs VR128:$dst),
-               (ins VR128:$src1, f128mem:$src2),
-               !strconcat(OpcodeStr,
-                          "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []> {
-    let Constraints = "";
-    let isAsmParserOnly = 1;
-  }
-
-  def V#NAME#PDrm : VPDI<opc, MRMSrcMem, (outs VR128:$dst),
-               (ins VR128:$src1, f128mem:$src2),
-               !strconcat(OpcodeStr,
-                          "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []> {
-    let Constraints = "";
-    let isAsmParserOnly = 1;
-  }
-
-  // Intrinsic operation, reg+reg.
-  def V#NAME#SSrr_Int : VSSI<opc, MRMSrcReg, (outs VR128:$dst),
-                                             (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
-                          !strconcat(OpcodeStr, "_ss")) VR128:$src1,
-                                                      VR128:$src2))]> {
-                          // int_x86_sse_xxx_ss
-    let Constraints = "";
-  }
+multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                     SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+                     string asm> {
+  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+                        [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+                        [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
+}
+
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                         SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+                         string asm, Domain d> {
+  def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+                        [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
+  def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+                        [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                     SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+                     string asm> {
+  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+              asm, []>;
+  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+              (ins DstRC:$src1, x86memop:$src), asm, []>;
+}
 
-  def V#NAME#SDrr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
-                                             (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
-                          !strconcat(OpcodeStr, "_sd")) VR128:$src1,
-                                                      VR128:$src2))]> {
-                          // int_x86_sse2_xxx_sd
-    let Constraints = "";
-  }
+let isAsmParserOnly = 1 in {
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSI2SS  : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+                      "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS,
+                      VEX_4V;
+defm VCVTSI2SD  : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+                      "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD,
+                      VEX_4V;
+}
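
The sse12_vcvt_avx forms above take an extra DstRC:$src1 operand (the VEX
three-operand encoding) and intentionally carry empty patterns. As a sketch
by substitution (not patch text), defm VCVTSI2SS expands to:

  def VCVTSI2SSrr : SI<0x2A, MRMSrcReg, (outs FR32:$dst),
                       (ins FR32:$src1, GR32:$src),
                       "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                       []>, XS, VEX_4V;

together with the corresponding rm form taking an i32mem source.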
+
+defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SS  : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+                      "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SD  : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+                      "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+
+// Conversion Instructions Intrinsics - Match intrinsics which expect MM
+// and/or XMM operand(s).
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                         Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+                         string asm, Domain d> {
+  def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+                        [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+  def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+                        [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                         Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+                         string asm> {
+  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+                        [(set DstRC:$dst, (Int SrcRC:$src))]>;
+  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+                        [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+                    PatFrag ld_frag, string asm, Domain d> {
+  def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+              asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+  def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+                   (ins DstRC:$src1, x86memop:$src2), asm,
+              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
+
+multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
+                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+                    PatFrag ld_frag, string asm> {
+  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+              asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+                   (ins DstRC:$src1, x86memop:$src2), asm,
+              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
+}
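
The _3addr multiclasses add a DstRC:$src1 input which the instantiation site
either ties to $dst via Constraints (SSE forms) or leaves free (VEX forms).
As a sketch by substitution (not patch text), defm Int_CVTSI2SS below yields:

  def Int_CVTSI2SSrr : SI<0x2A, MRMSrcReg, (outs VR128:$dst),
                          (ins VR128:$src1, GR32:$src2),
                          "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
                                             GR32:$src2))]>, XS;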
 
-  def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
-                                     (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
-                          !strconcat(OpcodeStr, "_ss")) VR128:$src1,
-                                                      VR128:$src2))]>;
-                          // int_x86_sse_xxx_ss
+let isAsmParserOnly = 1 in {
+  defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+                        f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
+                        VEX;
+  defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+                        f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
+                        VEX;
+}
+defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+                      f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+                      f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
 
-  def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
-                                     (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
-                          !strconcat(OpcodeStr, "_sd")) VR128:$src1,
-                                                      VR128:$src2))]>;
-                          // int_x86_sse2_xxx_sd
-
-  // Intrinsic operation, reg+mem.
-  def V#NAME#SSrm_Int : VSSI<opc, MRMSrcMem, (outs VR128:$dst),
-                                             (ins VR128:$src1, ssmem:$src2),
-       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
-                          !strconcat(OpcodeStr, "_ss")) VR128:$src1,
-                                               sse_load_f32:$src2))]> {
-                          // int_x86_sse_xxx_ss
-    let Constraints = "";
-  }
 
-  def V#NAME#SDrm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
-                                             (ins VR128:$src1, sdmem:$src2),
-       !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
-                          !strconcat(OpcodeStr, "_sd")) VR128:$src1,
-                                               sse_load_f64:$src2))]> {
-                          // int_x86_sse2_xxx_sd
-    let Constraints = "";
-  }
+let Constraints = "$src1 = $dst" in {
+  defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                        int_x86_sse_cvtsi2ss, i32mem, loadi32,
+                        "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+  defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                        int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+                        "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD;
+}
+
+// Instructions below don't have an AVX form.
+defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+                      f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+                      SSEPackedSingle>, TB;
+defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+                      f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+                      SSEPackedDouble>, TB, OpSize;
+defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+                       f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+                       SSEPackedSingle>, TB;
+defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+                       f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+                       SSEPackedDouble>, TB, OpSize;
+defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+                         i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+                         SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+  defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+                         int_x86_sse_cvtpi2ps,
+                         i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedSingle>, TB;
+}
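
These cover the MMX-operand (VR64) conversions, replacing the hand-written
Int_CVT*PI definitions removed below. As a sketch by substitution (not patch
text), defm Int_CVTPD2PI expands to:

  def Int_CVTPD2PIrr : PI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
                          "cvtpd2pi\t{$src, $dst|$dst, $src}",
                          [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))],
                          SSEPackedDouble>, TB, OpSize;

plus the matching rm form loading from f128mem.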
 
-  def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
-                                     (ins VR128:$src1, ssmem:$src2),
-       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
-                          !strconcat(OpcodeStr, "_ss")) VR128:$src1,
-                                               sse_load_f32:$src2))]>;
-                          // int_x86_sse_xxx_ss
+/// SSE 1 Only
 
-  def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
-                                     (ins VR128:$src1, sdmem:$src2),
-       !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
-                          !strconcat(OpcodeStr, "_sd")) VR128:$src1,
-                                               sse_load_f64:$src2))]>;
-                          // int_x86_sse2_xxx_sd
-}
+// Aliases for intrinsics
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+                int_x86_sse_cvttss2si, f32mem, load,
+                "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
+defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+                int_x86_sse2_cvttsd2si, f128mem, load,
+                "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
+}
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+                          f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
+                          XS;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+                          f128mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
+                          XD;
+
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+                          "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
+                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                            SSEPackedSingle>, TB, VEX;
+}
+let Pattern = []<dag> in {
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
+                          "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                            SSEPackedSingle>, TB; /* PD SSE3 form is available */
 }
 
-// Arithmetic instructions
-defm ADD : basic_sse12_fp_binop_rm<0x58, "add", fadd, 1>;
-defm MUL : basic_sse12_fp_binop_rm<0x59, "mul", fmul, 1>;
+/// SSE 2 Only
 
-let isCommutable = 0 in {
-  defm SUB : basic_sse12_fp_binop_rm<0x5C, "sub", fsub>;
-  defm DIV : basic_sse12_fp_binop_rm<0x5E, "div", fdiv>;
+// Convert scalar double to scalar single
+let isAsmParserOnly = 1 in {
+def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+                       (ins FR64:$src1, FR64:$src2),
+                      "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                      VEX_4V;
+def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+                       (ins FR64:$src1, f64mem:$src2),
+                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      []>, XD, Requires<[HasAVX, HasSSE2, OptForSize]>, VEX_4V;
 }
-
-/// sse12_fp_binop_rm - Other SSE 1 & 2 binops
-///
-/// This multiclass is like basic_sse12_fp_binop_rm, with the addition of
-/// instructions for a full-vector intrinsic form.  Operations that map
-/// onto C operators don't use this form since they just use the plain
-/// vector form instead of having a separate vector intrinsic form.
-///
-/// This provides a total of eight "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
-                            SDNode OpNode, bit Commutable = 0> {
-
-  let Constraints = "", isAsmParserOnly = 1, hasVEX_4VPrefix = 1 in {
-    // Scalar operation, reg+reg.
-    let Prefix = 12 /* XS */ in
-      defm V#NAME#SS : sse12_fp_scalar<opc,
-        !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   OpNode, FR32, f32mem>;
-
-    let Prefix = 11 /* XD */ in
-      defm V#NAME#SD : sse12_fp_scalar<opc,
-        !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   OpNode, FR64, f64mem>;
-  }
-
-  let Constraints = "$src1 = $dst" in {
-    // Scalar operation, reg+reg.
-    let Prefix = 12 /* XS */ in
-      defm SS : sse12_fp_scalar<opc,
-                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-                      OpNode, FR32, f32mem>;
-    let Prefix = 11 /* XD */ in
-      defm SD : sse12_fp_scalar<opc,
-                      !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
-                      OpNode, FR64, f64mem>;
-  }
-
-  // Vector operation, reg+reg.
-  def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
-                                 (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
-    let isCommutable = Commutable;
-  }
-
-  def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
-                                 (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
-    let isCommutable = Commutable;
-  }
-
-  // Vector operation, reg+mem.
-  def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
-                                 (ins VR128:$src1, f128mem:$src2),
-                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-             [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
-
-  def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
-                                 (ins VR128:$src1, f128mem:$src2),
-                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-             [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
-
-  // Intrinsic operation, reg+reg.
-  def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
-                                     (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
-                          !strconcat(OpcodeStr, "_ss")) VR128:$src1,
-                                                      VR128:$src2))]> {
-                          // int_x86_sse_xxx_ss
-    let isCommutable = Commutable;
-  }
-
-  def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
-                                     (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
-                          !strconcat(OpcodeStr, "_sd")) VR128:$src1,
-                                                      VR128:$src2))]> {
-                          // int_x86_sse2_xxx_sd
-    let isCommutable = Commutable;
-  }
-
-  // Intrinsic operation, reg+mem.
-  def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
-                                     (ins VR128:$src1, ssmem:$src2),
-       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
-                          !strconcat(OpcodeStr, "_ss")) VR128:$src1,
-                                               sse_load_f32:$src2))]>;
-                          // int_x86_sse_xxx_ss
-
-  def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
-                                     (ins VR128:$src1, sdmem:$src2),
-       !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
-                          !strconcat(OpcodeStr, "_sd")) VR128:$src1,
-                                               sse_load_f64:$src2))]>;
-                          // int_x86_sse2_xxx_sd
-
-  // Vector intrinsic operation, reg+reg.
-  def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
-                                     (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
-                          !strconcat(OpcodeStr, "_ps")) VR128:$src1,
-                                                      VR128:$src2))]> {
-                          // int_x86_sse_xxx_ps
-    let isCommutable = Commutable;
-  }
-
-  def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst),
-                                     (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
-                          !strconcat(OpcodeStr, "_pd")) VR128:$src1,
-                                                      VR128:$src2))]> {
-                          // int_x86_sse2_xxx_pd
-    let isCommutable = Commutable;
-  }
-
-  // Vector intrinsic operation, reg+mem.
-  def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst),
-                                     (ins VR128:$src1, f128mem:$src2),
-       !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
-                          !strconcat(OpcodeStr, "_ps")) VR128:$src1,
-                                                 (memopv4f32 addr:$src2)))]>;
-                          // int_x86_sse_xxx_ps
-
-  def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst),
-                                     (ins VR128:$src1, f128mem:$src2),
-       !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-       [(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
-                          !strconcat(OpcodeStr, "_pd")) VR128:$src1,
-                                                 (memopv2f64 addr:$src2)))]>;
-                          // int_x86_sse2_xxx_pd
-}
-}
-
-let isCommutable = 0 in {
-  defm MAX : sse12_fp_binop_rm<0x5F, "max", X86fmax>;
-  defm MIN : sse12_fp_binop_rm<0x5D, "min", X86fmin>;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE packed FP Instructions
-
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movaps\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                   "movaps\t{$src, $dst|$dst, $src}",
-                   [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
-
-def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                   "movaps\t{$src, $dst|$dst, $src}",
-                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
-
-let neverHasSideEffects = 1 in
-def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                   "movups\t{$src, $dst|$dst, $src}",
-                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;
-def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                   "movups\t{$src, $dst|$dst, $src}",
-                   [(store (v4f32 VR128:$src), addr:$dst)]>;
-
-// Intrinsic forms of MOVUPS load and store
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "movups\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
-def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                       "movups\t{$src, $dst|$dst, $src}",
-                       [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
-
-let Constraints = "$src1 = $dst" in {
-  let AddedComplexity = 20 in {
-    def MOVLPSrm : PSI<0x12, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movlps\t{$src2, $dst|$dst, $src2}",
-       [(set VR128:$dst,
-         (movlp VR128:$src1,
-                (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
-    def MOVHPSrm : PSI<0x16, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movhps\t{$src2, $dst|$dst, $src2}",
-       [(set VR128:$dst,
-         (movlhps VR128:$src1,
-                (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-
-def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
-          (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
-
-def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movlps\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
-                                 (iPTR 0))), addr:$dst)]>;
-
-// v2f64 extract element 1 is always custom lowered to unpack high to low
-// and extract element 0 so the non-store version isn't too horrible.
-def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movhps\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (vector_extract
-                                 (unpckh (bc_v2f64 (v4f32 VR128:$src)),
-                                         (undef)), (iPTR 0))), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let AddedComplexity = 20 in {
-def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
-                                     (ins VR128:$src1, VR128:$src2),
-                    "movlhps\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
-
-def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
-                                     (ins VR128:$src1, VR128:$src2),
-                    "movhlps\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
-} // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-let AddedComplexity = 20 in {
-def : Pat<(v4f32 (movddup VR128:$src, (undef))),
-          (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
-def : Pat<(v2i64 (movddup VR128:$src, (undef))),
-          (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
-}
-
-
-
-// Arithmetic
-
-/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation.  This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a
-/// scalar) and leaves the top elements undefined.
-///
-/// And, we have a special variant form for a full-vector intrinsic form.
-///
-/// These four forms can each have a reg or a mem operand, so there are a
-/// total of eight "instructions".
-///
-multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
-                           SDNode OpNode,
-                           Intrinsic F32Int,
-                           Intrinsic V4F32Int,
-                           bit Commutable = 0> {
-  // Scalar operation, reg.
-  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
-                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                [(set FR32:$dst, (OpNode FR32:$src))]> {
-    let isCommutable = Commutable;
-  }
-
-  // Scalar operation, mem.
-  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
-                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
-            Requires<[HasSSE1, OptForSize]>;
-
-  // Vector operation, reg.
-  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-              [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
-    let isCommutable = Commutable;
-  }
-
-  // Vector operation, mem.
-  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
-
-  // Intrinsic operation, reg.
-  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F32Int VR128:$src))]> {
-    let isCommutable = Commutable;
-  }
-
-  // Intrinsic operation, mem.
-  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
-                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
-
-  // Vector intrinsic operation, reg
-  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (V4F32Int VR128:$src))]> {
-    let isCommutable = Commutable;
-  }
-
-  // Vector intrinsic operation, mem
-  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
-}
-
-// Square root.
-defm SQRT  : sse1_fp_unop_rm<0x51, "sqrt",  fsqrt,
-                             int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
-
-// Reciprocal approximations. Note that these typically require refinement
-// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
-                             int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
-defm RCP   : sse1_fp_unop_rm<0x53, "rcp",   X86frcp,
-                             int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;
-
-/// sse12_fp_pack_logical - SSE 1 & 2 packed FP logical ops
-///
-multiclass sse12_fp_pack_logical<bits<8> opc, string OpcodeStr,
-                                 SDNode OpNode, int HasPat = 0,
-                                 bit Commutable = 1,
-                                 list<list<dag>> Pattern = []> {
-  def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
-       (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-       !if(HasPat, Pattern[0],
-                   [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
-                                                    VR128:$src2)))])>
-       { let isCommutable = Commutable; }
-
-  def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
-       (ins VR128:$src1, VR128:$src2),
-       !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-       !if(HasPat, Pattern[1],
-                   [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
-                                             (bc_v2i64 (v2f64 VR128:$src2))))])>
-       { let isCommutable = Commutable; }
-
-  def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
-       (ins VR128:$src1, f128mem:$src2),
-       !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
-       !if(HasPat, Pattern[2],
-                   [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
-                                             (memopv2i64 addr:$src2)))])>;
-
-  def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
-       (ins VR128:$src1, f128mem:$src2),
-       !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
-       !if(HasPat, Pattern[3],
-                   [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
-                                             (memopv2i64 addr:$src2)))])>;
-}
-
-// Logical
-let Constraints = "$src1 = $dst" in {
-  defm AND  : sse12_fp_pack_logical<0x54, "and", and>;
-  defm OR   : sse12_fp_pack_logical<0x56, "or", or>;
-  defm XOR  : sse12_fp_pack_logical<0x57, "xor", xor>;
-  defm ANDN : sse12_fp_pack_logical<0x55, "andn", undef /* dummy */, 1, 0, [
-    // single r+r
-    [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
-                                       (bc_v2i64 (v4i32 immAllOnesV))),
-                                   VR128:$src2)))],
-    // double r+r
-    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
-                                 (bc_v2i64 (v2f64 VR128:$src2))))],
-    // single r+m
-    [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
-                                       (bc_v2i64 (v4i32 immAllOnesV))),
-                                  (memopv2i64 addr:$src2))))],
-    // double r+m
-    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
-                           (memopv2i64 addr:$src2)))]]>;
-}
-
-let Constraints = "$src1 = $dst" in {
-  def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
-                    "cmp${cc}ps\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
-                                                        VR128:$src, imm:$cc))]>;
-  def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
-                  (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
-                  "cmp${cc}ps\t{$src, $dst|$dst, $src}",
-                  [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
-                                            (memop addr:$src), imm:$cc))]>;
-
-  // Accept explicit immediate argument form instead of comparison code.
-let isAsmParserOnly = 1 in {
-  def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
-                    "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
-  def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem,
-                  (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
-                  "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
-}
-}
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
-          (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
-          (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-
-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
-  let isConvertibleToThreeAddress = 1 in // Convert to pshufd
-    def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
-                          (outs VR128:$dst), (ins VR128:$src1,
-                           VR128:$src2, i8imm:$src3),
-                          "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                          [(set VR128:$dst,
-                            (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
-  def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1,
-                         f128mem:$src2, i8imm:$src3),
-                        "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                        [(set VR128:$dst,
-                          (v4f32 (shufp:$src3
-                                  VR128:$src1, (memopv4f32 addr:$src2))))]>;
-
-  let AddedComplexity = 10 in {
-    def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
-                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpckhps\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
-    def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
-                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpckhps\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v4f32 (unpckh VR128:$src1,
-                                          (memopv4f32 addr:$src2))))]>;
-
-    def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
-                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpcklps\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
-    def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
-                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpcklps\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-// Mask creation
-def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "movmskps\t{$src, $dst|$dst, $src}",
-                     [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
-def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "movmskpd\t{$src, $dst|$dst, $src}",
-                     [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
-
-// Prefetch intrinsic.
-def PREFETCHT0   : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
-    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
-def PREFETCHT1   : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
-    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
-def PREFETCHT2   : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
-    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
-def PREFETCHNTA  : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
-    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
-
-// Non-temporal stores
-def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                    "movntps\t{$src, $dst|$dst, $src}",
-                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
-
-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntps\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-
-def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntdq\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
-
-def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                 "movnti\t{$src, $dst|$dst, $src}",
-                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
-               TB, Requires<[HasSSE2]>;
-
-def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                     "movnti\t{$src, $dst|$dst, $src}",
-                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
-                  TB, Requires<[HasSSE2]>;
-}
-
-// Load, store, and memory fence
-def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
-             TB, Requires<[HasSSE1]>;
-
-// MXCSR register
-def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
-                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
-def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
-                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
-
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1 in {
-def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
-def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
-let ExeDomain = SSEPackedInt in
-def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
-}
-
-def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
-
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-
-//===---------------------------------------------------------------------===//
-// SSE2 Instructions
-//===---------------------------------------------------------------------===//
-
-// Move Instructions. Register-to-register movsd is not used for FR64
-// register copies because it's a partial register update; FsMOVAPDrr is
-// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movsd for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSDrr : SDI<0x10, MRMSrcReg,
-                  (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
-                  "movsd\t{$src2, $dst|$dst, $src2}",
-                  [(set (v2f64 VR128:$dst),
-                        (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
-
-// Extract the low 64-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
-def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
-          (MOVSDrr (v2f64 VR128:$src1),
-                   (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
-          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
-def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
-                  "movsd\t{$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (loadf64 addr:$src))]>;
-
-// MOVSDrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzload addr:$src)),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-}
-
-// Store scalar value to memory.
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
-                  "movsd\t{$src, $dst|$dst, $src}",
-                  [(store FR64:$src, addr:$dst)]>;
-
-// Extract and store.
-def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
-                 addr:$dst),
-          (MOVSDmr addr:$dst,
-                   (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
-// Conversion instructions
-def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
-                      "cvttsd2si\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
-def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
-                      "cvttsd2si\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
 def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fround FR64:$src))]>;
@@ -1442,35 +962,28 @@
                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
                   Requires<[HasSSE2, OptForSize]>;
-def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
-                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
-                      [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SDrm  : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
-                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
-                      [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
 
-def CVTPD2DQrm  : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                     "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                  "comisd\t{$src2, $src1|$src1, $src2}", []>;
-def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
-                      "comisd\t{$src2, $src1|$src1, $src2}", []>;
-
-// SSE2 instructions with XS prefix
+let isAsmParserOnly = 1 in
+defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+                    int_x86_sse2_cvtsd2ss, f64mem, load,
+                    "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+                    XS, VEX_4V;
+let Constraints = "$src1 = $dst" in
+defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+             int_x86_sse2_cvtsd2ss, f64mem, load,
+             "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS;
+
+// Convert scalar single to scalar double
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
+                    (ins FR32:$src1, FR32:$src2),
+                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    []>, XS, Requires<[HasAVX, HasSSE2]>, VEX_4V;
+def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
+                    (ins FR32:$src1, f32mem:$src2),
+                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    []>, XS, VEX_4V, Requires<[HasAVX, HasSSE2, OptForSize]>;
+}
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (fextend FR32:$src))]>, XS,
@@ -1480,208 +993,51 @@
                    [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
                  Requires<[HasSSE2, OptForSize]>;
 
-def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>,
-      Requires<[HasSSE2, OptForSpeed]>;
-
-// Match intrinsics which expect XMM operand(s).
-def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                         "cvtsd2si\t{$src, $dst|$dst, $src}",
-                         [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
-def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
-                         "cvtsd2si\t{$src, $dst|$dst, $src}",
-                         [(set GR32:$dst, (int_x86_sse2_cvtsd2si
-                                           (load addr:$src)))]>;
-
-// Match intrinsics which expect MM and XMM operand(s).
-def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                         "cvtpd2pi\t{$src, $dst|$dst, $src}",
-                         [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
-def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
-                         "cvtpd2pi\t{$src, $dst|$dst, $src}",
-                         [(set VR64:$dst, (int_x86_sse_cvtpd2pi
-                                           (memop addr:$src)))]>;
-def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                         "cvttpd2pi\t{$src, $dst|$dst, $src}",
-                         [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
-def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
-                         "cvttpd2pi\t{$src, $dst|$dst, $src}",
-                         [(set VR64:$dst, (int_x86_sse_cvttpd2pi
-                                           (memop addr:$src)))]>;
-def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
-                         "cvtpi2pd\t{$src, $dst|$dst, $src}",
-                         [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
-def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
-                         "cvtpi2pd\t{$src, $dst|$dst, $src}",
-                         [(set VR128:$dst, (int_x86_sse_cvtpi2pd
-                                            (load addr:$src)))]>;
-
-// Aliases for intrinsics
-def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                          "cvttsd2si\t{$src, $dst|$dst, $src}",
-                          [(set GR32:$dst,
-                            (int_x86_sse2_cvttsd2si VR128:$src))]>;
-def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
-                          "cvttsd2si\t{$src, $dst|$dst, $src}",
-                          [(set GR32:$dst, (int_x86_sse2_cvttsd2si
-                                            (load addr:$src)))]>;
-
-// Comparison instructions
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
-  def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
-                    (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
-                    "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-  def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
-                    (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
-                    "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
-
-  // Accept explicit immediate argument form instead of comparison code.
 let isAsmParserOnly = 1 in {
-  def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg,
-                    (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
-                    "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-let mayLoad = 1 in
-  def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem,
-                    (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
-                    "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
+                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+                                       VR128:$src2))]>, XS, VEX_4V,
+                    Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
+                      (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+                                       (load addr:$src2)))]>, XS, VEX_4V,
+                    Requires<[HasAVX, HasSSE2]>;
 }
+let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+                                       VR128:$src2))]>, XS,
+                    Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+                      (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+                                       (load addr:$src2)))]>, XS,
+                    Requires<[HasSSE2]>;
 }
 
-let Defs = [EFLAGS] in {
-def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2),
-                   "ucomisd\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>;
-def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
-                   "ucomisd\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>;
-} // Defs = [EFLAGS]
+def : Pat<(extloadf32 addr:$src),
+          (CVTSS2SDrr (MOVSSrm addr:$src))>,
+      Requires<[HasSSE2, OptForSpeed]>;
 
-// Aliases to match intrinsics which expect XMM operand(s).
-let Constraints = "$src1 = $dst" in {
-  def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
-                        (outs VR128:$dst),
-                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
-                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
-                                           VR128:$src, imm:$cc))]>;
-  def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
-                        (outs VR128:$dst),
-                        (ins VR128:$src1, f64mem:$src, SSECC:$cc),
-                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
-                                           (load addr:$src), imm:$cc))]>;
+// Convert doubleword to packed single/double fp
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+                     TB, VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                      "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+                                        (bitconvert (memopv2i64 addr:$src))))]>,
+                     TB, VEX, Requires<[HasAVX, HasSSE2]>;
 }
-
-let Defs = [EFLAGS] in {
-def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                       "ucomisd\t{$src2, $src1|$src1, $src2}",
-                       [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
-                                               VR128:$src2))]>;
-def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
-                       "ucomisd\t{$src2, $src1|$src1, $src2}",
-                       [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
-                                               (load addr:$src2)))]>;
-
-def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                      "comisd\t{$src2, $src1|$src1, $src2}",
-                      [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
-                                             VR128:$src2))]>;
-def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
-                      "comisd\t{$src2, $src1|$src1, $src2}",
-                      [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
-                                             (load addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases of packed SSE2 instructions for scalar use. These all have names
-// that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
-    canFoldAsLoad = 1 in
-def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
-                 [(set FR64:$dst, fpimm0)]>,
-               Requires<[HasSSE2]>, TB, OpSize;
-
-// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
-// disregarded.
-let neverHasSideEffects = 1 in
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
-                     "movapd\t{$src, $dst|$dst, $src}", []>;
-
-// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
-// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
-                     "movapd\t{$src, $dst|$dst, $src}",
-                     [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
-
-//===---------------------------------------------------------------------===//
-// SSE packed FP Instructions
-
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movapd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                   "movapd\t{$src, $dst|$dst, $src}",
-                   [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
-
-def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                   "movapd\t{$src, $dst|$dst, $src}",
-                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
-
-let neverHasSideEffects = 1 in
-def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movupd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1 in
-def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                   "movupd\t{$src, $dst|$dst, $src}",
-                   [(set VR128:$dst, (loadv2f64 addr:$src))]>;
-def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                   "movupd\t{$src, $dst|$dst, $src}",
-                   [(store (v2f64 VR128:$src), addr:$dst)]>;
-
-// Intrinsic forms of MOVUPD load and store
-def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "movupd\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
-def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                       "movupd\t{$src, $dst|$dst, $src}",
-                       [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
-
-let Constraints = "$src1 = $dst" in {
-  let AddedComplexity = 20 in {
-    def MOVLPDrm : PDI<0x12, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movlpd\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (v2f64 (movlp VR128:$src1,
-                                 (scalar_to_vector (loadf64 addr:$src2)))))]>;
-    def MOVHPDrm : PDI<0x16, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movhpd\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (v2f64 (movlhps VR128:$src1,
-                                 (scalar_to_vector (loadf64 addr:$src2)))))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movlpd\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (vector_extract (v2f64 VR128:$src),
-                                 (iPTR 0))), addr:$dst)]>;
-
-// v2f64 extract element 1 is always custom lowered to unpack high to low
-// and extract element 0 so the non-store version isn't too horrible.
-def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movhpd\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (vector_extract
-                                 (v2f64 (unpckh VR128:$src, (undef))),
-                                 (iPTR 0))), addr:$dst)]>;
-
-// SSE2 instructions without OpSize prefix
 def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtdq2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -1692,7 +1048,18 @@
                                         (bitconvert (memopv2i64 addr:$src))))]>,
                      TB, Requires<[HasSSE2]>;
 
-// SSE2 instructions with XS prefix
+// FIXME: why is the non-intrinsic version described as SSE3?
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+                     XS, VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+                                        (bitconvert (memopv2i64 addr:$src))))]>,
+                     XS, VEX, Requires<[HasAVX, HasSSE2]>;
+}
 def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -1703,6 +1070,29 @@
                                         (bitconvert (memopv2i64 addr:$src))))]>,
                      XS, Requires<[HasSSE2]>;
 
+// Convert packed single/double fp to doubleword
+let isAsmParserOnly = 1 in {
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "cvtps2dq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+                        VEX;
+def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
+                         (ins f128mem:$src),
+                         "cvtps2dq\t{$src, $dst|$dst, $src}",
+                         [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+                                            (memop addr:$src)))]>, VEX;
+}
 def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
@@ -1710,12 +1100,54 @@
                          "cvtps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvtps2dq
                                             (memop addr:$src)))]>;
-// SSE2 packed instructions with XS prefix
+
+let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix
+def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+                     XD, VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+                                          (memop addr:$src)))]>,
+                     XD, VEX, Requires<[HasAVX, HasSSE2]>;
+}
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+                     XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+                                          (memop addr:$src)))]>,
+                     XD, Requires<[HasSSE2]>;
+
+// Convert packed single/double fp to doubleword, with truncation
+let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix
+def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
 def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
 def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
 
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "vcvttps2dq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst,
+                              (int_x86_sse2_cvttps2dq VR128:$src))]>,
+                      XS, VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                        "vcvttps2dq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+                                           (memop addr:$src)))]>,
+                      XS, VEX, Requires<[HasAVX, HasSSE2]>;
+}
 def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
@@ -1727,17 +1159,18 @@
                                            (memop addr:$src)))]>,
                       XS, Requires<[HasSSE2]>;
 
-// SSE2 packed instructions with XD prefix
-def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
-                     XD, Requires<[HasSSE2]>;
-def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
-                                          (memop addr:$src)))]>,
-                     XD, Requires<[HasSSE2]>;
-
+let isAsmParserOnly = 1 in {
+def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
+                            (ins VR128:$src),
+                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>,
+                       VEX;
+def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
+                          (ins f128mem:$src),
+                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
+                          [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+                                             (memop addr:$src)))]>, VEX;
+}
 def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                           "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -1746,12 +1179,31 @@
                           [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                              (memop addr:$src)))]>;
 
-// SSE2 instructions without OpSize prefix
+// Convert packed single to packed double
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+                       Requires<[HasAVX]>;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+                       "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+                       Requires<[HasAVX]>;
+}
 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                        "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
 
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "cvtps2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+                     VEX, Requires<[HasAVX, HasSSE2]>;
+def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+                       "cvtps2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+                                          (load addr:$src)))]>,
+                     VEX, Requires<[HasAVX, HasSSE2]>;
+}
 def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1762,12 +1214,29 @@
                                           (load addr:$src)))]>,
                      TB, Requires<[HasSSE2]>;
 
+// Convert packed double to packed single
+let isAsmParserOnly = 1 in {
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+// FIXME: the memory form of this instruction should be described using
+// extra asm syntax.
+}
 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
 
 
+let isAsmParserOnly = 1 in {
+def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
+                         (ins f128mem:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
+                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+                                            (memop addr:$src)))]>;
+}
 def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                          "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1776,214 +1245,1001 @@
                          [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                             (memop addr:$src)))]>;
 
-// Match intrinsics which expect XMM operand(s).
-// Aliases for intrinsics
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
+multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+                            string asm, string asm_alt> {
+  def rr : SIi8<0xC2, MRMSrcReg,
+                    (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+                    asm, []>;
+  let mayLoad = 1 in
+  def rm : SIi8<0xC2, MRMSrcMem,
+                    (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+                    asm, []>;
+  // Accept explicit immediate argument form instead of comparison code.
+  let isAsmParserOnly = 1 in {
+    def rr_alt : SIi8<0xC2, MRMSrcReg,
+                  (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+                  asm_alt, []>;
+    let mayLoad = 1 in
+    def rm_alt : SIi8<0xC2, MRMSrcMem,
+                  (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+                  asm_alt, []>;
+  }
+}
+
+let neverHasSideEffects = 1, isAsmParserOnly = 1 in {
+  defm VCMPSS  : sse12_cmp_scalar<FR32, f32mem,
+                  "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+                  "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+                  XS, VEX_4V;
+  defm VCMPSD  : sse12_cmp_scalar<FR64, f64mem,
+                  "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+                  "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+                  XD, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+  defm CMPSS  : sse12_cmp_scalar<FR32, f32mem,
+                    "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+                    "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS;
+  defm CMPSD  : sse12_cmp_scalar<FR64, f64mem,
+                    "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+                    "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD;
+}
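
Each defm above is a mechanical expansion of sse12_cmp_scalar; as a sketch,
defm CMPSD regenerates the hand-written definitions deleted earlier in this
hunk, roughly:

    def CMPSDrr : SIi8<0xC2, MRMSrcReg,
                       (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
                       "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>, XD;
    let mayLoad = 1 in
    def CMPSDrm : SIi8<0xC2, MRMSrcMem,
                       (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
                       "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>, XD;
    // plus, under isAsmParserOnly, the CMPSDrr_alt/CMPSDrm_alt forms that
    // take an explicit i8imm in place of the comparison code.
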
+
+multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+                         Intrinsic Int, string asm> {
+  def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
+                      (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+                        [(set VR128:$dst, (Int VR128:$src1,
+                                               VR128:$src, imm:$cc))]>;
+  def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
+                      (ins VR128:$src1, x86memop:$src, SSECC:$cc), asm,
+                        [(set VR128:$dst, (Int VR128:$src1,
+                                               (load addr:$src), imm:$cc))]>;
+}
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let isAsmParserOnly = 1 in {
+  defm Int_VCMPSS  : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+                       "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+                       XS, VEX_4V;
+  defm Int_VCMPSD  : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+                       "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+                       XD, VEX_4V;
+}
 let Constraints = "$src1 = $dst" in {
-def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
-                        "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
-                                           GR32:$src2))]>;
-def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
-                        "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
-                                           (loadi32 addr:$src2)))]>;
-def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                   "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
-                   [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
-                                      VR128:$src2))]>;
-def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                   "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
-                   [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
-                                      (load addr:$src2)))]>;
-def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
-                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
-                                       VR128:$src2))]>, XS,
-                    Requires<[HasSSE2]>;
-def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
-                      (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
-                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
-                                       (load addr:$src2)))]>, XS,
-                    Requires<[HasSSE2]>;
+  defm Int_CMPSS  : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+                       "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
+  defm Int_CMPSD  : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+                       "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
+}
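
These intrinsic aliases expand the same way; for example defm Int_CMPSD
produces, roughly (matching the removed hand-written Int_CMPSDrr above):

    def Int_CMPSDrr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                           (ins VR128:$src1, VR128:$src, SSECC:$cc),
                           "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                           [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
                                              VR128:$src, imm:$cc))]>, XD;
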
+
+
+// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
+multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
+                            ValueType vt, X86MemOperand x86memop,
+                            PatFrag ld_frag, string OpcodeStr, Domain d> {
+  def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
+  def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                     [(set EFLAGS, (OpNode (vt RC:$src1),
+                                           (ld_frag addr:$src2)))], d>;
+}
+
+let Defs = [EFLAGS] in {
+  let isAsmParserOnly = 1 in {
+    defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+                                    "ucomiss", SSEPackedSingle>, VEX;
+    defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+                                    "ucomisd", SSEPackedDouble>, OpSize, VEX;
+    let Pattern = []<dag> in {
+      defm VCOMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+                                      "comiss", SSEPackedSingle>, VEX;
+      defm VCOMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+                                      "comisd", SSEPackedDouble>, OpSize, VEX;
+    }
+
+    defm Int_VUCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+                              load, "ucomiss", SSEPackedSingle>, VEX;
+    defm Int_VUCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+                              load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+    defm Int_VCOMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+                              load, "comiss", SSEPackedSingle>, VEX;
+    defm Int_VCOMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+                              load, "comisd", SSEPackedDouble>, OpSize, VEX;
+  }
+  defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+                                  "ucomiss", SSEPackedSingle>, TB;
+  defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+                                  "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+  let Pattern = []<dag> in {
+    defm COMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+                                    "comiss", SSEPackedSingle>, TB;
+    defm COMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+                                    "comisd", SSEPackedDouble>, TB, OpSize;
+  }
+
+  defm Int_UCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+                              load, "ucomiss", SSEPackedSingle>, TB;
+  defm Int_UCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+                              load, "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+  defm Int_COMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
+                                  "comiss", SSEPackedSingle>, TB;
+  defm Int_COMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
+                                  "comisd", SSEPackedDouble>, TB, OpSize;
+} // Defs = [EFLAGS]
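
As a concrete instance of sse12_ord_cmp, defm UCOMISS expands to a register
and a memory form, roughly (the PI class supplies the SSEPackedSingle domain
encoding):

    def UCOMISSrr : PI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
                       "ucomiss\t{$src2, $src1|$src1, $src2}",
                       [(set EFLAGS, (X86cmp (f32 FR32:$src1), FR32:$src2))],
                       SSEPackedSingle>, TB;
    def UCOMISSrm : PI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
                       "ucomiss\t{$src2, $src1|$src1, $src2}",
                       [(set EFLAGS, (X86cmp (f32 FR32:$src1),
                                             (loadf32 addr:$src2)))],
                       SSEPackedSingle>, TB;
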
+
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
+multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
+                            Intrinsic Int, string asm, string asm_alt,
+                            Domain d> {
+  def rri : PIi8<0xC2, MRMSrcReg,
+             (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
+             [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+  def rmi : PIi8<0xC2, MRMSrcMem,
+             (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
+             [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
+  // Accept explicit immediate argument form instead of comparison code.
+  let isAsmParserOnly = 1 in {
+    def rri_alt : PIi8<0xC2, MRMSrcReg,
+               (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+               asm_alt, [], d>;
+    def rmi_alt : PIi8<0xC2, MRMSrcMem,
+               (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+               asm_alt, [], d>;
+  }
+}
+
+let isAsmParserOnly = 1 in {
+  defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+                 "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+                 "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+                 SSEPackedSingle>, VEX_4V;
+  defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+                 "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+                 "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+                 SSEPackedDouble>, OpSize, VEX_4V;
+}
+let Constraints = "$src1 = $dst" in {
+  defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+                 "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+                 "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}",
+                 SSEPackedSingle>, TB;
+  defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+                 "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+                 "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}",
+                 SSEPackedDouble>, TB, OpSize;
+}
+
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+          (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+          (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
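
The four patterns above route the custom X86cmpps/X86cmppd DAG nodes to the
instructions generated by defm CMPPS/CMPPD; for reference, the register form
those defms produce is roughly:

    def CMPPSrri : PIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
                        "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
                                           VR128:$src, imm:$cc))],
                        SSEPackedSingle>, TB;
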
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Shuffle Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_shuffle - sse 1 & 2 shuffle instructions
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+                         ValueType vt, string asm, PatFrag mem_frag,
+                         Domain d, bit IsConvertibleToThreeAddress = 0> {
+  def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
+                   (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
+                   [(set VR128:$dst, (vt (shufp:$src3
+                            VR128:$src1, (mem_frag addr:$src2))))], d>;
+  let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+    def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
+                   (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
+                   [(set VR128:$dst,
+                            (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+}
+
+let isAsmParserOnly = 1 in {
+  defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+            "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+            memopv4f32, SSEPackedSingle>, VEX_4V;
+  defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+            "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+            memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                    memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+                    TB;
+  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                    memopv2f64, SSEPackedDouble>, TB, OpSize;
+}
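
Only the SSE SHUFPS instantiation passes 1 for IsConvertibleToThreeAddress
(the /* cvt to pshufd */ argument): when both sources are the same register,
the two-address shufps can be rewritten as a pshufd. Its rri form is roughly:

    let isConvertibleToThreeAddress = 1 in
    def SHUFPSrri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                         "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst, (v4f32 (shufp:$src3
                                  VR128:$src1, VR128:$src2)))],
                         SSEPackedSingle>, TB;
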
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+                                   PatFrag mem_frag, RegisterClass RC,
+                                   X86MemOperand x86memop, string asm,
+                                   Domain d> {
+    def rr : PI<opc, MRMSrcReg,
+                (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                asm, [(set RC:$dst,
+                           (vt (OpNode RC:$src1, RC:$src2)))], d>;
+    def rm : PI<opc, MRMSrcMem,
+                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+                asm, [(set RC:$dst,
+                           (vt (OpNode RC:$src1,
+                                       (mem_frag addr:$src2))))], d>;
+}
+
+let AddedComplexity = 10 in {
+  let isAsmParserOnly = 1 in {
+    defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+          VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedSingle>, VEX_4V;
+    defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+          VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedDouble>, OpSize, VEX_4V;
+    defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+          VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedSingle>, VEX_4V;
+    defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+          VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedDouble>, OpSize, VEX_4V;
+  }
+
+  let Constraints = "$src1 = $dst" in {
+    defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+          VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedSingle>, TB;
+    defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+          VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedDouble>, TB, OpSize;
+    defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+          VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedSingle>, TB;
+    defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+          VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedDouble>, TB, OpSize;
+  } // Constraints = "$src1 = $dst"
+} // AddedComplexity
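
The AddedComplexity = 10 wrapper biases instruction selection toward these
unpack patterns when they compete with more generic shuffle matches. Each
defm again yields rr and rm forms; e.g. UNPCKHPS, roughly:

    def UNPCKHPSrr : PI<0x15, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "unpckhps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, (v4f32 (unpckh VR128:$src1,
                                                         VR128:$src2)))],
                        SSEPackedSingle>, TB;
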
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign Mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 FP sign mask extraction
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+                                Domain d> {
+  def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+                     [(set GR32:$dst, (Int RC:$src))], d>;
+}
+
+// Mask creation
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+                                     SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+                                     SSEPackedDouble>, TB, OpSize;
+
+let isAsmParserOnly = 1 in {
+  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+                                        "movmskps", SSEPackedSingle>, VEX;
+  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+                                        "movmskpd", SSEPackedDouble>, OpSize,
+                                        VEX;
+}
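
movmskps/movmskpd copy the sign bit of each packed element into the low bits
of a GR32; the multiclass generates only a register form, e.g.:

    def MOVMSKPSrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
                        "movmskps\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))],
                        SSEPackedSingle>, TB;
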
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
+//===----------------------------------------------------------------------===//
+
+// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
+// names that start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+    canFoldAsLoad = 1 in {
+  // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+                 [(set FR32:$dst, fp32imm0)]>,
+                 Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+                 [(set FR64:$dst, fpimm0)]>,
+               Requires<[HasSSE2]>, TB, OpSize;
+}
+
+// Alias instructions to do FR32 or FR64 reg-to-reg copies using movaps or
+// movapd. Upper bits are disregarded.
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+                     "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+                     "movapd\t{$src, $dst|$dst, $src}", []>;
+}
+
+// Alias instructions to load FR32 or FR64 from f128mem using movaps or
+// movapd. Upper bits are disregarded.
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+                     "movaps\t{$src, $dst|$dst, $src}",
+                     [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+                     "movapd\t{$src, $dst|$dst, $src}",
+                     [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Logical Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
+///
+multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
+                                       SDNode OpNode, bit MayLoad = 0> {
+  let isAsmParserOnly = 1 in {
+    defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode, FR32,
+                f32, f128mem, memopfsf32, SSEPackedSingle, MayLoad>, VEX_4V;
+
+    defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode, FR64,
+                f64, f128mem, memopfsf64, SSEPackedDouble, MayLoad>, OpSize,
+                VEX_4V;
+  }
+
+  let Constraints = "$src1 = $dst" in {
+    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                "ps\t{$src2, $dst|$dst, $src2}"), OpNode, FR32, f32,
+                f128mem, memopfsf32, SSEPackedSingle, MayLoad>, TB;
+
+    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                "pd\t{$src2, $dst|$dst, $src2}"), OpNode, FR64, f64,
+                f128mem, memopfsf64, SSEPackedDouble, MayLoad>, TB, OpSize;
+  }
+}
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+defm FsAND  : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
+defm FsOR   : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
+defm FsXOR  : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+
+let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
+  defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, 1>;
+
+/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
+///
+multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
+                                 SDNode OpNode, int HasPat = 0,
+                                 list<list<dag>> Pattern = []> {
+  let isAsmParserOnly = 1 in {
+    defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+         !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+         f128mem,
+         !if(HasPat, Pattern[0], // rr
+                     [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
+                                                      VR128:$src2)))]),
+         !if(HasPat, Pattern[2], // rm
+                     [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+                                               (memopv2i64 addr:$src2)))])>,
+                                               VEX_4V;
+
+    defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+         !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+         f128mem,
+         !if(HasPat, Pattern[1], // rr
+                     [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+                                               (bc_v2i64 (v2f64
+                                               VR128:$src2))))]),
+         !if(HasPat, Pattern[3], // rm
+                     [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+                                               (memopv2i64 addr:$src2)))])>,
+                                                               OpSize, VEX_4V;
+  }
+  let Constraints = "$src1 = $dst" in {
+    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+         !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), f128mem,
+         !if(HasPat, Pattern[0], // rr
+                     [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
+                                                      VR128:$src2)))]),
+         !if(HasPat, Pattern[2], // rm
+                     [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+                                               (memopv2i64 addr:$src2)))])>, TB;
+
+    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+         !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), f128mem,
+         !if(HasPat, Pattern[1], // rr
+                     [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+                                               (bc_v2i64 (v2f64
+                                               VR128:$src2))))]),
+         !if(HasPat, Pattern[3], // rm
+                     [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+                                               (memopv2i64 addr:$src2)))])>,
+                                                                    TB, OpSize;
+  }
+}
+
+defm AND  : sse12_fp_packed_logical<0x54, "and", and>;
+defm OR   : sse12_fp_packed_logical<0x56, "or", or>;
+defm XOR  : sse12_fp_packed_logical<0x57, "xor", xor>;
+let isCommutable = 0 in
+  defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
+    // single r+r
+    [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
+                                       (bc_v2i64 (v4i32 immAllOnesV))),
+                                   VR128:$src2)))],
+    // double r+r
+    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+                                 (bc_v2i64 (v2f64 VR128:$src2))))],
+    // single r+m
+    [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
+                                       (bc_v2i64 (v4i32 immAllOnesV))),
+                                  (memopv2i64 addr:$src2))))],
+    // double r+m
+    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+                           (memopv2i64 addr:$src2)))]]>;
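
The HasPat/Pattern machinery lets ANDN override all four default patterns
(indices 0-3: single rr, double rr, single rm, double rm, as labeled above),
since andn complements $src1 first and so cannot use the plain and/or/xor
patterns. With HasPat = 0, defm AND falls through to the !if defaults, so its
PS register form is, schematically (sse12_fp_packed_logical_rm is defined
earlier in this file):

    def ANDPSrr : PI<0x54, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     "andps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))],
                     SSEPackedSingle>, TB;
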
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+/// basic_sse12_fp_binop_rm - SSE 1 & 2 binops come in both scalar and
+/// vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation.  This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem.
+///
+multiclass basic_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
+                                   SDNode OpNode> {
+
+  let isAsmParserOnly = 1 in {
+    defm V#NAME#SS : sse12_fp_scalar<opc,
+        !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                   OpNode, FR32, f32mem>, XS, VEX_4V;
+
+    defm V#NAME#SD : sse12_fp_scalar<opc,
+        !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                   OpNode, FR64, f64mem>, XD, VEX_4V;
+
+    defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                      "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+                      VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle>,
+                      VEX_4V;
+
+    defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                      "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+                      VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble>,
+                      OpSize, VEX_4V;
+
+    defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  "", "_ss", ssmem, sse_load_f32>, XS, VEX_4V;
+
+    defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  "2", "_sd", sdmem, sse_load_f64>, XD, VEX_4V;
+  }
+
+  let Constraints = "$src1 = $dst" in {
+    defm SS : sse12_fp_scalar<opc,
+                    !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                    OpNode, FR32, f32mem>, XS;
+
+    defm SD : sse12_fp_scalar<opc,
+                    !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                    OpNode, FR64, f64mem>, XD;
+
+    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                "ps\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v4f32,
+                f128mem, memopv4f32, SSEPackedSingle>, TB;
+
+    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                "pd\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v2f64,
+                f128mem, memopv2f64, SSEPackedDouble>, TB, OpSize;
+
+    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                  "", "_ss", ssmem, sse_load_f32>, XS;
+
+    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                  "2", "_sd", sdmem, sse_load_f64>, XD;
+  }
+}
+
+// Arithmetic instructions
+defm ADD : basic_sse12_fp_binop_rm<0x58, "add", fadd>;
+defm MUL : basic_sse12_fp_binop_rm<0x59, "mul", fmul>;
+
+let isCommutable = 0 in {
+  defm SUB : basic_sse12_fp_binop_rm<0x5C, "sub", fsub>;
+  defm DIV : basic_sse12_fp_binop_rm<0x5E, "div", fdiv>;
+}
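
Each of these defm lines fans out into scalar (SS/SD), packed (PS/PD) and
intrinsic variants, in both the classic two-address SSE encodings and the
three-address VEX encodings. As a sketch, assuming sse12_fp_scalar (defined
earlier in this file) follows the usual rr shape, the plain scalar SSE form
of defm ADD is roughly:

    def ADDSSrr : SSI<0x58, MRMSrcReg, (outs FR32:$dst),
                      (ins FR32:$src1, FR32:$src2),
                      "addss\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
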
+
+/// sse12_fp_binop_rm - Other SSE 1 & 2 binops
+///
+/// This multiclass is like basic_sse12_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form.  Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+multiclass sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
+                             SDNode OpNode> {
+
+  let isAsmParserOnly = 1 in {
+    // Scalar operation, reg+reg.
+    defm V#NAME#SS : sse12_fp_scalar<opc,
+      !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                 OpNode, FR32, f32mem>, XS, VEX_4V;
+
+    defm V#NAME#SD : sse12_fp_scalar<opc,
+      !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                 OpNode, FR64, f64mem>, XD, VEX_4V;
+
+    defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                      "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+                      VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle>,
+                      VEX_4V;
+
+    defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                      "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
+                      VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble>,
+                      OpSize, VEX_4V;
+
+    defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  "", "_ss", ssmem, sse_load_f32>, XS, VEX_4V;
+
+    defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  "2", "_sd", sdmem, sse_load_f64>, XD, VEX_4V;
+
+    defm V#NAME#PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  "", "_ps", f128mem, memopv4f32, SSEPackedSingle>, VEX_4V;
+
+    defm V#NAME#PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  "2", "_pd", f128mem, memopv2f64, SSEPackedDouble>, OpSize,
+                  VEX_4V;
+  }
+
+  let Constraints = "$src1 = $dst" in {
+    // Scalar operation, reg+reg.
+    defm SS : sse12_fp_scalar<opc,
+                    !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                    OpNode, FR32, f32mem>, XS;
+    defm SD : sse12_fp_scalar<opc,
+                    !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                    OpNode, FR64, f64mem>, XD;
+    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                "ps\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v4f32,
+                f128mem, memopv4f32, SSEPackedSingle>, TB;
+
+    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
+                "pd\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v2f64,
+                f128mem, memopv2f64, SSEPackedDouble>, TB, OpSize;
+
+    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                  "", "_ss", ssmem, sse_load_f32>, XS;
+
+    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+                  "2", "_sd", sdmem, sse_load_f64>, XD;
+
+    defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+                  "", "_ps", f128mem, memopv4f32, SSEPackedSingle>, TB;
+
+    defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+       !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+                  "2", "_pd", f128mem, memopv2f64, SSEPackedDouble>, TB, OpSize;
+  }
 }
 
-// Arithmetic
+let isCommutable = 0 in {
+  defm MAX : sse12_fp_binop_rm<0x5F, "max", X86fmax>;
+  defm MIN : sse12_fp_binop_rm<0x5D, "min", X86fmin>;
+}
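
max/min go through sse12_fp_binop_rm rather than the basic multiclass because
they do not map onto a C operator, so their full-vector intrinsic forms need
dedicated instructions; isCommutable = 0 because x86 max/min are order
sensitive when an operand is a NaN (the second source operand wins). A sketch
of the packed intrinsic form for defm MAX, assuming sse12_fp_packed_int
builds the intrinsic name int_x86_sse_max_ps from its string arguments (the
def name and exact class here are illustrative):

    def MAXPSrr_Int : PSI<0x5F, MRMSrcReg, (outs VR128:$dst),
                          (ins VR128:$src1, VR128:$src2),
                          "maxps\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst, (int_x86_sse_max_ps VR128:$src1,
                                             VR128:$src2))]>;
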
 
-/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
-///
+/// Unop Arithmetic
/// In addition, we have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// We also have a special variant for the full-vector intrinsic form.
-///
-/// These four forms can each have a reg or a mem operand, so there are a
-/// total of eight "instructions".
-///
-multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
-                           SDNode OpNode,
-                           Intrinsic F64Int,
-                           Intrinsic V2F64Int,
-                           bit Commutable = 0> {
-  // Scalar operation, reg.
+
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
+                          SDNode OpNode, Intrinsic F32Int> {
+  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                [(set FR32:$dst, (OpNode FR32:$src))]>;
+  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+            Requires<[HasSSE1, OptForSize]>;
+  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (F32Int VR128:$src))]>;
+  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+}
+
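For orientation: instantiating one of these unop multiclasses stamps out one
record per inner def, prefixed with the defm name. A sketch (illustrative only,
derived from the definitions above) of what the later "defm SQRT" instantiation
of sse1_fp_unop_s produces for the plain register form:

  // Illustrative expansion of
  //   defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>
  // The SSr def becomes, roughly:
  def SQRTSSr : SSI<0x51, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                    "sqrtss\t{$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fsqrt FR32:$src))]>;
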
+/// sse1_fp_unop_p - SSE1 unops in packed form.
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr,
+                          SDNode OpNode, Intrinsic V4F32Int> {
+  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+              [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+                              SDNode OpNode, Intrinsic F32Int> {
+  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+                !strconcat(!strconcat("v", OpcodeStr),
+                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
+                !strconcat(!strconcat("v", OpcodeStr),
+                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                []>, XS, Requires<[HasAVX, HasSSE1, OptForSize]>;
+  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+                (ins VR128:$src1, VR128:$src2),
+                !strconcat(!strconcat("v", OpcodeStr),
+                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+                (ins VR128:$src1, ssmem:$src2),
+                !strconcat(!strconcat("v", OpcodeStr),
+                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+}
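
Every def in sse1_fp_unop_s_avx carries an empty pattern list and an extra
pass-through $src1 operand, so these records contribute no instruction-selection
patterns; consistent with that, the VSQRT/VRSQRT/VRCP instantiations below sit
under "let isAsmParserOnly = 1", i.e. they serve assembly parsing rather than
code generation.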
+
+/// sse2_fp_unop_s - SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
+                          SDNode OpNode, Intrinsic F64Int> {
   def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                [(set FR64:$dst, (OpNode FR64:$src))]> {
-    let isCommutable = Commutable;
-  }
-
-  // Scalar operation, mem.
+                [(set FR64:$dst, (OpNode FR64:$src))]>;
   def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                 [(set FR64:$dst, (OpNode (load addr:$src)))]>;
-
-  // Vector operation, reg.
-  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-              [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
-    let isCommutable = Commutable;
-  }
-
-  // Vector operation, mem.
-  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
-
-  // Intrinsic operation, reg.
   def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F64Int VR128:$src))]> {
-    let isCommutable = Commutable;
-  }
-
-  // Intrinsic operation, mem.
+                    [(set VR128:$dst, (F64Int VR128:$src))]>;
   def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                     !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+}
 
-  // Vector intrinsic operation, reg
+/// sse2_fp_unop_p - SSE2 unops in vector forms.
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
+                          SDNode OpNode, Intrinsic V2F64Int> {
+  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+              [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
+  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
   def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (V2F64Int VR128:$src))]> {
-    let isCommutable = Commutable;
-  }
-
-  // Vector intrinsic operation, mem
+                    [(set VR128:$dst, (V2F64Int VR128:$src))]>;
   def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
 }
 
+/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+                              SDNode OpNode, Intrinsic F64Int> {
+  def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+                !strconcat(OpcodeStr,
+                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+  def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
+                (ins FR64:$src1, f64mem:$src2),
+                !strconcat(OpcodeStr,
+                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+  def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
+           (ins VR128:$src1, VR128:$src2),
+           !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                    []>;
+  def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
+           (ins VR128:$src1, sdmem:$src2),
+           !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                    []>;
+}
+
+let isAsmParserOnly = 1 in {
+  // Square root.
+  let Predicates = [HasAVX, HasSSE2] in {
+  defm VSQRT  : sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+                  VEX_4V;
+
+  defm VSQRT  : sse2_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_pd>, VEX;
+  }
+
+  let Predicates = [HasAVX, HasSSE1] in {
+  defm VSQRT  : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+                  VEX_4V;
+  defm VSQRT  : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ps>, VEX;
+  // Reciprocal approximations. Note that these typically require refinement
+  // in order to obtain suitable precision.
+  defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
+                                   int_x86_sse_rsqrt_ss>, VEX_4V;
+  defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, int_x86_sse_rsqrt_ps>,
+                                   VEX;
+  defm VRCP   : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+                                   VEX_4V;
+  defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ps>,
+                                   VEX;
+  }
+}
+
 // Square root.
-defm SQRT  : sse2_fp_unop_rm<0x51, "sqrt",  fsqrt,
-                             int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
+defm SQRT  : sse1_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse_sqrt_ss>,
+             sse1_fp_unop_p<0x51, "sqrt",  fsqrt, int_x86_sse_sqrt_ps>,
+             sse2_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse2_sqrt_sd>,
+             sse2_fp_unop_p<0x51, "sqrt",  fsqrt, int_x86_sse2_sqrt_pd>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
+             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ps>;
+defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+             sse1_fp_unop_p<0x53, "rcp", X86frcp, int_x86_sse_rcp_ps>;
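
The refinement mentioned above is conventionally one Newton-Raphson step
applied to the hardware estimate (a sketch, not part of this patch): for
rsqrt, x1 = x0 * (1.5 - 0.5 * a * x0 * x0); for rcp, x1 = x0 * (2 - a * x0).
Each step roughly doubles the ~12 bits of precision the estimate instructions
guarantee.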
 
 // There is no f64 version of the reciprocal approximation instructions.
 
-let Constraints = "$src1 = $dst" in {
-  def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
-                    "cmp${cc}pd\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
-                                                        VR128:$src, imm:$cc))]>;
-  def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
-                  (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
-                  "cmp${cc}pd\t{$src, $dst|$dst, $src}",
-                  [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
-                                                 (memop addr:$src), imm:$cc))]>;
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Non-temporal stores
+//===----------------------------------------------------------------------===//
 
-  // Accept explicit immediate argument form instead of comparison code.
 let isAsmParserOnly = 1 in {
-  def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
-                    "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-  def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem,
-                  (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
-                  "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+  def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
+                         (ins i128mem:$dst, VR128:$src),
+                         "movntps\t{$src, $dst|$dst, $src}",
+                         [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
+  def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
+                         (ins i128mem:$dst, VR128:$src),
+                         "movntpd\t{$src, $dst|$dst, $src}",
+                         [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
+
+  let ExeDomain = SSEPackedInt in
+    def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+                       (ins f128mem:$dst, VR128:$src),
+                       "movntdq\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
+
+  let AddedComplexity = 400 in { // Prefer non-temporal versions
+    def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+                         (ins f128mem:$dst, VR128:$src),
+                         "movntps\t{$src, $dst|$dst, $src}",
+                         [(alignednontemporalstore (v4f32 VR128:$src),
+                                                   addr:$dst)]>, VEX;
+    def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+                         (ins f128mem:$dst, VR128:$src),
+                         "movntpd\t{$src, $dst|$dst, $src}",
+                         [(alignednontemporalstore (v2f64 VR128:$src),
+                                                   addr:$dst)]>, VEX;
+    def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
+                          (ins f128mem:$dst, VR128:$src),
+                          "movntdq\t{$src, $dst|$dst, $src}",
+                          [(alignednontemporalstore (v2f64 VR128:$src),
+                                                    addr:$dst)]>, VEX;
+    let ExeDomain = SSEPackedInt in
+    def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+                        (ins f128mem:$dst, VR128:$src),
+                        "movntdq\t{$src, $dst|$dst, $src}",
+                        [(alignednontemporalstore (v4f32 VR128:$src),
+                                                  addr:$dst)]>, VEX;
+  }
 }
+
+def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                    "movntps\t{$src, $dst|$dst, $src}",
+                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                        "movntpd\t{$src, $dst|$dst, $src}",
+                        [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                        "movntdq\t{$src, $dst|$dst, $src}",
+                        [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntps\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntpd\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntdq\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntdq\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+// There is no AVX form for instructions below this point
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+                 "movnti\t{$src, $dst|$dst, $src}",
+                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+               TB, Requires<[HasSSE2]>;
+
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                     "movnti\t{$src, $dst|$dst, $src}",
+                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+                  TB, Requires<[HasSSE2]>;
+
 }
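
A note on the "AddedComplexity = 400" blocks: AddedComplexity raises a
pattern's rank during instruction selection, so when a plain store pattern and
one of these alignednontemporalstore patterns both match the same node, the
non-temporal instruction wins -- that is all "Prefer non-temporal versions"
means here.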
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
-          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
-          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+def MOVNTImr_Int  :   I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+                    "movnti\t{$src, $dst|$dst, $src}",
+                    [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+                  TB, Requires<[HasSSE2]>;
 
-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
-  def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
-                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
-                 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                 [(set VR128:$dst,
-                   (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
-  def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1,
-                         f128mem:$src2, i8imm:$src3),
-                        "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                        [(set VR128:$dst,
-                          (v2f64 (shufp:$src3
-                                  VR128:$src1, (memopv2f64 addr:$src2))))]>;
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc Instructions (No AVX form)
+//===----------------------------------------------------------------------===//
 
-  let AddedComplexity = 10 in {
-    def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
-                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpckhpd\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
-    def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
-                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpckhpd\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v2f64 (unpckh VR128:$src1,
-                                          (memopv2f64 addr:$src2))))]>;
+// Prefetch intrinsic.
+def PREFETCHT0   : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+def PREFETCHT1   : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+def PREFETCHT2   : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+def PREFETCHNTA  : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
 
-    def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
-                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpcklpd\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
-    def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
-                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpcklpd\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
+// Store fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+             TB, Requires<[HasSSE1]>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo!
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+    isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+                 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
 
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
+
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store MXCSR register
+//===----------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1 in {
+  def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+  def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+}
+
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
 
 //===---------------------------------------------------------------------===//
-// SSE integer instructions
-let ExeDomain = SSEPackedInt in {
+// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
+//===---------------------------------------------------------------------===//
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+let isAsmParserOnly = 1 in {
+  let neverHasSideEffects = 1 in
+  def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+  def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+
+  let canFoldAsLoad = 1, mayLoad = 1 in {
+  def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                     "movdqa\t{$src, $dst|$dst, $src}",
+                     [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>,
+                     VEX;
+  def VMOVDQUrm :  I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                     "vmovdqu\t{$src, $dst|$dst, $src}",
+                     [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+                   XS, VEX, Requires<[HasAVX, HasSSE2]>;
+  }
+
+  let mayStore = 1 in {
+  def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+                     (ins i128mem:$dst, VR128:$src),
+                     "movdqa\t{$src, $dst|$dst, $src}",
+                     [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX;
+  def VMOVDQUmr :  I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                     "vmovdqu\t{$src, $dst|$dst, $src}",
+                     [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+                   XS, VEX, Requires<[HasAVX, HasSSE2]>;
+  }
+}
 
-// Move Instructions
 let neverHasSideEffects = 1 in
 def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "movdqa\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, mayLoad = 1 in
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
 def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqa\t{$src, $dst|$dst, $src}",
                    [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
-let mayStore = 1 in
-def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                   "movdqa\t{$src, $dst|$dst, $src}",
-                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
-let canFoldAsLoad = 1, mayLoad = 1 in
 def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqu\t{$src, $dst|$dst, $src}",
                    [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
                  XS, Requires<[HasSSE2]>;
-let mayStore = 1 in
+}
+
+let mayStore = 1 in {
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                   "movdqa\t{$src, $dst|$dst, $src}",
+                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
 def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                    "movdqu\t{$src, $dst|$dst, $src}",
                    [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
                  XS, Requires<[HasSSE2]>;
+}
 
 // Intrinsic forms of MOVDQU load and store
+let isAsmParserOnly = 1 in {
+let canFoldAsLoad = 1 in
+def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                       "vmovdqu\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
+                     XS, VEX, Requires<[HasAVX, HasSSE2]>;
+def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                       "vmovdqu\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+                     XS, VEX, Requires<[HasAVX, HasSSE2]>;
+}
+
 let canFoldAsLoad = 1 in
 def MOVDQUrm_Int :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "movdqu\t{$src, $dst|$dst, $src}",
@@ -1994,55 +2250,72 @@
                        [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
                      XS, Requires<[HasSSE2]>;
 
-let Constraints = "$src1 = $dst" in {
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Arithmetic Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
 
 multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
-                            bit Commutable = 0> {
+                            bit IsCommutable = 0, bit Is2Addr = 1> {
+  let isCommutable = IsCommutable in
   def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
-                               (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
-    let isCommutable = Commutable;
-  }
+       (ins VR128:$src1, VR128:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
   def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
-                               (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (IntId VR128:$src1,
-                                        (bitconvert (memopv2i64
-                                                     addr:$src2))))]>;
+       (ins VR128:$src1, i128mem:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (IntId VR128:$src1,
+                                (bitconvert (memopv2i64 addr:$src2))))]>;
 }
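
The Is2Addr bit threaded through these multiclasses only selects the assembly
string. As a hedged illustration using a later instantiation, "defm VPADDSB"
passes Is2Addr = 0, so the !if takes its false arm and the rr record comes out
roughly as:

  // Illustrative expansion of
  //   defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb",
  //                                   int_x86_sse2_padds_b, 1, 0>
  // Is2Addr = 0 selects the three-operand AVX spelling:
  def VPADDSBrr : PDI<0xEC, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "vpaddsb\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst,
                        (int_x86_sse2_padds_b VR128:$src1, VR128:$src2))]>;

The SSE form (Is2Addr = 1, defined under Constraints = "$src1 = $dst") keeps
the destructive two-operand string instead.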
 
 multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
-                             string OpcodeStr,
-                             Intrinsic IntId, Intrinsic IntId2> {
+                             string OpcodeStr, Intrinsic IntId,
+                             Intrinsic IntId2, bit Is2Addr = 1> {
   def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
-                               (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+       (ins VR128:$src1, VR128:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
   def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
-                               (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (IntId VR128:$src1,
+       (ins VR128:$src1, i128mem:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (IntId VR128:$src1,
                                       (bitconvert (memopv2i64 addr:$src2))))]>;
   def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
-                                (ins VR128:$src1, i32i8imm:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+       (ins VR128:$src1, i32i8imm:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
 }
 
 /// PDI_binop_rm - Simple SSE2 binary operator.
 multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                        ValueType OpVT, bit Commutable = 0> {
+                        ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
+  let isCommutable = IsCommutable in
   def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
-                               (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
-    let isCommutable = Commutable;
-  }
+       (ins VR128:$src1, VR128:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
   def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
-                               (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+       (ins VR128:$src1, i128mem:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
                                      (bitconvert (memopv2i64 addr:$src2)))))]>;
 }
 
@@ -2052,64 +2325,177 @@
 /// to collapse (bitconvert VT to VT) into its operand.
 ///
 multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                              bit Commutable = 0> {
+                              bit IsCommutable = 0, bit Is2Addr = 1> {
+  let isCommutable = IsCommutable in
   def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
-               (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
-    let isCommutable = Commutable;
-  }
+       (ins VR128:$src1, VR128:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
   def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
-               (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (OpNode VR128:$src1,
-               (memopv2i64 addr:$src2)))]>;
+       (ins VR128:$src1, i128mem:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
 }
 
-} // Constraints = "$src1 = $dst"
 } // ExeDomain = SSEPackedInt
 
 // 128-bit Integer Arithmetic
 
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
-defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
-
-defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
-defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+defm VPADDB  : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
+defm VPADDW  : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
+defm VPADDD  : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
+defm VPADDQ  : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
+defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
+defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
+defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB  : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
+                                 VEX_4V;
+defm VPSUBSW  : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
+                                 VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
+                                 VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
+                                 VEX_4V;
+defm VPADDSB  : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
+                                 VEX_4V;
+defm VPADDSW  : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
+                                 VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
+                                 VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
+                                 VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
+                                 VEX_4V;
+defm VPMULHW  : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
+                                 VEX_4V;
+defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
+                                 VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
+                                 VEX_4V;
+defm VPAVGB   : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
+                                 VEX_4V;
+defm VPAVGW   : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
+                                 VEX_4V;
+defm VPMINUB  : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
+                                 VEX_4V;
+defm VPMINSW  : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
+                                 VEX_4V;
+defm VPMAXUB  : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
+                                 VEX_4V;
+defm VPMAXSW  : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
+                                 VEX_4V;
+defm VPSADBW  : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
+                                 VEX_4V;
+}
 
+let Constraints = "$src1 = $dst" in {
+defm PADDB  : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW  : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD  : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ  : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
 defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
 defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
 defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
 defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
 
+// Intrinsic forms
 defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
 defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
 defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
 defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
-
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
-
+defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
 defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
-defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
+defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
 defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
-
 defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+defm PAVGB   : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW   : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+defm PMINUB  : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW  : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB  : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW  : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW  : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
 
-defm PAVGB  : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
-defm PAVGW  : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+} // Constraints = "$src1 = $dst"
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
 
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
+                                int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
+                                VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
+                                int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
+                                VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
+                                int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
+                                VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
+                                int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
+                                VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
+                                int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
+                                VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
+                                int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
+                                VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
+                                int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
+                                VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
+                                int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
+                                VEX_4V;
+
+defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
+defm VPOR  : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
 
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+let ExeDomain = SSEPackedInt in {
+  let neverHasSideEffects = 1 in {
+    // 128-bit logical shifts.
+    def VPSLLDQri : PDIi8<0x73, MRM7r,
+                      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                      "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                      VEX_4V;
+    def VPSRLDQri : PDIi8<0x73, MRM3r,
+                      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                      "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                      VEX_4V;
+    // PSRADQri doesn't exist in SSE[1-3].
+  }
+  def VPANDNrr : PDI<0xDF, MRMSrcReg,
+                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                              VR128:$src2)))]>, VEX_4V;
 
+  def VPANDNrm : PDI<0xDF, MRMSrcMem,
+                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                              (memopv2i64 addr:$src2))))]>,
+                                              VEX_4V;
+}
+}
 
+let Constraints = "$src1 = $dst" in {
 defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                                int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
 defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
@@ -2129,17 +2515,34 @@
 defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                                int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
 
-// 128-bit logical shifts.
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1,
-    ExeDomain = SSEPackedInt in {
-  def PSLLDQri : PDIi8<0x73, MRM7r,
-                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                       "pslldq\t{$src2, $dst|$dst, $src2}", []>;
-  def PSRLDQri : PDIi8<0x73, MRM3r,
-                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                       "psrldq\t{$src2, $dst|$dst, $src2}", []>;
-  // PSRADQri doesn't exist in SSE[1-3].
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR  : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let ExeDomain = SSEPackedInt in {
+  let neverHasSideEffects = 1 in {
+    // 128-bit logical shifts.
+    def PSLLDQri : PDIi8<0x73, MRM7r,
+                         (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                         "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+    def PSRLDQri : PDIi8<0x73, MRM3r,
+                         (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                         "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+    // PSRADQri doesn't exist in SSE[1-3].
+  }
+  def PANDNrr : PDI<0xDF, MRMSrcReg,
+                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                    "pandn\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                              VR128:$src2)))]>;
+
+  def PANDNrm : PDI<0xDF, MRMSrcMem,
+                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+                    "pandn\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                              (memopv2i64 addr:$src2))))]>;
 }
+} // Constraints = "$src1 = $dst"
 
 let Predicates = [HasSSE2] in {
   def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
@@ -2160,32 +2563,33 @@
             (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
 }
 
-// Logical
-defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
-defm POR  : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
-defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
-
-let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in {
-  def PANDNrr : PDI<0xDF, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                    "pandn\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
-                                              VR128:$src2)))]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Comparison Instructions
+//===---------------------------------------------------------------------===//
 
-  def PANDNrm : PDI<0xDF, MRMSrcMem,
-                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                    "pandn\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
-                                              (memopv2i64 addr:$src2))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+  defm VPCMPEQB  : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
+                                    0>, VEX_4V;
+  defm VPCMPEQW  : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
+                                    0>, VEX_4V;
+  defm VPCMPEQD  : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
+                                    0>, VEX_4V;
+  defm VPCMPGTB  : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
+                                    0>, VEX_4V;
+  defm VPCMPGTW  : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
+                                    0>, VEX_4V;
+  defm VPCMPGTD  : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
+                                    0>, VEX_4V;
 }
 
-// SSE2 Integer comparison
-defm PCMPEQB  : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
-defm PCMPEQW  : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
-defm PCMPEQD  : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
-defm PCMPGTB  : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
-defm PCMPGTW  : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
-defm PCMPGTD  : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+let Constraints = "$src1 = $dst" in {
+  defm PCMPEQB  : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
+  defm PCMPEQW  : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
+  defm PCMPEQD  : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
+  defm PCMPGTB  : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+  defm PCMPGTW  : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+  defm PCMPGTD  : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+} // Constraints = "$src1 = $dst"
 
 def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
           (PCMPEQBrr VR128:$src1, VR128:$src2)>;
@@ -2213,72 +2617,138 @@
 def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
           (PCMPGTDrm VR128:$src1, addr:$src2)>;
 
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Pack Instructions
+//===---------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
+                                  0, 0>, VEX_4V;
+defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
+                                  0, 0>, VEX_4V;
+defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
+                                  0, 0>, VEX_4V;
+}
 
-// Pack instructions
+let Constraints = "$src1 = $dst" in {
 defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
 defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
 defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+} // Constraints = "$src1 = $dst"
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Shuffle Instructions
+//===---------------------------------------------------------------------===//
 
 let ExeDomain = SSEPackedInt in {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
+                         PatFrag bc_frag> {
+def ri : Ii8<0x70, MRMSrcReg,
+              (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+              !strconcat(OpcodeStr,
+                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+              [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
+                                                      (undef))))]>;
+def mi : Ii8<0x70, MRMSrcMem,
+              (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+              !strconcat(OpcodeStr,
+                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+              [(set VR128:$dst, (vt (pshuf_frag:$src2
+                                      (bc_frag (memopv2i64 addr:$src1)),
+                                      (undef))))]>;
+}
+} // ExeDomain = SSEPackedInt
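
The defms below consume sse2_pshuffle mechanically; a sketch (illustrative
only) of what "defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>,
TB, OpSize" yields for the register form:

  // Illustrative expansion; the TB and OpSize adjuncts supply the
  // 66 0F 70 encoding of PSHUFD:
  def PSHUFDri : Ii8<0x70, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
                     "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v4i32 (pshufd:$src2 VR128:$src1,
                                               (undef))))]>, TB, OpSize;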
 
-// Shuffle and unpack instructions
-let AddedComplexity = 5 in {
-def PSHUFDri : PDIi8<0x70, MRMSrcReg,
-                     (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
-                     "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set VR128:$dst, (v4i32 (pshufd:$src2
-                                               VR128:$src1, (undef))))]>;
-def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
-                     (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
-                     "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set VR128:$dst, (v4i32 (pshufd:$src2
-                                             (bc_v4i32 (memopv2i64 addr:$src1)),
-                                             (undef))))]>;
-}
-
-// SSE2 with ImmT == Imm8 and XS prefix.
-def PSHUFHWri : Ii8<0x70, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
-                    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
-                                                            (undef))))]>,
-                XS, Requires<[HasSSE2]>;
-def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
-                    (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
-                    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst, (v8i16 (pshufhw:$src2
-                                            (bc_v8i16 (memopv2i64 addr:$src1)),
-                                            (undef))))]>,
-                XS, Requires<[HasSSE2]>;
-
-// SSE2 with ImmT == Imm8 and XD prefix.
-def PSHUFLWri : Ii8<0x70, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
-                    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
-                                                            (undef))))]>,
-                XD, Requires<[HasSSE2]>;
-def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
-                    (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
-                    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst, (v8i16 (pshuflw:$src2
-                                             (bc_v8i16 (memopv2i64 addr:$src1)),
-                                             (undef))))]>,
-                XD, Requires<[HasSSE2]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+  let AddedComplexity = 5 in
+  defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
+                               VEX;
+
+  // SSE2 with ImmT == Imm8 and XS prefix.
+  defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
+                               VEX;
+
+  // SSE2 with ImmT == Imm8 and XD prefix.
+  defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
+                               VEX;
+}
+
+let Predicates = [HasSSE2] in {
+  let AddedComplexity = 5 in
+  defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
+
+  // SSE2 with ImmT == Imm8 and XS prefix.
+  defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
+
+  // SSE2 with ImmT == Imm8 and XD prefix.
+  defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Unpack Instructions
+//===---------------------------------------------------------------------===//
 
-// Unpack instructions
+let ExeDomain = SSEPackedInt in {
 multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
-                       PatFrag unp_frag, PatFrag bc_frag> {
+                       PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
   def rr : PDI<opc, MRMSrcReg,
-               (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
+      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+      !if(Is2Addr,
+          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+      [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
   def rm : PDI<opc, MRMSrcMem,
-               (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (unp_frag VR128:$src1,
-                                           (bc_frag (memopv2i64
-                                                        addr:$src2))))]>;
+      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+      !if(Is2Addr,
+          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+      [(set VR128:$dst, (unp_frag VR128:$src1,
+                                  (bc_frag (memopv2i64
+                                               addr:$src2))))]>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
+                                 0>, VEX_4V;
+  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
+                                 0>, VEX_4V;
+  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
+                                 0>, VEX_4V;
+
+  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+  /// knew to collapse (bitconvert VT to VT) into its operand.
+  def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                         "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                        [(set VR128:$dst,
+                          (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
+  def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+                         "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                        [(set VR128:$dst,
+                          (v2i64 (unpckl VR128:$src1,
+                                         (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
+                                 0>, VEX_4V;
+  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
+                                 0>, VEX_4V;
+  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
+                                 0>, VEX_4V;
+
+  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+  /// knew to collapse (bitconvert VT to VT) into its operand.
+  def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                         "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                        [(set VR128:$dst,
+                          (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
+  def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+                        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+                        "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                        [(set VR128:$dst,
+                          (v2i64 (unpckh VR128:$src1,
+                                         (memopv2i64 addr:$src2))))]>, VEX_4V;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -2319,102 +2789,117 @@
                                          (memopv2i64 addr:$src2))))]>;
 }
 
-// Extract / Insert
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
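+// Note that, unlike the multiclasses above, sse2_pinsrw hard-codes both the
+// "pinsrw" and "vpinsrw" mnemonics in its !if rather than taking an
+// OpcodeStr parameter.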
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+  def rri : Ii8<0xC4, MRMSrcReg,
+       (outs VR128:$dst), (ins VR128:$src1,
+        GR32:$src2, i32i8imm:$src3),
+       !if(Is2Addr,
+           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+       [(set VR128:$dst,
+         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+  def rmi : Ii8<0xC4, MRMSrcMem,
+                       (outs VR128:$dst), (ins VR128:$src1,
+                        i16mem:$src2, i32i8imm:$src3),
+       !if(Is2Addr,
+           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+       [(set VR128:$dst,
+         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+                    imm:$src3))]>;
+}
+
+// Extract
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+                    (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+                                                imm:$src2))]>, OpSize, VEX;
 def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
                                                 imm:$src2))]>;
-let Constraints = "$src1 = $dst" in {
-  def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
-                       (outs VR128:$dst), (ins VR128:$src1,
-                        GR32:$src2, i32i8imm:$src3),
-                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(set VR128:$dst,
-                         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
-  def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1,
-                        i16mem:$src2, i32i8imm:$src3),
-                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(set VR128:$dst,
-                         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
-                                    imm:$src3))]>;
-}
 
-// Mask creation
-def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "pmovmskb\t{$src, $dst|$dst, $src}",
-                     [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
-
-// Conditional store
-let Uses = [EDI] in
-def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
-                     "maskmovdqu\t{$mask, $src|$src, $mask}",
-                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+// Insert
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in
+  defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
 
-let Uses = [RDI] in
-def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
-                     "maskmovdqu\t{$mask, $src|$src, $mask}",
-                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+let Constraints = "$src1 = $dst" in
+  defm PINSRW : sse2_pinsrw, TB, OpSize;
 
 } // ExeDomain = SSEPackedInt
 
-// Non-temporal stores
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                        "movntpd\t{$src, $dst|$dst, $src}",
-                        [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                        "movntdq\t{$src, $dst|$dst, $src}",
-                        [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr_Int  :   I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                    "movnti\t{$src, $dst|$dst, $src}",
-                    [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
-                  TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Mask Creation
+//===---------------------------------------------------------------------===//
 
-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntpd\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+let ExeDomain = SSEPackedInt in {
 
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntdq\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-}
+let isAsmParserOnly = 1 in
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+           "pmovmskb\t{$src, $dst|$dst, $src}",
+           [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+           "pmovmskb\t{$src, $dst|$dst, $src}",
+           [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
 
-// Flush cache
-def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
-               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
-              TB, Requires<[HasSSE2]>;
+} // ExeDomain = SSEPackedInt
 
-// Load, store, and memory fence
-def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
-               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
-def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
-               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Conditional Store
+//===---------------------------------------------------------------------===//
 
-// Pause. This "instruction" is encoded as "rep; nop", so even though it
-// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+let ExeDomain = SSEPackedInt in {
 
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
-           (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
-           (i8 1)), (MFENCE)>;
+let isAsmParserOnly = 1 in {
+let Uses = [EDI] in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+           (ins VR128:$src, VR128:$mask),
+           "maskmovdqu\t{$mask, $src|$src, $mask}",
+           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+let Uses = [RDI] in
+def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
+           (ins VR128:$src, VR128:$mask),
+           "maskmovdqu\t{$mask, $src|$src, $mask}",
+           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+}
 
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-ones value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
-  // FIXME: Change encoding to pseudo.
-  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
-                         [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+           "maskmovdqu\t{$mask, $src|$src, $mask}",
+           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+           "maskmovdqu\t{$mask, $src|$src, $mask}",
+           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Doubleword
+//===---------------------------------------------------------------------===//
 
+// Move Int Doubleword to Packed Double Int
+let isAsmParserOnly = 1 in {
+def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                        (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+                      VEX;
+}
 def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -2424,6 +2909,18 @@
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
 
+// Move Int Doubleword to Single Scalar
+let isAsmParserOnly = 1 in {
+def VMOVDI2SSrr  : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+
+def VMOVDI2SSrm  : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+                      VEX;
+}
 def MOVDI2SSrr  : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>;
@@ -2432,20 +2929,18 @@
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
 
-// SSE2 instructions with XS prefix
-def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
-                    "movq\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst,
-                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
-                  Requires<[HasSSE2]>;
-def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
-                      "movq\t{$src, $dst|$dst, $src}",
-                      [(store (i64 (vector_extract (v2i64 VR128:$src),
-                                    (iPTR 0))), addr:$dst)]>;
-
-def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
-          (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
+// Move Packed Doubleword Int to Packed Double Int
+let isAsmParserOnly = 1 in {
+def VMOVPDI2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+                                        (iPTR 0)))]>, VEX;
+def VMOVPDI2DImr  : VPDI<0x7E, MRMDestMem, (outs),
+                       (ins i32mem:$dst, VR128:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(store (i32 (vector_extract (v4i32 VR128:$src),
+                                     (iPTR 0))), addr:$dst)]>, VEX;
+}
 def MOVPDI2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2455,6 +2950,15 @@
                        [(store (i32 (vector_extract (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)]>;
 
+// Move Scalar Single to Double Int
+let isAsmParserOnly = 1 in {
+def VMOVSS2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+def VMOVSS2DImr  : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+}
 def MOVSS2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))]>;
@@ -2462,44 +2966,107 @@
                       "movd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
 
+// movd / movq to XMM register zero-extends
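+// (AddedComplexity gives these patterns priority during instruction
+// selection, so the zero-extending forms win whenever they apply.)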
+let AddedComplexity = 15, isAsmParserOnly = 1 in {
+def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (v4i32 (X86vzmovl
+                                      (v4i32 (scalar_to_vector GR32:$src)))))]>,
+                                      VEX;
+def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+                       [(set VR128:$dst, (v2i64 (X86vzmovl
+                                      (v2i64 (scalar_to_vector GR64:$src)))))]>,
+                                      VEX, VEX_W;
+}
+let AddedComplexity = 15 in {
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (v4i32 (X86vzmovl
+                                      (v4i32 (scalar_to_vector GR32:$src)))))]>;
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+                       [(set VR128:$dst, (v2i64 (X86vzmovl
+                                      (v2i64 (scalar_to_vector GR64:$src)))))]>;
+}
+
+let AddedComplexity = 20 in {
+let isAsmParserOnly = 1 in
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst,
+                         (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+                                                   (loadi32 addr:$src))))))]>,
+                                                   VEX;
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst,
+                         (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+                                                   (loadi32 addr:$src))))))]>;
+
+def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+            (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+            (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+            (MOVZDI2PDIrm addr:$src)>;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Quadword
+//===---------------------------------------------------------------------===//
+
+// Move Quadword Int to Packed Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                    "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst,
+                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+                    VEX, Requires<[HasAVX, HasSSE2]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                    "movq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst,
+                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+                    Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+
+// Move Packed Quadword Int to Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+                      "movq\t{$src, $dst|$dst, $src}",
+                      [(store (i64 (vector_extract (v2i64 VR128:$src),
+                                    (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+                      "movq\t{$src, $dst|$dst, $src}",
+                      [(store (i64 (vector_extract (v2i64 VR128:$src),
+                                    (iPTR 0))), addr:$dst)]>;
+
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+          (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
 // Store / copy the lower 64 bits of an XMM register.
+let isAsmParserOnly = 1 in
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+                     "movq\t{$src, $dst|$dst, $src}",
+                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
 def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
 
-// movd / movq to XMM register zero-extends
-let AddedComplexity = 15 in {
-def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
-                       "movd\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (v4i32 (X86vzmovl
-                                      (v4i32 (scalar_to_vector GR32:$src)))))]>;
-// This is X86-64 only.
-def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
-                       "mov{d|q}\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (v2i64 (X86vzmovl
-                                      (v2i64 (scalar_to_vector GR64:$src)))))]>;
-}
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                     "vmovq\t{$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+                                                 (loadi64 addr:$src))))))]>,
+                     XS, VEX, Requires<[HasAVX, HasSSE2]>;
 
 let AddedComplexity = 20 in {
-def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
-                       "movd\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst,
-                         (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
-                                                   (loadi32 addr:$src))))))]>;
-
-def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
-            (MOVZDI2PDIrm addr:$src)>;
-def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
-            (MOVZDI2PDIrm addr:$src)>;
-def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
-            (MOVZDI2PDIrm addr:$src)>;
-
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
-                                                 (loadi64 addr:$src))))))]>, XS,
-                   Requires<[HasSSE2]>;
+                                                 (loadi64 addr:$src))))))]>,
+                     XS, Requires<[HasSSE2]>;
 
 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
             (MOVZQI2PQIrm addr:$src)>;
@@ -2510,12 +3077,23 @@
 
 // Move from XMM to XMM, clearing the upper 64 bits. Note: there is a bug in
 // the IA32 documentation; movq xmm1, xmm2 does clear the high bits.
+let isAsmParserOnly = 1, AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+                      XS, VEX, Requires<[HasAVX, HasSSE2]>;
 let AddedComplexity = 15 in
 def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                       XS, Requires<[HasSSE2]>;
 
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                        "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (X86vzmovl
+                                             (loadv2i64 addr:$src))))]>,
+                      XS, VEX, Requires<[HasAVX, HasSSE2]>;
 let AddedComplexity = 20 in {
 def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
@@ -2527,49 +3105,136 @@
             (MOVZPQILo2PQIrm addr:$src)>;
 }
 
+// Instructions to match in the assembler
+let isAsmParserOnly = 1 in {
+// These instructions are in fact aliases of movd with a 64-bit operand
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                      "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                      "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+}
+
 // Instructions for the disassembler
 // xr = XMM register
 // xm = mem64
 
+let isAsmParserOnly = 1 in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                 "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
 def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  "movq\t{$src, $dst|$dst, $src}", []>, XS;
 
 //===---------------------------------------------------------------------===//
-// SSE3 Instructions
+// SSE2 - Misc Instructions
 //===---------------------------------------------------------------------===//
 
-// Move Instructions
-def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movshdup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v4f32 (movshdup
-                                                VR128:$src, (undef))))]>;
-def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "movshdup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (movshdup
-                                         (memopv4f32 addr:$src), (undef)))]>;
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+              TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
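+// (It assembles to the bytes 0xF3 0x90.)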
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
+//TODO: custom lower this so as to never even generate the noop
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+           (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+           (i8 1)), (MFENCE)>;
+
+// Alias instruction that maps an all-ones vector to pcmpeqd.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+  // FIXME: Change encoding to pseudo.
+  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+                         [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Conversion Instructions
+//===---------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
+def VCVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTPD2DQrm  : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
 
-def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movsldup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v4f32 (movsldup
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Instructions
+//===---------------------------------------------------------------------===//
+
+// Replicate Single FP
+multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                      [(set VR128:$dst, (v4f32 (rep_frag
                                                 VR128:$src, (undef))))]>;
-def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "movsldup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (movsldup
+def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                      [(set VR128:$dst, (rep_frag
                                          (memopv4f32 addr:$src), (undef)))]>;
+}
 
-def MOVDDUPrr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movddup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
-def MOVDDUPrm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                      "movddup\t{$src, $dst|$dst, $src}",
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
+defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
+
+// Replicate Double FP
+multiclass sse3_replicate_dfp<string OpcodeStr> {
+def rr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+def rm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst,
                       (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
                                       (undef))))]>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in
+  defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+
+// Move Unaligned Integer
+let isAsmParserOnly = 1 in
+  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                     "vlddqu\t{$src, $dst|$dst, $src}",
+                     [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                   "lddqu\t{$src, $dst|$dst, $src}",
+                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
 
 def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
                    (undef)),
           (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
 
+// Additional move patterns
 let AddedComplexity = 5 in {
 def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
           (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
@@ -2581,52 +3246,98 @@
           (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
 }
 
-// Arithmetic
-let Constraints = "$src1 = $dst" in {
-  def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "addsubps\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
-                                           VR128:$src2))]>;
-  def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                        "addsubps\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
-                                           (memop addr:$src2)))]>;
-  def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
-                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                       "addsubpd\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
-                                          VR128:$src2))]>;
-  def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                       "addsubpd\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
-                                          (memop addr:$src2)))]>;
+// vector_shuffle v1, <undef> <1, 1, 3, 3>
+let AddedComplexity = 15 in
+def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
+          (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+          (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <0, 0, 2, 2>
+let AddedComplexity = 15 in
+  def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
+            (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+  def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+            (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Arithmetic
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> {
+  def rr : I<0xD0, MRMSrcReg,
+       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (Int VR128:$src1,
+                          VR128:$src2))]>;
+  def rm : I<0xD0, MRMSrcMem,
+       (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+       !if(Is2Addr,
+           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (Int VR128:$src1,
+                          (memop addr:$src2)))]>;
 }
 
-def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                   "lddqu\t{$src, $dst|$dst, $src}",
-                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3],
+  ExeDomain = SSEPackedDouble in {
+  defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD,
+                              VEX_4V;
+  defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize,
+                              VEX_4V;
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
+    ExeDomain = SSEPackedDouble in {
+  defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD;
+  defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize;
+}
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Horizontal Instructions
+//===---------------------------------------------------------------------===//
 
 // Horizontal ops
-class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
   : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+       !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
          [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
   : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+       !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
          [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
-class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
   : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+       !if(Is2Addr,
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
         [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
   : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+       !if(Is2Addr,
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
 
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
+  def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
+  def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
+  def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
+  def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
+  def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
+  def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
+  def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+  def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+}
+
 let Constraints = "$src1 = $dst" in {
   def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
   def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
@@ -2638,35 +3349,14 @@
   def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
 }
 
-// Thread synchronization
-def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
-                [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
-def MWAIT   : I<0x01, MRM_C9, (outs), (ins), "mwait",
-                [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <1, 1, 3, 3>
-let AddedComplexity = 15 in
-def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
-          (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
-def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
-          (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <0, 0, 2, 2>
-let AddedComplexity = 15 in
-  def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
-            (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
-  def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
-            (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
 //===---------------------------------------------------------------------===//
-// SSSE3 Instructions
+// SSSE3 - Packed Absolute Instructions
 //===---------------------------------------------------------------------===//
 
-/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
-multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
-                              Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+                            PatFrag mem_frag64, PatFrag mem_frag128,
+                            Intrinsic IntId64, Intrinsic IntId128> {
   def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR64:$dst, (IntId64 VR64:$src))]>;
@@ -2674,7 +3364,7 @@
   def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR64:$dst,
-                     (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;
+                     (IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
 
   def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src),
@@ -2687,220 +3377,113 @@
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst,
                       (IntId128
-                       (bitconvert (memopv16i8 addr:$src))))]>, OpSize;
+                       (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
 }
 
-/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
-multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
-                               Intrinsic IntId64, Intrinsic IntId128> {
-  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
-                   (ins VR64:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
-  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
-                   (ins i64mem:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR64:$dst,
-                     (IntId64
-                      (bitconvert (memopv4i16 addr:$src))))]>;
-
-  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
-                    (ins VR128:$src),
-                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
-                    OpSize;
+defm PABSB       : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
+                                    int_x86_ssse3_pabs_b,
+                                    int_x86_ssse3_pabs_b_128>;
+defm PABSW       : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16,
+                                    int_x86_ssse3_pabs_w,
+                                    int_x86_ssse3_pabs_w_128>;
+defm PABSD       : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
+                                    int_x86_ssse3_pabs_d,
+                                    int_x86_ssse3_pabs_d_128>;
 
-  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
-                    (ins i128mem:$src),
-                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst,
-                      (IntId128
-                       (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
-}
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Binary Operator Instructions
+//===---------------------------------------------------------------------===//
 
-/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
-multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
-                               Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
+                             PatFrag mem_frag64, PatFrag mem_frag128,
+                             Intrinsic IntId64, Intrinsic IntId128,
+                             bit Is2Addr = 1> {
+  let isCommutable = 1 in
   def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
-                   (ins VR64:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
+       (ins VR64:$src1, VR64:$src2),
+       !if(Is2Addr,
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
   def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
-                   (ins i64mem:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR64:$dst,
-                     (IntId64
-                      (bitconvert (memopv2i32 addr:$src))))]>;
+       (ins VR64:$src1, i64mem:$src2),
+       !if(Is2Addr,
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR64:$dst,
+         (IntId64 VR64:$src1,
+          (bitconvert (memopv8i8 addr:$src2))))]>;
 
+  let isCommutable = 1 in
   def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
-                    (ins VR128:$src),
-                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
-                    OpSize;
-
+       (ins VR128:$src1, VR128:$src2),
+       !if(Is2Addr,
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+       OpSize;
   def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
-                    (ins i128mem:$src),
-                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst,
-                      (IntId128
-                       (bitconvert (memopv4i32 addr:$src))))]>, OpSize;
-}
-
-defm PABSB       : SS3I_unop_rm_int_8 <0x1C, "pabsb",
-                                       int_x86_ssse3_pabs_b,
-                                       int_x86_ssse3_pabs_b_128>;
-defm PABSW       : SS3I_unop_rm_int_16<0x1D, "pabsw",
-                                       int_x86_ssse3_pabs_w,
-                                       int_x86_ssse3_pabs_w_128>;
-defm PABSD       : SS3I_unop_rm_int_32<0x1E, "pabsd",
-                                       int_x86_ssse3_pabs_d,
-                                       int_x86_ssse3_pabs_d_128>;
-
-/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
-let Constraints = "$src1 = $dst" in {
-  multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
-                                 Intrinsic IntId64, Intrinsic IntId128,
-                                 bit Commutable = 0> {
-    def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
-                     (ins VR64:$src1, VR64:$src2),
-                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                     [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
-      let isCommutable = Commutable;
-    }
-    def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
-                     (ins VR64:$src1, i64mem:$src2),
-                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                     [(set VR64:$dst,
-                       (IntId64 VR64:$src1,
-                        (bitconvert (memopv8i8 addr:$src2))))]>;
-
-    def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
-                      (ins VR128:$src1, VR128:$src2),
-                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                      [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
-                      OpSize {
-      let isCommutable = Commutable;
-    }
-    def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
-                      (ins VR128:$src1, i128mem:$src2),
-                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                      [(set VR128:$dst,
-                        (IntId128 VR128:$src1,
-                         (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
-  }
+       (ins VR128:$src1, i128mem:$src2),
+       !if(Is2Addr,
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set VR128:$dst,
+         (IntId128 VR128:$src1,
+          (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
 }
 
-/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
-let Constraints = "$src1 = $dst" in {
-  multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
-                                  Intrinsic IntId64, Intrinsic IntId128,
-                                  bit Commutable = 0> {
-    def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
-                     (ins VR64:$src1, VR64:$src2),
-                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                     [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
-      let isCommutable = Commutable;
-    }
-    def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
-                     (ins VR64:$src1, i64mem:$src2),
-                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                     [(set VR64:$dst,
-                       (IntId64 VR64:$src1,
-                        (bitconvert (memopv4i16 addr:$src2))))]>;
-
-    def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
-                      (ins VR128:$src1, VR128:$src2),
-                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                      [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
-                      OpSize {
-      let isCommutable = Commutable;
-    }
-    def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
-                      (ins VR128:$src1, i128mem:$src2),
-                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                      [(set VR128:$dst,
-                        (IntId128 VR128:$src1,
-                         (bitconvert (memopv8i16 addr:$src2))))]>, OpSize;
-  }
+// None of these have i8 immediate fields.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+let isCommutable = 0 in {
+  defm PHADDW    : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16,
+                                     int_x86_ssse3_phadd_w,
+                                     int_x86_ssse3_phadd_w_128>;
+  defm PHADDD    : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32,
+                                     int_x86_ssse3_phadd_d,
+                                     int_x86_ssse3_phadd_d_128>;
+  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16,
+                                     int_x86_ssse3_phadd_sw,
+                                     int_x86_ssse3_phadd_sw_128>;
+  defm PHSUBW    : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16,
+                                     int_x86_ssse3_phsub_w,
+                                     int_x86_ssse3_phsub_w_128>;
+  defm PHSUBD    : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32,
+                                     int_x86_ssse3_phsub_d,
+                                     int_x86_ssse3_phsub_d_128>;
+  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16,
+                                     int_x86_ssse3_phsub_sw,
+                                     int_x86_ssse3_phsub_sw_128>;
+  defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8,
+                                     int_x86_ssse3_pmadd_ub_sw,
+                                     int_x86_ssse3_pmadd_ub_sw_128>;
+  defm PSHUFB    : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8,
+                                     int_x86_ssse3_pshuf_b,
+                                     int_x86_ssse3_pshuf_b_128>;
+  defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8,
+                                     int_x86_ssse3_psign_b,
+                                     int_x86_ssse3_psign_b_128>;
+  defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16,
+                                     int_x86_ssse3_psign_w,
+                                     int_x86_ssse3_psign_w_128>;
+  defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32,
+                                     int_x86_ssse3_psign_d,
+                                     int_x86_ssse3_psign_d_128>;
+}
+defm PMULHRSW    : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16,
+                                     int_x86_ssse3_pmul_hr_sw,
+                                     int_x86_ssse3_pmul_hr_sw_128>;
 }
 
-/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
-let Constraints = "$src1 = $dst" in {
-  multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
-                                  Intrinsic IntId64, Intrinsic IntId128,
-                                  bit Commutable = 0> {
-    def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
-                     (ins VR64:$src1, VR64:$src2),
-                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                     [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
-      let isCommutable = Commutable;
-    }
-    def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
-                     (ins VR64:$src1, i64mem:$src2),
-                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                     [(set VR64:$dst,
-                       (IntId64 VR64:$src1,
-                        (bitconvert (memopv2i32 addr:$src2))))]>;
-
-    def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
-                      (ins VR128:$src1, VR128:$src2),
-                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                      [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
-                      OpSize {
-      let isCommutable = Commutable;
-    }
-    def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
-                      (ins VR128:$src1, i128mem:$src2),
-                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                      [(set VR128:$dst,
-                        (IntId128 VR128:$src1,
-                         (bitconvert (memopv4i32 addr:$src2))))]>, OpSize;
-  }
-}
+def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+          (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+          (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
 
-let ImmT = NoImm in {  // None of these have i8 immediate fields.
-defm PHADDW      : SS3I_binop_rm_int_16<0x01, "phaddw",
-                                        int_x86_ssse3_phadd_w,
-                                        int_x86_ssse3_phadd_w_128>;
-defm PHADDD      : SS3I_binop_rm_int_32<0x02, "phaddd",
-                                        int_x86_ssse3_phadd_d,
-                                        int_x86_ssse3_phadd_d_128>;
-defm PHADDSW     : SS3I_binop_rm_int_16<0x03, "phaddsw",
-                                        int_x86_ssse3_phadd_sw,
-                                        int_x86_ssse3_phadd_sw_128>;
-defm PHSUBW      : SS3I_binop_rm_int_16<0x05, "phsubw",
-                                        int_x86_ssse3_phsub_w,
-                                        int_x86_ssse3_phsub_w_128>;
-defm PHSUBD      : SS3I_binop_rm_int_32<0x06, "phsubd",
-                                        int_x86_ssse3_phsub_d,
-                                        int_x86_ssse3_phsub_d_128>;
-defm PHSUBSW     : SS3I_binop_rm_int_16<0x07, "phsubsw",
-                                        int_x86_ssse3_phsub_sw,
-                                        int_x86_ssse3_phsub_sw_128>;
-defm PMADDUBSW   : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
-                                        int_x86_ssse3_pmadd_ub_sw,
-                                        int_x86_ssse3_pmadd_ub_sw_128>;
-defm PMULHRSW    : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
-                                        int_x86_ssse3_pmul_hr_sw,
-                                        int_x86_ssse3_pmul_hr_sw_128, 1>;
-
-defm PSHUFB      : SS3I_binop_rm_int_8 <0x00, "pshufb",
-                                        int_x86_ssse3_pshuf_b,
-                                        int_x86_ssse3_pshuf_b_128>;
-defm PSIGNB      : SS3I_binop_rm_int_8 <0x08, "psignb",
-                                        int_x86_ssse3_psign_b,
-                                        int_x86_ssse3_psign_b_128>;
-defm PSIGNW      : SS3I_binop_rm_int_16<0x09, "psignw",
-                                        int_x86_ssse3_psign_w,
-                                        int_x86_ssse3_psign_w_128>;
-defm PSIGND      : SS3I_binop_rm_int_32<0x0A, "psignd",
-                                        int_x86_ssse3_psign_d,
-                                        int_x86_ssse3_psign_d_128>;
-}
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Align Instruction Patterns
+//===---------------------------------------------------------------------===//
 
-// palignr patterns.
 let Constraints = "$src1 = $dst" in {
   def PALIGNR64rr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
                            (ins VR64:$src1, VR64:$src2, i8imm:$src3),
@@ -2962,10 +3545,15 @@
       Requires<[HasSSSE3]>;
 }
 
-def : Pat<(X86pshufb VR128:$src, VR128:$mask),
-          (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
-def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
-          (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+//===---------------------------------------------------------------------===//
+// SSSE3 - Misc Instructions
+//===---------------------------------------------------------------------===//
+
+// Thread synchronization
+def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
+                [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
+def MWAIT   : I<0x01, MRM_C9, (outs), (ins), "mwait",
+                [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
 
 //===---------------------------------------------------------------------===//
 // Non-Instruction Patterns

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp Fri Jul  2 04:57:13 2010
@@ -60,6 +60,27 @@
   static unsigned GetX86RegNum(const MCOperand &MO) {
     return X86RegisterInfo::getX86RegNum(MO.getReg());
   }
+
+  // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+  // 0-7 and the difference between the 2 groups is given by the REX prefix.
+  // In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+  // in 1's complement form, example:
+  //
+  //  ModRM field => XMM9 => 1
+  //  VEX.VVVV    => XMM9 => ~9
+  //
+  // See table 4-35 of Intel AVX Programming Reference for details.
+  static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+                                              unsigned OpNum) {
+    unsigned SrcReg = MI.getOperand(OpNum).getReg();
+    unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+    if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
+      SrcRegNum += 8;
+
+    // The registers represented through VEX_VVVV should
+    // be encoded in 1's complement form.
+    return (~SrcRegNum) & 0xf;
+  }
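
A self-contained sketch of the arithmetic above: sequential register numbers
are stored inverted in VEX.VVVV, so XMM9 (number 9) encodes as 0b0110, and
all-ones (0xF) marks the field unused.

    #include <cassert>

    // Mirrors getVEXRegisterEncoding: 1's complement, truncated to 4 bits.
    static unsigned encodeVVVV(unsigned RegNum) {
      return (~RegNum) & 0xF;
    }

    int main() {
      assert(encodeVVVV(9) == 0x6);  // XMM9
      assert(encodeVVVV(0) == 0xF);  // XMM0; 0xF also means "no operand"
      return 0;
    }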
   
   void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
     OS << (char)C;
@@ -99,6 +120,9 @@
   }
   
   
+  void EmitSegmentOverridePrefix(const MCOperand &Op, unsigned TSFlags,
+                                 unsigned &CurByte, raw_ostream &OS) const;
+
   void EmitMemModRMByte(const MCInst &MI, unsigned Op,
                         unsigned RegOpcodeField, 
                         uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
@@ -131,7 +155,6 @@
   return new X86MCCodeEmitter(TM, Ctx, true);
 }
 
-
 /// isDisp8 - Return true if this signed displacement fits in a 8-bit 
 /// sign-extended field. 
 static bool isDisp8(int Value) {
@@ -188,6 +211,26 @@
   EmitConstant(0, Size, CurByte, OS);
 }
 
+void X86MCCodeEmitter::EmitSegmentOverridePrefix(const MCOperand &Op,
+                                                 unsigned TSFlags,
+                                                 unsigned &CurByte,
+                                                 raw_ostream &OS) const {
+  // If no segment register is present, we don't need anything.
+  if (Op.getReg() == 0)
+    return;
+
+  // Check if we need an override.
+  switch (Op.getReg()) {
+  case X86::CS: EmitByte(0x2E, CurByte, OS); return;
+  case X86::SS: EmitByte(0x36, CurByte, OS); return;
+  case X86::DS: EmitByte(0x3E, CurByte, OS); return;
+  case X86::ES: EmitByte(0x26, CurByte, OS); return;
+  case X86::FS: EmitByte(0x64, CurByte, OS); return;
+  case X86::GS: EmitByte(0x65, CurByte, OS); return;
+  }
+
+  assert(0 && "Invalid segment register!");
+}
 
 void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
                                         unsigned RegOpcodeField,
@@ -341,6 +384,10 @@
   if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
     return;
 
+  bool HasVEX_4V = false;
+  if ((TSFlags >> 32) & X86II::VEX_4V)
+    HasVEX_4V = true;
+
   // VEX_R: opcode extension equivalent to REX.R in
   // 1's complement (inverted) form
   //
@@ -402,9 +449,11 @@
   if (TSFlags & X86II::OpSize)
     VEX_PP = 0x01;
 
+  if ((TSFlags >> 32) & X86II::VEX_W)
+    VEX_W = 1;
+
   switch (TSFlags & X86II::Op0Mask) {
   default: assert(0 && "Invalid prefix!");
-  case 0: break;  // No prefix!
   case X86II::T8:  // 0F 38
     VEX_5M = 0x2;
     break;
@@ -421,52 +470,63 @@
   case X86II::XD:  // F2 0F
     VEX_PP = 0x3;
     break;
+  case X86II::TB:  // Bypass: Not used by VEX
+  case 0:
+    break;  // No prefix!
   }
 
   unsigned NumOps = MI.getNumOperands();
-  unsigned i = 0;
-  unsigned SrcReg = 0, SrcRegNum = 0;
-  bool IsSrcMem = false;
+  unsigned CurOp = 0;
 
   switch (TSFlags & X86II::FormMask) {
   case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+  case X86II::MRM0m: case X86II::MRM1m:
+  case X86II::MRM2m: case X86II::MRM3m:
+  case X86II::MRM4m: case X86II::MRM5m:
+  case X86II::MRM6m: case X86II::MRM7m:
+  case X86II::MRMDestMem:
+    NumOps = CurOp = X86AddrNumOperands;
   case X86II::MRMSrcMem:
-    IsSrcMem = true;
   case X86II::MRMSrcReg:
-    if (MI.getOperand(0).isReg() &&
-        X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+    if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
+        X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
       VEX_R = 0x0;
 
-    // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the
-    // range 0-7 and the difference between the 2 groups is given by the
-    // REX prefix. In the VEX prefix, registers are seen sequencially
-    // from 0-15 and encoded in 1's complement form, example:
-    //
-    //  ModRM field => XMM9 => 1
-    //  VEX.VVVV    => XMM9 => ~9
-    //
-    // See table 4-35 of Intel AVX Programming Reference for details.
-    SrcReg = MI.getOperand(1).getReg();
-    SrcRegNum = GetX86RegNum(MI.getOperand(1));
-    if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
-      SrcRegNum += 8;
-
-    // The registers represented through VEX_VVVV should
-    // be encoded in 1's complement form.
-    if ((TSFlags >> 32) & X86II::VEX_4V)
-      VEX_4V = (~SrcRegNum) & 0xf;
+    // CurOp and NumOps are equal when VEX_R represents a register used
+    // to index a memory destination (which is the last operand).
+    CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
+
+    if (HasVEX_4V) {
+      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+      CurOp++;
+    }
 
-    i = 2; // Skip the VEX.VVVV operand.
-    for (; i != NumOps; ++i) {
-      const MCOperand &MO = MI.getOperand(i);
+    for (; CurOp != NumOps; ++CurOp) {
+      const MCOperand &MO = MI.getOperand(CurOp);
       if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
         VEX_B = 0x0;
-      if (!VEX_B && MO.isReg() && IsSrcMem &&
+      if (!VEX_B && MO.isReg() &&
+          ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
           X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
         VEX_X = 0x0;
     }
     break;
-  default:
+  default: // MRMDestReg, MRM0r-MRM7r
+    if (MI.getOperand(CurOp).isReg() &&
+        X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+      VEX_B = 0;
+
+    if (HasVEX_4V)
+      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+
+    CurOp++;
+    for (; CurOp != NumOps; ++CurOp) {
+      const MCOperand &MO = MI.getOperand(CurOp);
+      if (MO.isReg() && !HasVEX_4V &&
+          X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+        VEX_R = 0x0;
+    }
+    break;
     assert(0 && "Not implemented!");
   }
 
@@ -483,7 +543,7 @@
   //
   unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
 
-  if (VEX_B && VEX_X) { // 2 byte VEX prefix
+  if (VEX_B && VEX_X && !VEX_W) { // 2 byte VEX prefix
     EmitByte(0xC5, CurByte, OS);
     EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
     return;
@@ -491,7 +551,7 @@
 
   // 3 byte VEX prefix
   EmitByte(0xC4, CurByte, OS);
-  EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_5M, CurByte, OS);
+  EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
   EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
 }
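
The two changed lines above decide between the prefix forms; a minimal
standalone model of that logic (names hypothetical, not LLVM API). The
R/X/B/W/VVVV values arrive already inverted, as in the code above.

    #include <cstdio>

    static int emitVEX(unsigned char *P, unsigned R, unsigned X, unsigned B,
                       unsigned W, unsigned VVVV, unsigned L, unsigned PP,
                       unsigned M5) {
      unsigned char Last = PP | (L << 2) | (VVVV << 3);
      if (B && X && !W) {                     // 2-byte form suffices
        P[0] = 0xC5;
        P[1] = Last | (R << 7);
        return 2;
      }
      P[0] = 0xC4;                            // 3-byte form
      P[1] = (R << 7) | (X << 6) | (B << 5) | M5;
      P[2] = Last | (W << 7);
      return 3;
    }

    int main() {
      unsigned char Buf[3];
      // No extended registers (inverted bits all 1), W clear: short form.
      printf("%d\n", emitVEX(Buf, 1, 1, 1, 0, 0xF, 0, 0, 1));   // prints 2
      // VEX.W set forces the 3-byte form even when X and B are 1.
      printf("%d\n", emitVEX(Buf, 1, 1, 1, 1, 0xF, 0, 0, 1));   // prints 3
      return 0;
    }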
 
@@ -691,15 +751,21 @@
   // Keep track of the current byte being emitted.
   unsigned CurByte = 0;
   
-  // Is this instruction encoded in AVX form?
-  bool IsAVXForm = false;
+  // Is this instruction encoded using the AVX VEX prefix?
+  bool HasVEXPrefix = false;
+
+  // Does it use the VEX.VVVV field?
+  bool HasVEX_4V = false;
+
+  if ((TSFlags >> 32) & X86II::VEX)
+    HasVEXPrefix = true;
   if ((TSFlags >> 32) & X86II::VEX_4V)
-    IsAVXForm = true;
+    HasVEX_4V = true;
 
   // FIXME: We should emit the prefixes in exactly the same order as GAS does,
   // in order to provide diffability.
 
-  if (!IsAVXForm)
+  if (!HasVEXPrefix)
     EmitOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
   else
     EmitVEXOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
@@ -737,6 +803,7 @@
     break;
   
   case X86II::MRMDestMem:
+    EmitSegmentOverridePrefix(MI.getOperand(CurOp + 4), TSFlags, CurByte, OS);
     EmitByte(BaseOpcode, CurByte, OS);
     EmitMemModRMByte(MI, CurOp,
                      GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)),
@@ -748,7 +815,7 @@
     EmitByte(BaseOpcode, CurByte, OS);
     SrcRegNum = CurOp + 1;
 
-    if (IsAVXForm) // Skip 1st src (which is encoded in VEX_VVVV)
+    if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
       SrcRegNum++;
 
     EmitRegModRMByte(MI.getOperand(SrcRegNum),
@@ -757,22 +824,25 @@
     break;
     
   case X86II::MRMSrcMem: {
-    EmitByte(BaseOpcode, CurByte, OS);
+    int AddrOperands = X86AddrNumOperands;
+    unsigned FirstMemOp = CurOp+1;
+    if (HasVEX_4V) {
+      ++AddrOperands;
+      ++FirstMemOp;  // Skip the register source (which is encoded in VEX_VVVV).
+    }
 
     // FIXME: Maybe lea should have its own form?  This is a horrible hack.
-    int AddrOperands;
     if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
         Opcode == X86::LEA16r || Opcode == X86::LEA32r)
-      AddrOperands = X86AddrNumOperands - 1; // No segment register
+      --AddrOperands; // No segment register
     else
-      AddrOperands = X86AddrNumOperands;
+      EmitSegmentOverridePrefix(MI.getOperand(FirstMemOp+4),
+                                TSFlags, CurByte, OS);
 
-    if (IsAVXForm)
-      AddrOperands++;
+    EmitByte(BaseOpcode, CurByte, OS);
 
-    // Skip the register source (which is encoded in VEX_VVVV)
-    EmitMemModRMByte(MI, IsAVXForm ? CurOp+2 : CurOp+1,
-                     GetX86RegNum(MI.getOperand(CurOp)),
+
+    EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
                      TSFlags, CurByte, OS, Fixups);
     CurOp += AddrOperands + 1;
     break;
@@ -782,6 +852,8 @@
   case X86II::MRM2r: case X86II::MRM3r:
   case X86II::MRM4r: case X86II::MRM5r:
   case X86II::MRM6r: case X86II::MRM7r:
+    if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+      CurOp++;
     EmitByte(BaseOpcode, CurByte, OS);
     EmitRegModRMByte(MI.getOperand(CurOp++),
                      (TSFlags & X86II::FormMask)-X86II::MRM0r,
@@ -791,6 +863,7 @@
   case X86II::MRM2m: case X86II::MRM3m:
   case X86II::MRM4m: case X86II::MRM5m:
   case X86II::MRM6m: case X86II::MRM7m:
+    EmitSegmentOverridePrefix(MI.getOperand(CurOp+4), TSFlags, CurByte, OS);
     EmitByte(BaseOpcode, CurByte, OS);
     EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
                      TSFlags, CurByte, OS, Fixups);
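
The segment-override emission added in this file keeps indexing the operand
list with a magic +4; the sketch below records the layout that makes that
correct, stated as an assumption about this period's X86 backend rather than
quoted LLVM code. A memory reference is five consecutive operands, which is
also why LEA, which takes no segment, uses one fewer.

    enum X86MemOperandSlot {
      MemBase    = 0,  // base register
      MemScale   = 1,  // scale immediate: 1, 2, 4 or 8
      MemIndex   = 2,  // index register
      MemDisp    = 3,  // displacement
      MemSegment = 4   // segment register, hence the +4
    };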

Modified: llvm/branches/wendling/eh/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp Fri Jul  2 04:57:13 2010
@@ -138,7 +138,6 @@
     // FALL THROUGH
   case GlobalValue::InternalLinkage:
   case GlobalValue::PrivateLinkage:
-  case GlobalValue::LinkerPrivateLinkage:
     break;
   case GlobalValue::DLLImportLinkage:
     llvm_unreachable("DLLImport linkage is not supported by this target!");

Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp Fri Jul  2 04:57:13 2010
@@ -1379,7 +1379,6 @@
     SDValue Mul0, Mul1, Addend0, Addend1;
     if (N->getValueType(0) == MVT::i32 &&
         isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
-      SDValue Zero = DAG.getConstant(0, MVT::i32);
       SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
                                     DAG.getVTList(MVT::i32, MVT::i32), Mul0,
                                     Mul1, Addend0, Addend1);

Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td Fri Jul  2 04:57:13 2010
@@ -733,7 +733,7 @@
 // TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out,
 // in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp,
 // tsetmr, sext (reg), zext (reg)
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let neverHasSideEffects = 1 in
 def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
                  "sext $dst, $src2",

Modified: llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp Fri Jul  2 04:57:13 2010
@@ -28,7 +28,7 @@
     Hello() : FunctionPass(&ID) {}
 
     virtual bool runOnFunction(Function &F) {
-      HelloCounter++;
+      ++HelloCounter;
       errs() << "Hello: ";
       errs().write_escaped(F.getName()) << '\n';
       return false;
@@ -46,7 +46,7 @@
     Hello2() : FunctionPass(&ID) {}
 
     virtual bool runOnFunction(Function &F) {
-      HelloCounter++;
+      ++HelloCounter;
       errs() << "Hello: ";
       errs().write_escaped(F.getName()) << '\n';
       return false;

Modified: llvm/branches/wendling/eh/lib/Transforms/Hello/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Hello/Makefile?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Hello/Makefile (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Hello/Makefile Fri Jul  2 04:57:13 2010
@@ -12,5 +12,13 @@
 LOADABLE_MODULE = 1
 USEDLIBS =
 
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the hello plugin.
+ifneq ($(REQUIRES_RTTI), 1)
+ifneq ($(REQUIRES_EH), 1)
+EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/Hello.exports
+endif
+endif
+
 include $(LEVEL)/Makefile.common
 

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp Fri Jul  2 04:57:13 2010
@@ -221,13 +221,16 @@
           if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
         GS.HasPHIUser = true;
       } else if (isa<CmpInst>(I)) {
+        // Nothing to analyse.
       } else if (isa<MemTransferInst>(I)) {
-        if (I->getOperand(1) == V)
+        const MemTransferInst *MTI = cast<MemTransferInst>(I);
+        if (MTI->getArgOperand(0) == V)
           GS.StoredType = GlobalStatus::isStored;
-        if (I->getOperand(2) == V)
+        if (MTI->getArgOperand(1) == V)
           GS.isLoaded = true;
       } else if (isa<MemSetInst>(I)) {
-        assert(I->getOperand(1) == V && "Memset only takes one pointer!");
+        assert(cast<MemSetInst>(I)->getArgOperand(0) == V &&
+               "Memset only takes one pointer!");
         GS.StoredType = GlobalStatus::isStored;
       } else {
         return true;  // Any other non-load instruction might take address!
@@ -1323,8 +1326,8 @@
   //      if (F2) { free(F2); F2 = 0; }
   //    }
   // The malloc can also fail if its argument is too large.
-  Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0);
-  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1),
+  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
+  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
                                   ConstantZero, "isneg");
   for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
     Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
@@ -1511,10 +1514,10 @@
 
   // If this is an allocation of a fixed size array of structs, analyze as a
   // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
-  if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1))
+  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
     if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
       AllocTy = AT->getElementType();
-  
+
   const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
   if (!AllocSTy)
     return false;
@@ -1641,7 +1644,7 @@
         // bool.
         Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
 
-        // If we're already replaced the input, StoredVal will be a cast or
+        // If we've already replaced the input, StoredVal will be a cast or
         // select instruction.  If not, it will be a load of the original
         // global.
         if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
@@ -2260,8 +2263,7 @@
                                          getVal(Values, CI->getOperand(0)),
                                          CI->getType());
     } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
-      InstResult =
-            ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
+      InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
                                            getVal(Values, SI->getOperand(1)),
                                            getVal(Values, SI->getOperand(2)));
     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
@@ -2302,7 +2304,8 @@
       if (!Callee) return false;  // Cannot resolve.
 
       SmallVector<Constant*, 8> Formals;
-      for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();
+      CallSite CS(CI);
+      for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end();
            i != e; ++i)
         Formals.push_back(getVal(Values, *i));
 

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp Fri Jul  2 04:57:13 2010
@@ -42,6 +42,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -262,8 +263,8 @@
   // char*. It returns "void", so it doesn't need to replace any of
   // Inst's uses and doesn't get a name.
   CastInst* CI = 
-    new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst);
-  Value *Args[] = { CI, Inst->getOperand(2) };
+    new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
+  Value *Args[] = { CI, Inst->getArgOperand(1) };
   CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst);
 
   SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
@@ -378,7 +379,7 @@
   const Type* SBPTy =
           Type::getInt8PtrTy(Inst->getContext());
   CastInst* BufPtr = 
-    new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst);
+    new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst);
   Value *Args[] = {
     GetSetJmpMap(Func), BufPtr,
     ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
@@ -473,7 +474,8 @@
 
   // Construct the new "invoke" instruction.
   TerminatorInst* Term = OldBB->getTerminator();
-  std::vector<Value*> Params(CI.op_begin() + 1, CI.op_end());
+  CallSite CS(&CI);
+  std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
   InvokeInst* II =
     InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
                        0, 0, 0, 0, // EH-FIXME!

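A small sketch of the CallSite idiom the two hunks above adopt, under the same
API assumption as the rest of this mail (helper name hypothetical):

    #include "llvm/Support/CallSite.h"
    #include <vector>
    using namespace llvm;

    // CallSite wraps either a CallInst or an InvokeInst and exposes
    // arg_begin()/arg_end(), so argument iteration no longer hard-codes
    // "op_begin() + 1".
    static std::vector<Value*> callArgs(Instruction *I) {
      CallSite CS(I);   // asserts I is a call or invoke
      return std::vector<Value*>(CS.arg_begin(), CS.arg_end());
    }
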
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp Fri Jul  2 04:57:13 2010
@@ -535,6 +535,7 @@
   case GlobalValue::WeakAnyLinkage:
   case GlobalValue::WeakODRLinkage:
   case GlobalValue::ExternalWeakLinkage:
+  case GlobalValue::LinkerPrivateWeakLinkage:
     return ExternalWeak;
 
   case GlobalValue::ExternalLinkage:

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp Fri Jul  2 04:57:13 2010
@@ -66,13 +66,13 @@
     return 0;
   
   // Clone the function, so that we can hack away on it.
-  DenseMap<const Value*, Value*> ValueMap;
-  Function* duplicateFunction = CloneFunction(F, ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Function* duplicateFunction = CloneFunction(F, VMap);
   duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
   F->getParent()->getFunctionList().push_back(duplicateFunction);
-  BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]);
-  BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]);
-  BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]);
+  BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
+  BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
+  BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
   
   // Go ahead and update all uses to the duplicate, so that we can just
   // use the inliner functionality when we're done hacking.
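
A sketch of the new cloning idiom, assuming the headers on this branch
(helper name hypothetical): ValueMap entries track their keys through RAUW
and deletion, where a plain DenseMap would be left with dangling Value
pointers once the clone is hacked on.

    #include "llvm/ADT/ValueMap.h"
    #include "llvm/Function.h"
    #include "llvm/Module.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    static Function *cloneForSurgery(Function *F) {
      ValueMap<const Value*, Value*> VMap;
      Function *Dup = CloneFunction(F, VMap);   // VMap: original -> clone
      Dup->setLinkage(GlobalValue::InternalLinkage);
      F->getParent()->getFunctionList().push_back(Dup);
      return Dup;
    }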

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp Fri Jul  2 04:57:13 2010
@@ -64,10 +64,10 @@
 // a call to the specialized function.  Returns the specialized function
 static Function* 
 SpecializeFunction(Function* F, 
-                   DenseMap<const Value*, Value*>& replacements) {
+                   ValueMap<const Value*, Value*>& replacements) {
   // arg numbers of deleted arguments
   DenseMap<unsigned, const Argument*> deleted;
-  for (DenseMap<const Value*, Value*>::iterator 
+  for (ValueMap<const Value*, Value*>::iterator 
          repb = replacements.begin(), repe = replacements.end();
        repb != repe; ++repb) {
     Argument const *arg = cast<const Argument>(repb->first);
@@ -155,7 +155,7 @@
                ee = distribution.end(); ii != ee; ++ii)
           if (total > ii->second && ii->first &&
                ii->second > total * ConstValPercent) {
-            DenseMap<const Value*, Value*> m;
+            ValueMap<const Value*, Value*> m;
             Function::arg_iterator arg = F.arg_begin();
             for (int y = 0; y < interestingArgs[x]; ++y)
               ++arg;

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp Fri Jul  2 04:57:13 2010
@@ -73,6 +73,19 @@
       AU.setPreservesAll();
     }
   };
+
+  class StripDeadDebugInfo : public ModulePass {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit StripDeadDebugInfo()
+      : ModulePass(&ID) {}
+
+    virtual bool runOnModule(Module &M);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
 }
 
 char StripSymbols::ID = 0;
@@ -99,6 +112,14 @@
   return new StripDebugDeclare();
 }
 
+char StripDeadDebugInfo::ID = 0;
+static RegisterPass<StripDeadDebugInfo>
+A("strip-dead-debug-info", "Strip debug info for unused symbols");
+
+ModulePass *llvm::createStripDeadDebugInfoPass() {
+  return new StripDeadDebugInfo();
+}
+
 /// OnlyUsedBy - Return true if V is only used by Usr.
 static bool OnlyUsedBy(Value *V, Value *Usr) {
   for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
@@ -223,27 +244,27 @@
     Changed = true;
   }
 
-  NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
-  if (NMD) {
-    Changed = true;
-    NMD->eraseFromParent();
-  }
-  
-  NMD = M.getNamedMetadata("llvm.dbg.lv");
-  if (NMD) {
-    Changed = true;
-    NMD->eraseFromParent();
+  for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
+         NME = M.named_metadata_end(); NMI != NME;) {
+    NamedMDNode *NMD = NMI;
+    ++NMI;
+    if (NMD->getName().startswith("llvm.dbg.")) {
+      NMD->eraseFromParent();
+      Changed = true;
+    }
   }
-  
+
   unsigned MDDbgKind = M.getMDKindID("dbg");
-  for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) 
+  for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
     for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
          ++FI)
       for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
-           ++BI) 
+           ++BI) {
+        Changed = true; // FIXME: Only set if there was debug metadata.
         BI->setMetadata(MDDbgKind, 0);
+      }
 
-  return true;
+  return Changed;
 }
 
 bool StripSymbols::runOnModule(Module &M) {
@@ -266,8 +287,8 @@
   if (Declare) {
     while (!Declare->use_empty()) {
       CallInst *CI = cast<CallInst>(Declare->use_back());
-      Value *Arg1 = CI->getOperand(1);
-      Value *Arg2 = CI->getOperand(2);
+      Value *Arg1 = CI->getArgOperand(0);
+      Value *Arg2 = CI->getArgOperand(1);
       assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
       CI->eraseFromParent();
       if (Arg1->use_empty()) {
@@ -295,3 +316,83 @@
 
   return true;
 }
+
+/// getRealLinkageName - If the name begins with the special LLVM prefix that
+/// tells the asm printer not to emit the usual symbol prefix before the symbol
+/// name, return the linkage name with that prefix skipped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+  char One = '\1';
+  if (LinkageName.startswith(StringRef(&One, 1)))
+    return LinkageName.substr(1);
+  return LinkageName;
+}
+
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+  bool Changed = false;
+
+  // Debugging information is encoded in LLVM IR using metadata. This is
+  // designed in such a way that debug info for a symbol is preserved even if
+  // the symbol is optimized away by the optimizer. This special pass removes
+  // debug info for such dead symbols.
+
+  // llvm.dbg.gv keeps track of debug info for global variables.
+  if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+    SmallVector<MDNode *, 8> MDs;
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      if (DIGlobalVariable(NMD->getOperand(i)).Verify())
+        MDs.push_back(NMD->getOperand(i));
+      else
+        Changed = true;
+    NMD->eraseFromParent();
+    NMD = NULL;
+
+    for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+           E = MDs.end(); I != E; ++I) {
+      if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(), 
+                              true)) {
+        if (!NMD)
+          NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+        NMD->addOperand(*I);
+      }
+      else
+        Changed = true;
+    }
+  }
+
+  // llvm.dbg.sp keeps track of debug info for subprograms.
+  if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
+    SmallVector<MDNode *, 8> MDs;
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      if (DISubprogram(NMD->getOperand(i)).Verify())
+        MDs.push_back(NMD->getOperand(i));
+      else
+        Changed = true;
+    NMD->eraseFromParent();
+    NMD = NULL;
+
+    for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+           E = MDs.end(); I != E; ++I) {
+      bool FnIsLive = false;
+      if (Function *F = DISubprogram(*I).getFunction())
+        if (M.getFunction(F->getName()))
+          FnIsLive = true;
+      if (FnIsLive) {
+          if (!NMD)
+            NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+          NMD->addOperand(*I);
+      } else {
+        // Remove the llvm.dbg.lv.<fnname> named mdnode, which may have been
+        // used to hold debug info for the dead function's local variables.
+        StringRef FName = DISubprogram(*I).getLinkageName();
+        if (FName.empty())
+          FName = DISubprogram(*I).getName();
+        if (NamedMDNode *LVNMD = 
+            M.getNamedMetadata(Twine("llvm.dbg.lv.", 
+                                     getRealLinkageName(FName)))) 
+          LVNMD->eraseFromParent();
+      }
+    }
+  }
+
+  return Changed;
+}
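
runOnModule above uses the same rebuild pattern twice, because NamedMDNode has
no operand-removal API at this revision; a hedged sketch of that pattern
(helper name hypothetical):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Metadata.h"
    #include "llvm/Module.h"
    using namespace llvm;

    // Collect the keepers, erase the node, repopulate a fresh one.
    static void keepOnly(Module &M, StringRef Name,
                         const SmallVectorImpl<MDNode*> &Keep) {
      if (NamedMDNode *Old = M.getNamedMetadata(Name))
        Old->eraseFromParent();
      if (Keep.empty())
        return;
      NamedMDNode *New = M.getOrInsertNamedMetadata(Name);
      for (unsigned i = 0, e = Keep.size(); i != e; ++i)
        New->addOperand(Keep[i]);
    }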

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp Fri Jul  2 04:57:13 2010
@@ -107,12 +107,12 @@
   // Check if it is ok to perform this promotion.
   if (isSafeToUpdateAllCallers(F) == false) {
     DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n");
-    NumRejectedSRETUses++;
+    ++NumRejectedSRETUses;
     return 0;
   }
 
   DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n");
-  NumSRET++;
+  ++NumSRET;
   // [1] Replace use of sret parameter 
   AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", 
                                          F->getEntryBlock().begin());

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h Fri Jul  2 04:57:13 2010
@@ -179,7 +179,7 @@
   Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
   Instruction *visitAllocaInst(AllocaInst &AI);
   Instruction *visitMalloc(Instruction &FI);
-  Instruction *visitFree(Instruction &FI);
+  Instruction *visitFree(CallInst &FI);
   Instruction *visitLoadInst(LoadInst &LI);
   Instruction *visitStoreInst(StoreInst &SI);
   Instruction *visitBranchInst(BranchInst &BI);

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp Fri Jul  2 04:57:13 2010
@@ -112,8 +112,8 @@
 }
 
 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
-  unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
-  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+  unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0));
+  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1));
   unsigned MinAlign = std::min(DstAlign, SrcAlign);
   unsigned CopyAlign = MI->getAlignment();
 
@@ -125,7 +125,7 @@
   
   // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
   // load/store.
-  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
+  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
   if (MemOpLength == 0) return 0;
   
   // Source and destination pointer types are always "i8*" for intrinsic.  See
@@ -140,9 +140,9 @@
   
   // Use an integer load+store unless we can find something better.
   unsigned SrcAddrSp =
-    cast<PointerType>(MI->getOperand(2)->getType())->getAddressSpace();
+    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
   unsigned DstAddrSp =
-    cast<PointerType>(MI->getOperand(1)->getType())->getAddressSpace();
+    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
 
   const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
   Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
@@ -154,8 +154,8 @@
   // an i64 load+store, here because this improves the odds that the source or
   // dest address will be promotable.  See if we can find a better type than the
   // integer datatype.
-  Value *StrippedDest = MI->getOperand(1)->stripPointerCasts();
-  if (StrippedDest != MI->getOperand(1)) {
+  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+  if (StrippedDest != MI->getArgOperand(0)) {
     const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                                     ->getElementType();
     if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
@@ -189,15 +189,15 @@
   SrcAlign = std::max(SrcAlign, CopyAlign);
   DstAlign = std::max(DstAlign, CopyAlign);
   
-  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy);
-  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy);
+  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
+  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
   Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign);
   InsertNewInstBefore(L, *MI);
   InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign),
                       *MI);
 
   // Set the size of the copy to 0, it will be deleted on the next iteration.
-  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
+  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
   return MI;
 }
 
@@ -263,7 +263,7 @@
   
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
   if (!II) return visitCallSite(&CI);
-  
+
   // Intrinsics cannot occur in an invoke, so handle them here instead of in
   // visitCallSite.
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
@@ -289,11 +289,10 @@
         if (GVSrc->isConstant()) {
           Module *M = CI.getParent()->getParent()->getParent();
           Intrinsic::ID MemCpyID = Intrinsic::memcpy;
-          const Type *Tys[3] = { CI.getOperand(1)->getType(),
-                                 CI.getOperand(2)->getType(),
-                                 CI.getOperand(3)->getType() };
-          CI.setCalledFunction( 
-                        Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
+          const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+                                 CI.getArgOperand(1)->getType(),
+                                 CI.getArgOperand(2)->getType() };
+          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
           Changed = true;
         }
     }
@@ -313,7 +312,7 @@
       if (Instruction *I = SimplifyMemSet(MSI))
         return I;
     }
-          
+
     if (Changed) return II;
   }
   
@@ -324,10 +323,10 @@
     if (!TD) break;
     
     const Type *ReturnTy = CI.getType();
-    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
 
     // Get to the real allocated thing and offset as fast as possible.
-    Value *Op1 = II->getOperand(1)->stripPointerCasts();
+    Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
     
     // If we've stripped down to a single global variable that we
     // can know the size of then just return that.
@@ -395,7 +394,6 @@
       
       Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
       return ReplaceInstUsesWith(CI, RetVal);
-      
     } 
 
     // Do not return "I don't know" here. Later optimization passes could
@@ -404,45 +402,45 @@
   }
   case Intrinsic::bswap:
     // bswap(bswap(x)) -> x
-    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
       if (Operand->getIntrinsicID() == Intrinsic::bswap)
-        return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+        return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
       
     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
-    if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) {
+    if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
       if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
         if (Operand->getIntrinsicID() == Intrinsic::bswap) {
           unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
                        TI->getType()->getPrimitiveSizeInBits();
           Value *CV = ConstantInt::get(Operand->getType(), C);
-          Value *V = Builder->CreateLShr(Operand->getOperand(1), CV);
+          Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
           return new TruncInst(V, TI->getType());
         }
     }
       
     break;
   case Intrinsic::powi:
-    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
+    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // powi(x, 0) -> 1.0
       if (Power->isZero())
         return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
       // powi(x, 1) -> x
       if (Power->isOne())
-        return ReplaceInstUsesWith(CI, II->getOperand(1));
+        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
       // powi(x, -1) -> 1/x
       if (Power->isAllOnesValue())
         return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
-                                          II->getOperand(1));
+                                          II->getArgOperand(0));
     }
     break;
   case Intrinsic::cttz: {
     // If all bits below the first known one are known zero,
     // this value is constant.
-    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
-    ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
                       KnownZero, KnownOne);
     unsigned TrailingZeros = KnownOne.countTrailingZeros();
     APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
@@ -455,11 +453,11 @@
   case Intrinsic::ctlz: {
     // If all bits above the first known one are known zero,
     // this value is constant.
-    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
-    ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
                       KnownZero, KnownOne);
     unsigned LeadingZeros = KnownOne.countLeadingZeros();
     APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
@@ -470,8 +468,8 @@
     }
     break;
   case Intrinsic::uadd_with_overflow: {
-    Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
-    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
     uint32_t BitWidth = IT->getBitWidth();
     APInt Mask = APInt::getSignBit(BitWidth);
     APInt LHSKnownZero(BitWidth, 0);
@@ -515,19 +513,19 @@
   // FALL THROUGH uadd into sadd
   case Intrinsic::sadd_with_overflow:
     // Canonicalize constants into the RHS.
-    if (isa<Constant>(II->getOperand(1)) &&
-        !isa<Constant>(II->getOperand(2))) {
-      Value *LHS = II->getOperand(1);
-      II->setOperand(1, II->getOperand(2));
-      II->setOperand(2, LHS);
+    if (isa<Constant>(II->getArgOperand(0)) &&
+        !isa<Constant>(II->getArgOperand(1))) {
+      Value *LHS = II->getArgOperand(0);
+      II->setArgOperand(0, II->getArgOperand(1));
+      II->setArgOperand(1, LHS);
       return II;
     }
 
     // X + undef -> undef
-    if (isa<UndefValue>(II->getOperand(2)))
+    if (isa<UndefValue>(II->getArgOperand(1)))
       return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
       
-    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // X + 0 -> {X, false}
       if (RHS->isZero()) {
         Constant *V[] = {
@@ -535,7 +533,7 @@
           ConstantInt::getFalse(II->getContext())
         };
         Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
-        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
     break;
@@ -543,38 +541,38 @@
   case Intrinsic::ssub_with_overflow:
     // undef - X -> undef
     // X - undef -> undef
-    if (isa<UndefValue>(II->getOperand(1)) ||
-        isa<UndefValue>(II->getOperand(2)))
+    if (isa<UndefValue>(II->getArgOperand(0)) ||
+        isa<UndefValue>(II->getArgOperand(1)))
       return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
       
-    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // X - 0 -> {X, false}
       if (RHS->isZero()) {
         Constant *V[] = {
-          UndefValue::get(II->getOperand(1)->getType()),
+          UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
         Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
-        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
     break;
   case Intrinsic::umul_with_overflow:
   case Intrinsic::smul_with_overflow:
     // Canonicalize constants into the RHS.
-    if (isa<Constant>(II->getOperand(1)) &&
-        !isa<Constant>(II->getOperand(2))) {
-      Value *LHS = II->getOperand(1);
-      II->setOperand(1, II->getOperand(2));
-      II->setOperand(2, LHS);
+    if (isa<Constant>(II->getArgOperand(0)) &&
+        !isa<Constant>(II->getArgOperand(1))) {
+      Value *LHS = II->getArgOperand(0);
+      II->setArgOperand(0, II->getArgOperand(1));
+      II->setArgOperand(1, LHS);
       return II;
     }
 
     // X * undef -> undef
-    if (isa<UndefValue>(II->getOperand(2)))
+    if (isa<UndefValue>(II->getArgOperand(1)))
       return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
       
-    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
+    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // X*0 -> {0, false}
       if (RHSI->isZero())
         return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
@@ -582,11 +580,11 @@
       // X * 1 -> {X, false}
       if (RHSI->equalsInt(1)) {
         Constant *V[] = {
-          UndefValue::get(II->getOperand(1)->getType()),
+          UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
         Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
-        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
     break;
@@ -597,8 +595,8 @@
   case Intrinsic::x86_sse2_loadu_dq:
     // Turn PPC lvx     -> load if the pointer is known aligned.
     // Turn X86 loadups -> load if the pointer is known aligned.
-    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
-      Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
+    if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
       return new LoadInst(Ptr);
     }
@@ -606,22 +604,22 @@
   case Intrinsic::ppc_altivec_stvx:
   case Intrinsic::ppc_altivec_stvxl:
     // Turn stvx -> store if the pointer is known aligned.
-    if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+    if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) {
       const Type *OpPtrTy = 
-        PointerType::getUnqual(II->getOperand(1)->getType());
-      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
-      return new StoreInst(II->getOperand(1), Ptr);
+        PointerType::getUnqual(II->getArgOperand(0)->getType());
+      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+      return new StoreInst(II->getArgOperand(0), Ptr);
     }
     break;
   case Intrinsic::x86_sse_storeu_ps:
   case Intrinsic::x86_sse2_storeu_pd:
   case Intrinsic::x86_sse2_storeu_dq:
     // Turn X86 storeu -> store if the pointer is known aligned.
-    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+    if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
       const Type *OpPtrTy = 
-        PointerType::getUnqual(II->getOperand(2)->getType());
-      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
-      return new StoreInst(II->getOperand(2), Ptr);
+        PointerType::getUnqual(II->getArgOperand(1)->getType());
+      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
+      return new StoreInst(II->getArgOperand(1), Ptr);
     }
     break;
     
@@ -629,12 +627,12 @@
     // These intrinsics only demands the 0th element of its input vector.  If
     // we can simplify the input based on that, do so now.
     unsigned VWidth =
-      cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
+      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
     APInt DemandedElts(VWidth, 1);
     APInt UndefElts(VWidth, 0);
-    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                               UndefElts)) {
-      II->setOperand(1, V);
+      II->setArgOperand(0, V);
       return II;
     }
     break;
@@ -642,7 +640,7 @@
     
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
-    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
       assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
       
       // Check that all of the elements are integer constants or undefs.
@@ -657,8 +655,8 @@
       
       if (AllEltsOk) {
         // Cast the input vectors to byte vectors.
-        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
-        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
+        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType());
+        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType());
         Value *Result = UndefValue::get(Op0->getType());
         
         // Only extract each element once.
@@ -691,7 +689,7 @@
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore.  This can
     // happen when variable allocas are DCE'd.
-    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
       if (SS->getIntrinsicID() == Intrinsic::stacksave) {
         BasicBlock::iterator BI = SS;
         if (&*++BI == II)
@@ -774,13 +772,13 @@
     NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
   }
   bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
-    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp))) {
+    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) {
       if (SizeCI->isAllOnesValue())
         return true;
       if (isString)
         return SizeCI->getZExtValue() >=
-               GetStringLength(CI->getOperand(SizeArgOp));
-      if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp)))
+               GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset));
+      if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)))
         return SizeCI->getZExtValue() >= Arg->getZExtValue();
     }
     return false;
@@ -848,7 +846,7 @@
                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                   CS.getInstruction());
 
-    // If CS dues not return void then replaceAllUsesWith undef.
+    // If CS does not return void then replaceAllUsesWith undef.
     // This allows ValueHandlers and custom metadata to adjust itself.
     if (!CS.getInstruction()->getType()->isVoidTy())
       CS.getInstruction()->
@@ -1145,7 +1143,7 @@
   IntrinsicInst *Tramp =
     cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
 
-  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
+  Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
   const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
   const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
 
@@ -1186,7 +1184,7 @@
         do {
           if (Idx == NestIdx) {
             // Add the chain argument and attributes.
-            Value *NestVal = Tramp->getOperand(3);
+            Value *NestVal = Tramp->getArgOperand(2);
             if (NestVal->getType() != NestTy)
               NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
             NewArgs.push_back(NestVal);
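
One subtlety in this file: isFoldable's SizeCIOp/SizeArgOp are old-style
operand indices, while getArgOperand wants argument indices, and
CallInst::ArgOffset, a transitional constant giving the number of operands
that precede the arguments, bridges the two schemes. A sketch making the
subtraction explicit (helper name hypothetical):

    #include "llvm/Instructions.h"
    using namespace llvm;

    static Value *argFromOperandIndex(CallInst *CI, unsigned OperandIdx) {
      return CI->getArgOperand(OperandIdx - CallInst::ArgOffset);
    }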

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCompares.cpp Fri Jul  2 04:57:13 2010
@@ -1423,7 +1423,7 @@
       switch (II->getIntrinsicID()) {
       case Intrinsic::bswap:
         Worklist.Add(II);
-        ICI.setOperand(0, II->getOperand(1));
+        ICI.setOperand(0, II->getArgOperand(0));
         ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
         return &ICI;
       case Intrinsic::ctlz:
@@ -1431,7 +1431,7 @@
         // ctz(A) == bitwidth(a)  ->  A == 0 and likewise for !=
         if (RHSV == RHS->getType()->getBitWidth()) {
           Worklist.Add(II);
-          ICI.setOperand(0, II->getOperand(1));
+          ICI.setOperand(0, II->getArgOperand(0));
           ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
           return &ICI;
         }
@@ -1440,7 +1440,7 @@
         // popcount(A) == 0  ->  A == 0 and likewise for !=
         if (RHS->isZero()) {
           Worklist.Add(II);
-          ICI.setOperand(0, II->getOperand(1));
+          ICI.setOperand(0, II->getArgOperand(0));
           ICI.setOperand(1, RHS);
           return &ICI;
         }
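
A self-contained illustration of the bswap fold this hunk touches, using only
APInt and no pass machinery: icmp eq (bswap X), C becomes icmp eq X, C' with
C' the byte-swapped constant.

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      APInt C(32, 0x11223344);
      assert(C.byteSwap() == APInt(32, 0x44332211));
      return 0;
    }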

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp Fri Jul  2 04:57:13 2010
@@ -404,7 +404,7 @@
           isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){
         bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
         Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
-        Value *Cmp = Builder->CreateICmpEQ(II->getOperand(1), RHS);
+        Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
         return new ZExtInst(Cmp, II->getType());
       }
     }
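
The fold this hunk touches leans on a counting fact worth spelling out: for
i32, ctpop(x) reaches 32 (binary 100000) only when x is all-ones, so
ctpop(x) >> 5 is exactly zext(x == -1). A self-contained check with GCC/Clang
builtins:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Samples[] = { 0u, 1u, 0x80000000u,
                                   0xFFFFFFFEu, 0xFFFFFFFFu };
      for (unsigned i = 0; i != 5; ++i) {
        uint32_t X = Samples[i];
        assert((__builtin_popcount(X) >> 5) == (X == 0xFFFFFFFFu ? 1 : 0));
      }
      return 0;
    }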

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Fri Jul  2 04:57:13 2010
@@ -732,10 +732,10 @@
           // the right place.
           Instruction *NewVal;
           if (InputBit > ResultBit)
-            NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+            NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
                     ConstantInt::get(I->getType(), InputBit-ResultBit));
           else
-            NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+            NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
                     ConstantInt::get(I->getType(), ResultBit-InputBit));
           NewVal->takeName(I);
           return InsertNewInstBefore(NewVal, *I);
@@ -1052,12 +1052,12 @@
     case Intrinsic::x86_sse2_mul_sd:
     case Intrinsic::x86_sse2_min_sd:
     case Intrinsic::x86_sse2_max_sd:
-      TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                         UndefElts, Depth+1);
-      if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
-      TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
+      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                         UndefElts2, Depth+1);
-      if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
+      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
 
       // If only the low elt is demanded and this is a scalarizable intrinsic,
       // scalarize it now.
@@ -1069,8 +1069,8 @@
         case Intrinsic::x86_sse2_sub_sd:
         case Intrinsic::x86_sse2_mul_sd:
           // TODO: Lower MIN/MAX/ABS/etc
-          Value *LHS = II->getOperand(1);
-          Value *RHS = II->getOperand(2);
+          Value *LHS = II->getArgOperand(0);
+          Value *RHS = II->getArgOperand(1);
           // Extract the element as scalars.
           LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, 
             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp Fri Jul  2 04:57:13 2010
@@ -756,8 +756,8 @@
 
 
 
-Instruction *InstCombiner::visitFree(Instruction &FI) {
-  Value *Op = FI.getOperand(1);
+Instruction *InstCombiner::visitFree(CallInst &FI) {
+  Value *Op = FI.getArgOperand(0);
 
   // free undef -> unreachable.
   if (isa<UndefValue>(Op)) {
@@ -925,7 +925,7 @@
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
     // We're extracting from an intrinsic, see if we're the only user, which
     // allows us to simplify multiple result intrinsics to simpler things that
-    // just get one value..
+    // just get one value.
     if (II->hasOneUse()) {
       // Check if we're grabbing the overflow bit or the result of a 'with
       // overflow' intrinsic.  If it's the latter we can remove the intrinsic
@@ -934,7 +934,7 @@
       case Intrinsic::uadd_with_overflow:
       case Intrinsic::sadd_with_overflow:
         if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
           II->replaceAllUsesWith(UndefValue::get(II->getType()));
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateAdd(LHS, RHS);
@@ -943,7 +943,7 @@
       case Intrinsic::usub_with_overflow:
       case Intrinsic::ssub_with_overflow:
         if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
           II->replaceAllUsesWith(UndefValue::get(II->getType()));
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateSub(LHS, RHS);
@@ -952,7 +952,7 @@
       case Intrinsic::umul_with_overflow:
       case Intrinsic::smul_with_overflow:
         if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
           II->replaceAllUsesWith(UndefValue::get(II->getType()));
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateMul(LHS, RHS);

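The visitFree retype above (Instruction& to CallInst&) exists so the body can use getArgOperand, and the extractvalue hunks fold a *.with.overflow intrinsic whose aggregate result is only consumed at field 0 into the plain binary operator. A standalone sketch of that fold for the uadd case, assuming this branch's headers; foldOverflowExtract is hypothetical, and it omits the replaceAllUsesWith/erase bookkeeping the real code performs:

#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
using namespace llvm;

static Instruction *foldOverflowExtract(ExtractValueInst &EV) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(EV.getAggregateOperand());
  if (!II || !II->hasOneUse())
    return 0;
  // Only the normal result (field 0) can be rewritten; a use of the
  // overflow bit (field 1) still needs the intrinsic.
  if (*EV.idx_begin() != 0 ||
      II->getIntrinsicID() != Intrinsic::uadd_with_overflow)
    return 0;
  return BinaryOperator::CreateAdd(II->getArgOperand(0),
                                   II->getArgOperand(1));
}
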
Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp Fri Jul  2 04:57:13 2010
@@ -143,7 +143,7 @@
     ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
     if (!std::binary_search(MST.begin(), MST.end(), edge)) {
       printEdgeCounter(edge,entry,i);
-      IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++;
+      IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
       Initializer[i++] = (Zero);
     } else {
       Initializer[i++] = (Uncounted);
@@ -166,7 +166,7 @@
         ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
         if (!std::binary_search(MST.begin(), MST.end(), edge)) {
           printEdgeCounter(edge,BB,i);
-          IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+          IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
           Initializer[i++] = (Zero);
         } else {
           Initializer[i++] = (Uncounted);
@@ -189,11 +189,11 @@
           if (TI->getNumSuccessors() == 1) {
             // Insert counter at the start of the block
             printEdgeCounter(edge,BB,i);
-            IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+            IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
           } else {
             // Insert counter at the start of the block
             printEdgeCounter(edge,Succ,i);
-            IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++;
+            IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
           }
           Initializer[i++] = (Zero);
         } else {

Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp Fri Jul  2 04:57:13 2010
@@ -61,8 +61,8 @@
   }
   Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
 
-  Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
-                                           "newargc", InsertPos);
+  CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
+                                        "newargc", InsertPos);
 
   // If argc or argv are not available in main, just pass null values in.
   Function::arg_iterator AI;
@@ -73,10 +73,10 @@
     if (AI->getType() != ArgVTy) {
       Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, 
                                                             false);
-      InitCall->setOperand(2, 
+      InitCall->setArgOperand(1, 
           CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
     } else {
-      InitCall->setOperand(2, AI);
+      InitCall->setArgOperand(1, AI);
     }
     /* FALL THROUGH */
 
@@ -93,12 +93,12 @@
       }
       opcode = CastInst::getCastOpcode(AI, true,
                                        Type::getInt32Ty(Context), true);
-      InitCall->setOperand(1, 
+      InitCall->setArgOperand(0, 
           CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
                            "argc.cast", InitCall));
     } else {
       AI->replaceAllUsesWith(InitCall);
-      InitCall->setOperand(1, AI);
+      InitCall->setArgOperand(0, AI);
     }
 
   case 0: break;

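Retyping InitCall from Instruction* to CallInst* in the hunk above is what makes the subsequent rewrites legal: setArgOperand is a member of CallInst, not of Instruction, and argument i corresponds to operand i+1 while the callee leads the operand list. A minimal sketch under the same headers (makeInitCall and its parameters are illustrative):

#include "llvm/Instructions.h"
using namespace llvm;

static CallInst *makeInitCall(Function *InitFn, Value **ArgsBegin,
                              Value **ArgsEnd, Instruction *InsertPos,
                              Value *Argc) {
  CallInst *Call = CallInst::Create(InitFn, ArgsBegin, ArgsEnd,
                                    "newargc", InsertPos);
  Call->setArgOperand(0, Argc);  // was setOperand(1, Argc) in the old style
  return Call;
}
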
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp Fri Jul  2 04:57:13 2010
@@ -83,7 +83,7 @@
   
   for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
        E = worklist.end(); I != E; ++I) {
-    NumRemoved++;
+    ++NumRemoved;
     (*I)->eraseFromParent();
   }
 

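The NumRemoved++ to ++NumRemoved change here, repeated for the statistics counters throughout this commit, is the usual LLVM style preference for pre-increment: the two spellings generate identical code for a plain integer, but post-increment on a class type must copy the old value. A self-contained illustration:

// Post-increment on a class-type iterator copies itself just to
// return the old value; pre-increment simply advances.
struct Iter {
  int *P;
  Iter &operator++() { ++P; return *this; }                   // no copy
  Iter operator++(int) { Iter Old = *this; ++P; return Old; } // copy
};

Using one convention everywhere means nothing needs to change when a counter later becomes an iterator-like object.
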
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp Fri Jul  2 04:57:13 2010
@@ -548,7 +548,8 @@
     CI->eraseFromParent();
   }
   bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
-    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp)))
+    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp
+                                                      - CallInst::ArgOffset)))
       return SizeCI->isAllOnesValue();
     return false;
   }
@@ -559,7 +560,7 @@
   // Lower all uses of llvm.objectsize.*
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
-    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
     const Type *ReturnTy = CI->getType();
     Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);    
     CI->replaceAllUsesWith(RetVal);
@@ -759,8 +760,7 @@
     }
 
     // Compute the constraint code and ConstraintType to use.
-    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
-                             OpInfo.ConstraintType == TargetLowering::C_Memory);
+    TLI->ComputeConstraintToUse(OpInfo, SDValue());
 
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.isIndirect) {

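isFoldable above receives a raw operand index from its callers, so the conversion subtracts the transitional CallInst::ArgOffset constant, which this branch defines as the number of slots before the first argument (1 while the callee occupies operand 0). The same hunk also drops the third argument to ComputeConstraintToUse to match its new signature. The index bridge as a sketch (operandAsArg is hypothetical):

#include "llvm/Instructions.h"
using namespace llvm;

static Value *operandAsArg(CallInst *CI, unsigned OperandIdx) {
  // OperandIdx counts from the start of the operand list; subtracting
  // ArgOffset converts it to an argument number.
  return CI->getArgOperand(OperandIdx - CallInst::ArgOffset);
}
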
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp Fri Jul  2 04:57:13 2010
@@ -56,7 +56,8 @@
     }
     
     bool runOnBasicBlock(BasicBlock &BB);
-    bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep);
+    bool handleFreeWithNonTrivialDependency(const CallInst *F,
+                                            MemDepResult Dep);
     bool handleEndBlock(BasicBlock &BB);
     bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
                               BasicBlock::iterator &BBI,
@@ -123,14 +124,15 @@
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getPointerOperand();
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
-    return MI->getOperand(1);
-  
-  switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+    return MI->getArgOperand(0);
+
+  IntrinsicInst *II = cast<IntrinsicInst>(I);
+  switch (II->getIntrinsicID()) {
   default: assert(false && "Unexpected intrinsic!");
   case Intrinsic::init_trampoline:
-    return I->getOperand(1);
+    return II->getArgOperand(0);
   case Intrinsic::lifetime_end:
-    return I->getOperand(2);
+    return II->getArgOperand(1);
   }
 }
 
@@ -147,12 +149,13 @@
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
     Len = MI->getLength();
   } else {
-    switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+    IntrinsicInst *II = cast<IntrinsicInst>(I);
+    switch (II->getIntrinsicID()) {
     default: assert(false && "Unexpected intrinsic!");
     case Intrinsic::init_trampoline:
       return -1u;
     case Intrinsic::lifetime_end:
-      Len = I->getOperand(1);
+      Len = II->getArgOperand(0);
       break;
     }
   }
@@ -201,8 +204,8 @@
     if (InstDep.isNonLocal()) continue;
   
     // Handle frees whose dependencies are non-trivial.
-    if (isFreeCall(Inst)) {
-      MadeChange |= handleFreeWithNonTrivialDependency(Inst, InstDep);
+    if (const CallInst *F = isFreeCall(Inst)) {
+      MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
       continue;
     }
     
@@ -218,7 +221,7 @@
           isElidable(DepStore)) {
         // Delete the store and now-dead instructions that feed it.
         DeleteDeadInstruction(DepStore);
-        NumFastStores++;
+        ++NumFastStores;
         MadeChange = true;
 
         // DeleteDeadInstruction can delete the current instruction in loop
@@ -249,7 +252,7 @@
             BBI = BB.begin();
           else if (BBI != BB.begin())  // Revisit this instruction if possible.
             --BBI;
-          NumFastStores++;
+          ++NumFastStores;
           MadeChange = true;
           continue;
         }
@@ -270,7 +273,7 @@
           BBI = BB.begin();
         else if (BBI != BB.begin())  // Revisit this instruction if possible.
           --BBI;
-        NumFastStores++;
+        ++NumFastStores;
         MadeChange = true;
         continue;
       }
@@ -287,7 +290,8 @@
 
 /// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
 /// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
+bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
+                                             MemDepResult Dep) {
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   
   Instruction *Dependency = Dep.getInst();
@@ -297,13 +301,13 @@
   Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
 
   // Check for aliasing.
-  if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
+  if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
          AliasAnalysis::MustAlias)
     return false;
   
   // DCE instructions only used to calculate that store
   DeleteDeadInstruction(Dependency);
-  NumFastStores++;
+  ++NumFastStores;
   return true;
 }
 
@@ -349,9 +353,9 @@
         if (deadPointers.count(pointerOperand)) {
           // DCE instructions only used to calculate that store.
           Instruction *Dead = BBI;
-          BBI++;
+          ++BBI;
           DeleteDeadInstruction(Dead, &deadPointers);
-          NumFastStores++;
+          ++NumFastStores;
           MadeChange = true;
           continue;
         }
@@ -371,9 +375,9 @@
       // However, if this load is unused and not volatile, we can go ahead and
       // remove it, and not have to worry about it making our pointer undead!
       if (L->use_empty() && !L->isVolatile()) {
-        BBI++;
+        ++BBI;
         DeleteDeadInstruction(L, &deadPointers);
-        NumFastOther++;
+        ++NumFastOther;
         MadeChange = true;
         continue;
       }
@@ -391,9 +395,9 @@
       
       // Dead alloca's can be DCE'd when we reach them
       if (A->use_empty()) {
-        BBI++;
+        ++BBI;
         DeleteDeadInstruction(A, &deadPointers);
-        NumFastOther++;
+        ++NumFastOther;
         MadeChange = true;
       }
       
@@ -426,9 +430,9 @@
                                                          getPointerSize(*I));
         
         if (A == AliasAnalysis::ModRef)
-          modRef++;
+          ++modRef;
         else
-          other++;
+          ++other;
         
         if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
           dead.push_back(*I);
@@ -442,9 +446,9 @@
     } else if (isInstructionTriviallyDead(BBI)) {
       // For any non-memory-affecting non-terminators, DCE them as we reach them
       Instruction *Inst = BBI;
-      BBI++;
+      ++BBI;
       DeleteDeadInstruction(Inst, &deadPointers);
-      NumFastOther++;
+      ++NumFastOther;
       MadeChange = true;
       continue;
     }
@@ -497,7 +501,7 @@
       // Remove it!
       ++BBI;
       DeleteDeadInstruction(S, &deadPointers);
-      NumFastStores++;
+      ++NumFastStores;
       MadeChange = true;
 
       continue;

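The DeadStoreElimination hunks track an interface change in Analysis/MemoryBuiltins: isFreeCall now returns the recognized call itself instead of a bool, so handleFreeWithNonTrivialDependency takes a typed const CallInst* and reads the freed pointer with getArgOperand(0) rather than casting. Sketch, assuming that interface (freedPointer is hypothetical):

#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Instructions.h"
using namespace llvm;

static const Value *freedPointer(Instruction *Inst) {
  if (const CallInst *F = isFreeCall(Inst))
    return F->getArgOperand(0);  // the pointer passed to free()
  return 0;
}
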
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp Fri Jul  2 04:57:13 2010
@@ -272,7 +272,8 @@
   e.function = C->getCalledFunction();
   e.opcode = Expression::CALL;
 
-  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+  CallSite CS(C);
+  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
        I != E; ++I)
     e.varargs.push_back(lookup_or_add(*I));
 
@@ -448,14 +449,14 @@
     if (local_dep.isDef()) {
       CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
 
-      if (local_cdep->getNumOperands() != C->getNumOperands()) {
+      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
         valueNumbering[C] = nextValueNumber;
         return nextValueNumber++;
       }
 
-      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-        uint32_t c_vn = lookup_or_add(C->getOperand(i));
-        uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
         if (c_vn != cd_vn) {
           valueNumbering[C] = nextValueNumber;
           return nextValueNumber++;
@@ -505,13 +506,13 @@
       return nextValueNumber++;
     }
 
-    if (cdep->getNumOperands() != C->getNumOperands()) {
+    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
       valueNumbering[C] = nextValueNumber;
       return nextValueNumber++;
     }
-    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-      uint32_t c_vn = lookup_or_add(C->getOperand(i));
-      uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
       if (c_vn != cd_vn) {
         valueNumbering[C] = nextValueNumber;
         return nextValueNumber++;
@@ -1501,7 +1502,7 @@
       MD->invalidateCachedPointerInfo(V);
     VN.erase(LI);
     toErase.push_back(LI);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
 
@@ -1724,7 +1725,7 @@
     MD->invalidateCachedPointerInfo(V);
   VN.erase(LI);
   toErase.push_back(LI);
-  NumPRELoad++;
+  ++NumPRELoad;
   return true;
 }
 
@@ -1785,7 +1786,7 @@
         MD->invalidateCachedPointerInfo(AvailVal);
       VN.erase(L);
       toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
       return true;
     }
         
@@ -1831,7 +1832,7 @@
       MD->invalidateCachedPointerInfo(StoredVal);
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
 
@@ -1861,7 +1862,7 @@
       MD->invalidateCachedPointerInfo(DepLI);
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
 
@@ -1872,7 +1873,7 @@
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
   
@@ -1883,7 +1884,7 @@
       L->replaceAllUsesWith(UndefValue::get(L->getType()));
       VN.erase(L);
       toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
       return true;
     }
   }
@@ -2015,7 +2016,7 @@
     BasicBlock *BB = FI;
     ++FI;
     bool removedBlock = MergeBlockIntoPredecessor(BB, this);
-    if (removedBlock) NumGVNBlocks++;
+    if (removedBlock) ++NumGVNBlocks;
 
     Changed |= removedBlock;
   }
@@ -2142,12 +2143,12 @@
                                             localAvail[*PI]->table.find(ValNo);
         if (predV == localAvail[*PI]->table.end()) {
           PREPred = *PI;
-          NumWithout++;
+          ++NumWithout;
         } else if (predV->second == CurInst) {
           NumWithout = 2;
         } else {
           predMap[*PI] = predV->second;
-          NumWith++;
+          ++NumWith;
         }
       }
 
@@ -2202,7 +2203,7 @@
       PREInstr->setName(CurInst->getName() + ".pre");
       predMap[PREPred] = PREInstr;
       VN.add(PREInstr, ValNo);
-      NumGVNPRE++;
+      ++NumGVNPRE;
 
       // Update the availability map to include the new instruction.
       localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));

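GVN stops slicing the operand list from index 1 by hand: argument counts are compared with getNumArgOperands, argument slots with getArgOperand, and the expression builder walks the argument range through a CallSite. A sketch of the two equivalent views, circa-2010 API assumed (countArgs is hypothetical):

#include "llvm/Instructions.h"
#include "llvm/Support/CallSite.h"
#include <cassert>
using namespace llvm;

static unsigned countArgs(CallInst *C) {
  CallSite CS(C);
  unsigned N = 0;
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I)
    ++N;                       // visits arguments only, never the callee
  assert(N == C->getNumArgOperands() && "the two views must agree");
  return N;
}
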
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp Fri Jul  2 04:57:13 2010
@@ -83,7 +83,7 @@
       if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
         return false;
       
-    BI++;
+    ++BI;
   }
   
   // Make sure that no instructions in the block have potential side-effects.
@@ -176,7 +176,7 @@
   BasicBlock::iterator BI = exitBlock->begin();
   while (PHINode* P = dyn_cast<PHINode>(BI)) {
     P->replaceUsesOfWith(exitingBlock, preheader);
-    BI++;
+    ++BI;
   }
   
   // Update the dominator tree and remove the instructions and blocks that will
@@ -226,7 +226,7 @@
   LPM.deleteLoopFromQueue(L);
   Changed = true;
   
-  NumDeleted++;
+  ++NumDeleted;
   
   return Changed;
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp Fri Jul  2 04:57:13 2010
@@ -649,7 +649,7 @@
       }
     }
   }
-  NumRestrictBounds++;
+  ++NumRestrictBounds;
   return true;
 }
 
@@ -1016,13 +1016,13 @@
   BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
 
   // [*] Clone Loop
-  DenseMap<const Value *, Value *> ValueMap;
-  Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
+  ValueMap<const Value *, Value *> VMap;
+  Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
   Loop *ALoop = L;
 
   // [*] ALoop's exiting edge enters BLoop's header.
   //    ALoop's original exit block becomes BLoop's exit block.
-  PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
+  PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
   BasicBlock *A_ExitingBlock = ExitCondition->getParent();
   BranchInst *A_ExitInsn =
     dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
@@ -1047,7 +1047,7 @@
   for (BasicBlock::iterator BI = ALoop->getHeader()->begin(), 
          BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
     if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
+      PHINode *PNClone = cast<PHINode>(VMap[PN]);
       InverseMap[PNClone] = PN;
     } else
       break;
@@ -1085,11 +1085,11 @@
   //     block. Remove incoming PHINode values from ALoop's exiting block.
   //     Add new incoming values from BLoop's incoming exiting value.
   //     Update BLoop exit block's dominator info..
-  BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
+  BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
   for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
        BI != BE; ++BI) {
     if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)], 
+      PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)], 
                                                             B_ExitingBlock);
       PN->removeIncomingValue(A_ExitingBlock);
     } else
@@ -1131,7 +1131,7 @@
   removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
 
   //[*] Eliminate split condition's inactive branch in from BLoop.
-  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
+  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
   BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
   BasicBlock *B_InactiveBranch = NULL;
   BasicBlock *B_ActiveBranch = NULL;
@@ -1146,9 +1146,9 @@
 
   //[*] Move exit condition into split condition block to avoid
   //    executing dead loop iteration.
-  ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
-  Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
-  ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);
+  ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
+  Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
+  ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
 
   moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
                     cast<ICmpInst>(SplitCondition), IndVar, IVIncrement, 
@@ -1159,7 +1159,7 @@
                     B_SplitCondition, B_IndVar, B_IndVarIncrement, 
                     BLoop, EVOpNum);
 
-  NumIndexSplit++;
+  ++NumIndexSplit;
   return true;
 }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp Fri Jul  2 04:57:13 2010
@@ -147,7 +147,7 @@
         continue;           // PHI nodes don't count.
       if (isa<DbgInfoIntrinsic>(OI))
         continue;  // Debug intrinsics don't count as size.
-      Size++;
+      ++Size;
   }
 
   if (Size > MAX_HEADER_SIZE)
@@ -263,7 +263,7 @@
 
   preserveCanonicalLoopForm(LPM);
 
-  NumRotated++;
+  ++NumRotated;
   return true;
 }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp Fri Jul  2 04:57:13 2010
@@ -392,12 +392,13 @@
   return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
 }
 
-/// isMulSExtable - Return true if the given add can be sign-extended
+/// isMulSExtable - Return true if the given mul can be sign-extended
 /// without changing its value.
-static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
   const Type *WideTy =
-    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
-  return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+    IntegerType::get(SE.getContext(),
+                     SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
 }
 
 /// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
@@ -413,20 +414,28 @@
   if (LHS == RHS)
     return SE.getConstant(LHS->getType(), 1);
 
-  // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
-  // folding.
-  if (RHS->isAllOnesValue())
-    return SE.getMulExpr(LHS, RHS);
+  // Handle a few RHS special cases.
+  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+  if (RC) {
+    const APInt &RA = RC->getValue()->getValue();
+    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+    // some folding.
+    if (RA.isAllOnesValue())
+      return SE.getMulExpr(LHS, RC);
+    // Handle x /s 1 as x.
+    if (RA == 1)
+      return LHS;
+  }
 
   // Check for a division of a constant by a constant.
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
-    const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
     if (!RC)
       return 0;
-    if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
+    const APInt &LA = C->getValue()->getValue();
+    const APInt &RA = RC->getValue()->getValue();
+    if (LA.srem(RA) != 0)
       return 0;
-    return SE.getConstant(C->getValue()->getValue()
-               .sdiv(RC->getValue()->getValue()));
+    return SE.getConstant(LA.sdiv(RA));
   }
 
   // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
@@ -440,6 +449,7 @@
       if (!Step) return 0;
       return SE.getAddRecExpr(Start, Step, AR->getLoop());
     }
+    return 0;
   }
 
   // Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -455,10 +465,11 @@
       }
       return SE.getAddExpr(Ops);
     }
+    return 0;
   }
 
   // Check for a multiply operand that we can pull RHS out of.
-  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
+  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
     if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
       SmallVector<const SCEV *, 4> Ops;
       bool Found = false;
@@ -475,6 +486,8 @@
       }
       return Found ? SE.getMulExpr(Ops) : 0;
     }
+    return 0;
+  }
 
   // Otherwise we don't know.
   return 0;
@@ -546,7 +559,7 @@
       case Intrinsic::x86_sse2_storeu_pd:
       case Intrinsic::x86_sse2_storeu_dq:
       case Intrinsic::x86_sse2_storel_dq:
-        if (II->getOperand(1) == OperandVal)
+        if (II->getArgOperand(0) == OperandVal)
           isAddress = true;
         break;
     }
@@ -568,7 +581,7 @@
     case Intrinsic::x86_sse2_storeu_pd:
     case Intrinsic::x86_sse2_storeu_dq:
     case Intrinsic::x86_sse2_storel_dq:
-      AccessTy = II->getOperand(1)->getType();
+      AccessTy = II->getArgOperand(0)->getType();
       break;
     }
   }
@@ -976,6 +989,8 @@
   void dump() const;
 };
 
+}
+
 /// HasFormulaWithSameRegs - Test whether this use has a formula with the same
 /// registers as the given formula.
 bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
@@ -1203,6 +1218,32 @@
   return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
 }
 
+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+  static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+    return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+  }
+
+  static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+    return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+  }
+
+  static unsigned
+  getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+    unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+    Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+    return Result;
+  }
+
+  static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+                      const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+    return LHS == RHS;
+  }
+};
+
 /// FormulaSorter - This class implements an ordering for formulae which sorts
 /// them by their standalone cost.
 class FormulaSorter {
@@ -1275,7 +1316,9 @@
   }
 
   // Support for sharing of LSRUses between LSRFixups.
-  typedef DenseMap<const SCEV *, size_t> UseMapTy;
+  typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+                   size_t,
+                   UseMapDenseMapInfo> UseMapTy;
   UseMapTy UseMap;
 
   bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -1613,8 +1656,11 @@
     NewRHS = Sel->getOperand(1);
   else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
     NewRHS = Sel->getOperand(2);
+  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+    NewRHS = SU->getValue();
   else
-    llvm_unreachable("Max doesn't match expected pattern!");
+    // Max doesn't match expected pattern.
+    return Cond;
 
   // Determine the new comparison opcode. It may be signed or unsigned,
   // and the original comparison may be either equality or inequality.
@@ -1805,6 +1851,8 @@
     NewMaxOffset = NewOffset;
   }
   // Check for a mismatched access type, and fall back conservatively as needed.
+  // TODO: Be less conservative when the type is similar and can use the same
+  // addressing modes.
   if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
     NewAccessTy = Type::getVoidTy(AccessTy->getContext());
 
@@ -1833,7 +1881,7 @@
   }
 
   std::pair<UseMapTy::iterator, bool> P =
-    UseMap.insert(std::make_pair(Expr, 0));
+    UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
   if (!P.second) {
     // A use already existed with this base.
     size_t LUIdx = P.first->second;
@@ -1919,7 +1967,7 @@
         Strides.insert(AR->getStepRecurrence(SE));
         Worklist.push_back(AR->getStart());
       } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
-        Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+        Worklist.append(Add->op_begin(), Add->op_end());
       }
     } while (!Worklist.empty());
   }
@@ -2086,7 +2134,7 @@
     const SCEV *S = Worklist.pop_back_val();
 
     if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
-      Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
+      Worklist.append(N->op_begin(), N->op_end());
     else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
       Worklist.push_back(C->getOperand());
     else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
@@ -2159,20 +2207,23 @@
 /// separate registers. If C is non-null, multiply each subexpression by C.
 static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
                             SmallVectorImpl<const SCEV *> &Ops,
+                            SmallVectorImpl<const SCEV *> &UninterestingOps,
+                            const Loop *L,
                             ScalarEvolution &SE) {
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
     // Break out add operands.
     for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
          I != E; ++I)
-      CollectSubexprs(*I, C, Ops, SE);
+      CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
     return;
   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
     // Split a non-zero base out of an addrec.
     if (!AR->getStart()->isZero()) {
       CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                        AR->getStepRecurrence(SE),
-                                       AR->getLoop()), C, Ops, SE);
-      CollectSubexprs(AR->getStart(), C, Ops, SE);
+                                       AR->getLoop()),
+                      C, Ops, UninterestingOps, L, SE);
+      CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
       return;
     }
   } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2182,13 +2233,17 @@
             dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
         CollectSubexprs(Mul->getOperand(1),
                         C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
-                        Ops, SE);
+                        Ops, UninterestingOps, L, SE);
         return;
       }
   }
 
-  // Otherwise use the value itself.
-  Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+  // Otherwise use the value itself. Loop-variant "unknown" values are
+  // uninteresting; we won't be able to do anything meaningful with them.
+  if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
+    UninterestingOps.push_back(S);
+  else
+    Ops.push_back(C ? SE.getMulExpr(C, S) : S);
 }
 
 /// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2202,8 +2257,15 @@
   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
     const SCEV *BaseReg = Base.BaseRegs[i];
 
-    SmallVector<const SCEV *, 8> AddOps;
-    CollectSubexprs(BaseReg, 0, AddOps, SE);
+    SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
+    CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
+
+    // Add any uninteresting values as one register, as we won't be able to
+    // form any interesting reassociation opportunities with them. They'll
+    // just have to be added inside the loop no matter what we do.
+    if (!UninterestingAddOps.empty())
+      AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+
     if (AddOps.size() == 1) continue;
 
     for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
@@ -2216,11 +2278,10 @@
         continue;
 
       // Collect all operands except *J.
-      SmallVector<const SCEV *, 8> InnerAddOps;
-      for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(),
-           KE = AddOps.end(); K != KE; ++K)
-        if (K != J)
-          InnerAddOps.push_back(*K);
+      SmallVector<const SCEV *, 8> InnerAddOps
+        (         ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+      InnerAddOps.append
+        (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
 
       // Don't leave just a constant behind in a register if the constant could
       // be folded into an immediate field.
@@ -2354,13 +2415,12 @@
   for (SmallSetVector<int64_t, 8>::const_iterator
        I = Factors.begin(), E = Factors.end(); I != E; ++I) {
     int64_t Factor = *I;
-    Formula F = Base;
 
     // Check that the multiplication doesn't overflow.
-    if (F.AM.BaseOffs == INT64_MIN && Factor == -1)
+    if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
       continue;
-    F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
-    if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
+    int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+    if (NewBaseOffs / Factor != Base.AM.BaseOffs)
       continue;
 
     // Check that multiplying with the use offset doesn't overflow.
@@ -2371,6 +2431,9 @@
     if (Offset / Factor != LU.MinOffset)
       continue;
 
+    Formula F = Base;
+    F.AM.BaseOffs = NewBaseOffs;
+
     // Check that this scale is legal.
     if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
       continue;

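Two of the LoopStrengthReduce fixes above are arithmetic rather than interface changes. isMulSExtable now widens to BitWidth * NumOperands bits because the product of k signed N-bit values can need nearly N*k bits; the old N+1 widening was only correct for the add case it was copied from. And GenerateScales computes the scaled base offset into a local before cloning the Formula, so an overflowing factor is rejected without building a throwaway copy. The width bound as a sketch:

// A product of k signed N-bit values has magnitude at most 2^(k*(N-1)),
// which always fits in a signed N*k-bit integer; e.g. two 32-bit
// operands need 64 bits, three need 96.
static unsigned mulSafeWidth(unsigned BitWidth, unsigned NumOperands) {
  return BitWidth * NumOperands;
}
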
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp Fri Jul  2 04:57:13 2010
@@ -457,21 +457,21 @@
 }
 
 // RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by ValueMap.
+// current values into those specified by VMap.
 //
 static inline void RemapInstruction(Instruction *I,
-                                    DenseMap<const Value *, Value*> &ValueMap) {
+                                    ValueMap<const Value *, Value*> &VMap) {
   for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
     Value *Op = I->getOperand(op);
-    DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
-    if (It != ValueMap.end()) Op = It->second;
+    ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+    if (It != VMap.end()) Op = It->second;
     I->setOperand(op, Op);
   }
 }
 
 /// CloneLoop - Recursively clone the specified loop and all of its children,
 /// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
                        LoopInfo *LI, LPPassManager *LPM) {
   Loop *New = new Loop();
   LPM->insertLoop(New, PL);
@@ -615,11 +615,11 @@
   // the loop preheader and exit blocks), keeping track of the mapping between
   // the instructions and blocks.
   NewBlocks.reserve(LoopBlocks.size());
-  DenseMap<const Value*, Value*> ValueMap;
+  ValueMap<const Value*, Value*> VMap;
   for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
-    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
+    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
     NewBlocks.push_back(NewBB);
-    ValueMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
+    VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
     LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
   }
 
@@ -629,7 +629,7 @@
                                 NewBlocks[0], F->end());
 
   // Now we create the new Loop object for the versioned loop.
-  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM);
+  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
   Loop *ParentLoop = L->getParentLoop();
   if (ParentLoop) {
     // Make sure to add the cloned preheader and exit blocks to the parent loop
@@ -638,7 +638,7 @@
   }
   
   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
-    BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]);
+    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
     // The new exit block should be in the same loop as the old one.
     if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
       ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
@@ -653,8 +653,8 @@
     for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
       PN = cast<PHINode>(I);
       Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
-      DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V);
-      if (It != ValueMap.end()) V = It->second;
+      ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+      if (It != VMap.end()) V = It->second;
       PN->addIncoming(V, NewExit);
     }
   }
@@ -663,7 +663,7 @@
   for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
     for (BasicBlock::iterator I = NewBlocks[i]->begin(),
            E = NewBlocks[i]->end(); I != E; ++I)
-      RemapInstruction(I, ValueMap);
+      RemapInstruction(I, VMap);
   
   // Rewrite the original preheader to select between versions of the loop.
   BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());

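The DenseMap-to-ValueMap retype in LoopUnswitch (and in LoopIndexSplit above) is more than a rename: ValueMap installs callbacks on its Value keys, so an entry whose key is RAUW'd or deleted mid-transform is updated or dropped instead of left dangling, which a plain DenseMap cannot do. A sketch of the cloning pattern these passes share, assuming llvm/ADT/ValueMap.h on this branch (cloneWithMap is hypothetical):

#include "llvm/ADT/ValueMap.h"
#include "llvm/BasicBlock.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

static BasicBlock *cloneWithMap(BasicBlock *BB, Function *F) {
  ValueMap<const Value *, Value *> VMap;
  BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".us", F);
  VMap[BB] = NewBB;  // keep the block mapping, as the passes do
  return NewBB;
}
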
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp Fri Jul  2 04:57:13 2010
@@ -632,7 +632,7 @@
   // Remove the memcpy
   MD.removeInstruction(cpy);
   cpy->eraseFromParent();
-  NumMemCpyInstr++;
+  ++NumMemCpyInstr;
 
   return true;
 }
@@ -710,7 +710,7 @@
   if (MD.getDependency(C) == dep) {
     MD.removeInstruction(M);
     M->eraseFromParent();
-    NumMemCpyInstr++;
+    ++NumMemCpyInstr;
     return true;
   }
   

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp Fri Jul  2 04:57:13 2010
@@ -926,7 +926,7 @@
   DeleteDeadInstructions();
   AI->eraseFromParent();
 
-  NumReplaced++;
+  ++NumReplaced;
 }
 
 /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
@@ -965,11 +965,11 @@
       isSafeGEP(GEPI, AI, GEPOffset, Info);
       if (!Info.isUnsafe)
         isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
-    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
+    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
       ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
       if (Length)
         isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
-                        UI.getOperandNo() == 1, Info);
+                        UI.getOperandNo() == CallInst::ArgOffset, Info);
       else
         MarkUnsafe(Info);
     } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1373,7 +1373,7 @@
       // If the stored element is zero (common case), just store a null
       // constant.
       Constant *StoreVal;
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
         if (CI->isZero()) {
           StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
         } else {
@@ -1436,7 +1436,7 @@
       Value *Ops[] = {
         SROADest ? EltPtr : OtherElt,  // Dest ptr
         SROADest ? OtherElt : EltPtr,  // Src ptr
-        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
         // Align
         ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
         MI->getVolatileCst()
@@ -1451,8 +1451,8 @@
     } else {
       assert(isa<MemSetInst>(MI));
       Value *Ops[] = {
-        EltPtr, MI->getOperand(2),  // Dest, Value,
-        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+        EltPtr, MI->getArgOperand(1),  // Dest, Value,
+        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
         Zero,  // Align
         ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
       };
@@ -1655,7 +1655,12 @@
       SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
     }
 
-    ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+    // Don't create an 'or x, 0' on the first iteration.
+    if (!isa<Constant>(ResultVal) ||
+        !cast<Constant>(ResultVal)->isNullValue())
+      ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+    else
+      ResultVal = SrcField;
   }
 
   // Handle tail padding by truncating the result
@@ -1794,7 +1799,7 @@
     if (isOffset) return false;
 
     // If the memintrinsic isn't using the alloca as the dest, reject it.
-    if (UI.getOperandNo() != 1) return false;
+    if (UI.getOperandNo() != CallInst::ArgOffset) return false;
     
     // If the source of the memcpy/move is not a constant global, reject it.
     if (!PointsToConstantGlobal(MI->getSource()))

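Beyond the operand renumbering, ScalarReplAggregates picks up a small cleanup above: when reassembling an integer from its fields, the accumulator is seeded with the first field rather than OR-ed into a null constant, so no dead 'or x, 0' survives for later passes to erase. The guard as a standalone sketch (accumulateField is illustrative):

#include "llvm/Constants.h"
#include "llvm/Instructions.h"
using namespace llvm;

static Value *accumulateField(Value *ResultVal, Value *SrcField,
                              Instruction *InsertBefore) {
  // First iteration: ResultVal is still the null seed, so take the
  // field directly instead of creating 'or SrcField, 0'.
  if (Constant *C = dyn_cast<Constant>(ResultVal))
    if (C->isNullValue())
      return SrcField;
  return BinaryOperator::CreateOr(SrcField, ResultVal, "", InsertBefore);
}
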
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp Fri Jul  2 04:57:13 2010
@@ -129,8 +129,8 @@
       return 0;
 
     // Extract some information from the instruction
-    Value *Dst = CI->getOperand(1);
-    Value *Src = CI->getOperand(2);
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
 
     // See if we can get the length of the input string.
     uint64_t Len = GetStringLength(Src);
@@ -181,12 +181,12 @@
       return 0;
 
     // Extract some information from the instruction
-    Value *Dst = CI->getOperand(1);
-    Value *Src = CI->getOperand(2);
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
     uint64_t Len;
 
     // We don't do anything if length is not constant
-    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
       Len = LengthArg->getZExtValue();
     else
       return 0;
@@ -226,11 +226,11 @@
         FT->getParamType(0) != FT->getReturnType())
       return 0;
 
-    Value *SrcStr = CI->getOperand(1);
+    Value *SrcStr = CI->getArgOperand(0);
 
     // If the second operand is non-constant, see if we can compute the length
     // of the input string and turn this into memchr.
-    ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
+    ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
     if (CharC == 0) {
       // These optimizations require TargetData.
       if (!TD) return 0;
@@ -239,7 +239,7 @@
       if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
         return 0;
 
-      return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
+      return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
                         ConstantInt::get(TD->getIntPtrType(*Context), Len),
                         B, TD);
     }
@@ -284,7 +284,7 @@
         FT->getParamType(0) != Type::getInt8PtrTy(*Context))
       return 0;
 
-    Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+    Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
     if (Str1P == Str2P)      // strcmp(x,x)  -> 0
       return ConstantInt::get(CI->getType(), 0);
 
@@ -333,13 +333,13 @@
         !FT->getParamType(2)->isIntegerTy())
       return 0;
 
-    Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+    Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
     if (Str1P == Str2P)      // strncmp(x,x,n)  -> 0
       return ConstantInt::get(CI->getType(), 0);
 
     // Get the length argument if it is constant.
     uint64_t Length;
-    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
       Length = LengthArg->getZExtValue();
     else
       return 0;
@@ -348,7 +348,7 @@
       return ConstantInt::get(CI->getType(), 0);
 
     if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
-      return EmitMemCmp(Str1P, Str2P, CI->getOperand(3), B, TD);
+      return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
 
     std::string Str1, Str2;
     bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
@@ -387,7 +387,7 @@
         FT->getParamType(0) != Type::getInt8PtrTy(*Context))
       return 0;
 
-    Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
+    Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
     if (Dst == Src)      // strcpy(x,x)  -> x
       return Src;
 
@@ -403,7 +403,7 @@
     if (OptChkCall)
       EmitMemCpyChk(Dst, Src,
                     ConstantInt::get(TD->getIntPtrType(*Context), Len),
-                    CI->getOperand(3), B, TD);
+                    CI->getArgOperand(2), B, TD);
     else
       EmitMemCpy(Dst, Src,
                  ConstantInt::get(TD->getIntPtrType(*Context), Len),
@@ -424,9 +424,9 @@
         !FT->getParamType(2)->isIntegerTy())
       return 0;
 
-    Value *Dst = CI->getOperand(1);
-    Value *Src = CI->getOperand(2);
-    Value *LenOp = CI->getOperand(3);
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+    Value *LenOp = CI->getArgOperand(2);
 
     // See if we can get the length of the input string.
     uint64_t SrcLen = GetStringLength(Src);
@@ -474,7 +474,7 @@
         !FT->getReturnType()->isIntegerTy())
       return 0;
 
-    Value *Src = CI->getOperand(1);
+    Value *Src = CI->getArgOperand(0);
 
     // Constant folding: strlen("xyz") -> 3
     if (uint64_t Len = GetStringLength(Src))
@@ -499,7 +499,7 @@
         !FT->getParamType(1)->isPointerTy())
       return 0;
 
-    Value *EndPtr = CI->getOperand(2);
+    Value *EndPtr = CI->getArgOperand(1);
     if (isa<ConstantPointerNull>(EndPtr)) {
       CI->setOnlyReadsMemory();
       CI->addAttribute(1, Attribute::NoCapture);
@@ -522,13 +522,13 @@
       return 0;
 
     // fold strstr(x, x) -> x.
-    if (CI->getOperand(1) == CI->getOperand(2))
-      return B.CreateBitCast(CI->getOperand(1), CI->getType());
+    if (CI->getArgOperand(0) == CI->getArgOperand(1))
+      return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
 
     // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
-    if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getOperand(1))) {
-      Value *StrLen = EmitStrLen(CI->getOperand(2), B, TD);
-      Value *StrNCmp = EmitStrNCmp(CI->getOperand(1), CI->getOperand(2),
+    if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+      Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+      Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
                                    StrLen, B, TD);
       for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
            UI != UE; ) {
@@ -544,12 +544,12 @@
 
     // See if either input string is a constant string.
     std::string SearchStr, ToFindStr;
-    bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr);
-    bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr);
+    bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr);
+    bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr);
 
     // fold strstr(x, "") -> x.
     if (HasStr2 && ToFindStr.empty())
-      return B.CreateBitCast(CI->getOperand(1), CI->getType());
+      return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
 
     // If both strings are known, constant fold it.
     if (HasStr1 && HasStr2) {
@@ -559,14 +559,14 @@
         return Constant::getNullValue(CI->getType());
 
       // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
-      Value *Result = CastToCStr(CI->getOperand(1), B);
+      Value *Result = CastToCStr(CI->getArgOperand(0), B);
       Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
       return B.CreateBitCast(Result, CI->getType());
     }
 
     // fold strstr(x, "y") -> strchr(x, 'y').
     if (HasStr2 && ToFindStr.size() == 1)
-      return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD),
+      return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD),
                              CI->getType());
     return 0;
   }
@@ -584,13 +584,13 @@
         !FT->getReturnType()->isIntegerTy(32))
       return 0;
 
-    Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
+    Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
 
     if (LHS == RHS)  // memcmp(s,s,x) -> 0
       return Constant::getNullValue(CI->getType());
 
     // Make sure we have a constant length.
-    ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
+    ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
     if (!LenC) return 0;
     uint64_t Len = LenC->getZExtValue();
 
@@ -637,9 +637,9 @@
       return 0;
 
     // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
-    EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
-               CI->getOperand(3), 1, false, B, TD);
-    return CI->getOperand(1);
+    EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+               CI->getArgOperand(2), 1, false, B, TD);
+    return CI->getArgOperand(0);
   }
 };
 
@@ -659,9 +659,9 @@
       return 0;
 
     // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
-    EmitMemMove(CI->getOperand(1), CI->getOperand(2),
-                CI->getOperand(3), 1, false, B, TD);
-    return CI->getOperand(1);
+    EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+                CI->getArgOperand(2), 1, false, B, TD);
+    return CI->getArgOperand(0);
   }
 };
 
@@ -681,10 +681,10 @@
       return 0;
 
     // memset(p, v, n) -> llvm.memset(p, v, n, 1)
-    Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
-                                 false);
-    EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), false, B, TD);
-    return CI->getOperand(1);
+    Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context),
+                                 false);
+    EmitMemSet(CI->getArgOperand(0), Val,  CI->getArgOperand(2), false, B, TD);
+    return CI->getArgOperand(0);
   }
 };
 
@@ -705,7 +705,7 @@
         !FT->getParamType(0)->isFloatingPointTy())
       return 0;
 
-    Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
+    Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
     if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
       if (Op1C->isExactlyValue(1.0))  // pow(1.0, x) -> 1.0
         return Op1C;
@@ -759,7 +759,7 @@
         !FT->getParamType(0)->isFloatingPointTy())
       return 0;
 
-    Value *Op = CI->getOperand(1);
+    Value *Op = CI->getArgOperand(0);
     // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= 32
     // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < 32
     Value *LdExpArg = 0;
@@ -811,7 +811,7 @@
       return 0;
 
     // If this is something like 'floor((double)floatval)', convert to floorf.
-    FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
+    FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
     if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
       return 0;
 
@@ -840,7 +840,7 @@
         !FT->getParamType(0)->isIntegerTy())
       return 0;
 
-    Value *Op = CI->getOperand(1);
+    Value *Op = CI->getArgOperand(0);
 
     // Constant fold.
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
@@ -876,7 +876,7 @@
       return 0;
 
     // isdigit(c) -> (c-'0') <u 10
-    Value *Op = CI->getOperand(1);
+    Value *Op = CI->getArgOperand(0);
     Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
                      "isdigittmp");
     Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
@@ -897,7 +897,7 @@
       return 0;
 
     // isascii(c) -> c <u 128
-    Value *Op = CI->getOperand(1);
+    Value *Op = CI->getArgOperand(0);
     Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
                          "isascii");
     return B.CreateZExt(Op, CI->getType());
@@ -916,7 +916,7 @@
       return 0;
 
     // abs(x) -> x >s -1 ? x : -x
-    Value *Op = CI->getOperand(1);
+    Value *Op = CI->getArgOperand(0);
     Value *Pos = B.CreateICmpSGT(Op,
                              Constant::getAllOnesValue(Op->getType()),
                                  "ispos");
@@ -938,7 +938,7 @@
       return 0;
 
     // toascii(c) -> c & 0x7f
-    return B.CreateAnd(CI->getOperand(1),
+    return B.CreateAnd(CI->getArgOperand(0),
                        ConstantInt::get(CI->getType(),0x7F));
   }
 };
@@ -961,7 +961,7 @@
 
     // Check for a fixed format string.
     std::string FormatStr;
-    if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
+    if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
       return 0;
 
     // Empty format string -> noop.
@@ -993,20 +993,20 @@
     }
 
     // Optimize specific format strings.
-    // printf("%c", chr) --> putchar(*(i8*)dst)
-    if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
-        CI->getOperand(2)->getType()->isIntegerTy()) {
-      Value *Res = EmitPutChar(CI->getOperand(2), B, TD);
+    // printf("%c", chr) --> putchar(chr)
+    if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+        CI->getArgOperand(1)->getType()->isIntegerTy()) {
+      Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
 
       if (CI->use_empty()) return CI;
       return B.CreateIntCast(Res, CI->getType(), true);
     }
 
     // printf("%s\n", str) --> puts(str)
-    if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
-        CI->getOperand(2)->getType()->isPointerTy() &&
+    if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+        CI->getArgOperand(1)->getType()->isPointerTy() &&
         CI->use_empty()) {
-      EmitPutS(CI->getOperand(2), B, TD);
+      EmitPutS(CI->getArgOperand(1), B, TD);
       return CI;
     }
     return 0;
@@ -1027,11 +1027,11 @@
 
     // Check for a fixed format string.
     std::string FormatStr;
-    if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+    if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
       return 0;
 
     // If we just have a format string (nothing else crazy) transform it.
-    if (CI->getNumOperands() == 3) {
+    if (CI->getNumArgOperands() == 2) {
       // Make sure there's no % in the constant array.  We could try to handle
       // %% -> % in the future if we cared.
       for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1042,7 +1042,7 @@
       if (!TD) return 0;
 
       // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
-      EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
+      EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte.
                  ConstantInt::get(TD->getIntPtrType(*Context),
                  FormatStr.size()+1), 1, false, B, TD);
       return ConstantInt::get(CI->getType(), FormatStr.size());
@@ -1050,16 +1050,17 @@
 
     // The remaining optimizations require the format string to be "%s" or "%c"
     // and have an extra operand.
-    if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+    if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+        CI->getNumArgOperands() < 3)
       return 0;
 
     // Decode the second character of the format string.
     if (FormatStr[1] == 'c') {
       // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
-      if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
-      Value *V = B.CreateTrunc(CI->getOperand(3),
+      if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+      Value *V = B.CreateTrunc(CI->getArgOperand(2),
                                Type::getInt8Ty(*Context), "char");
-      Value *Ptr = CastToCStr(CI->getOperand(1), B);
+      Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
       B.CreateStore(V, Ptr);
       Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
                         "nul");
@@ -1073,13 +1074,13 @@
       if (!TD) return 0;
 
       // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
-      if (!CI->getOperand(3)->getType()->isPointerTy()) return 0;
+      if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
 
-      Value *Len = EmitStrLen(CI->getOperand(3), B, TD);
+      Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
       Value *IncLen = B.CreateAdd(Len,
                                   ConstantInt::get(Len->getType(), 1),
                                   "leninc");
-      EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD);
+      EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD);
 
       // The sprintf result is the unincremented number of bytes in the string.
       return B.CreateIntCast(Len, CI->getType(), false);
@@ -1103,8 +1104,8 @@
       return 0;
 
     // Get the element size and count.
-    ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
-    ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
+    ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+    ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
     if (!SizeC || !CountC) return 0;
     uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
 
@@ -1114,8 +1115,8 @@
 
     // If this is writing one byte, turn it into fputc.
     if (Bytes == 1) {  // fwrite(S,1,1,F) -> fputc(S[0],F)
-      Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
-      EmitFPutC(Char, CI->getOperand(4), B, TD);
+      Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+      EmitFPutC(Char, CI->getArgOperand(3), B, TD);
       return ConstantInt::get(CI->getType(), 1);
     }
 
@@ -1139,11 +1140,11 @@
       return 0;
 
     // fputs(s,F) --> fwrite(s,1,strlen(s),F)
-    uint64_t Len = GetStringLength(CI->getOperand(1));
+    uint64_t Len = GetStringLength(CI->getArgOperand(0));
     if (!Len) return 0;
-    EmitFWrite(CI->getOperand(1),
+    EmitFWrite(CI->getArgOperand(0),
                ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
-               CI->getOperand(2), B, TD);
+               CI->getArgOperand(1), B, TD);
     return CI;  // Known to have no uses (see above).
   }
 };
@@ -1162,11 +1163,11 @@
 
     // All the optimizations depend on the format string.
     std::string FormatStr;
-    if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+    if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
       return 0;
 
     // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
-    if (CI->getNumOperands() == 3) {
+    if (CI->getNumArgOperands() == 2) {
       for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
         if (FormatStr[i] == '%')  // Could handle %% -> % if we cared.
           return 0; // We found a format specifier.
@@ -1174,31 +1175,32 @@
       // These optimizations require TargetData.
       if (!TD) return 0;
 
-      EmitFWrite(CI->getOperand(2),
+      EmitFWrite(CI->getArgOperand(1),
                  ConstantInt::get(TD->getIntPtrType(*Context),
                                   FormatStr.size()),
-                 CI->getOperand(1), B, TD);
+                 CI->getArgOperand(0), B, TD);
       return ConstantInt::get(CI->getType(), FormatStr.size());
     }
 
     // The remaining optimizations require the format string to be "%s" or "%c"
     // and have an extra operand.
-    if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+    if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+        CI->getNumArgOperands() < 3)
       return 0;
 
     // Decode the second character of the format string.
     if (FormatStr[1] == 'c') {
-      // fprintf(F, "%c", chr) --> *(i8*)dst = chr
-      if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
-      EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD);
+      // fprintf(F, "%c", chr) --> fputc(chr, F)
+      if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+      EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
       return ConstantInt::get(CI->getType(), 1);
     }
 
     if (FormatStr[1] == 's') {
-      // fprintf(F, "%s", str) -> fputs(str, F)
-      if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty())
+      // fprintf(F, "%s", str) --> fputs(str, F)
+      if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
         return 0;
-      EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD);
+      EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
       return CI;
     }
     return 0;
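
All of the simplifier hunks above are one mechanical rewrite: CI->getOperand(N) becomes CI->getArgOperand(N-1), and operand-count checks drop by one, because the new accessors index only the call's arguments while the raw operand list also carries the callee. A minimal sketch of the correspondence, assuming this branch's operand layout (callee in slot 0, so CallInst::ArgOffset == 1); the helper name is illustrative, not part of the patch:

    #include "llvm/Instructions.h"
    #include <cassert>
    using namespace llvm;

    // Sketch: how the argument accessors line up with the raw operand list.
    static void checkArgIndexing(const CallInst *CI) {
      // Exactly one non-argument operand: the callee.
      assert(CI->getNumArgOperands() == CI->getNumOperands() - 1);
      for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
        assert(CI->getArgOperand(i) == CI->getOperand(i + CallInst::ArgOffset));
    }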

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp Fri Jul  2 04:57:13 2010
@@ -253,7 +253,7 @@
     // If we are passing this argument into call as the corresponding
     // argument operand, then the argument is dynamically constant.
     // Otherwise, we cannot transform this function safely.
-    if (CI->getOperand(ArgNo+1) == Arg)
+    if (CI->getArgOperand(ArgNo) == Arg)
       return true;
   }
 
@@ -270,16 +270,16 @@
 }
 
 // getCommonReturnValue - Check to see if the function containing the specified
-// return instruction and tail call consistently returns the same
-// runtime-constant value at all exit points.  If so, return the returned value.
+// tail call consistently returns the same runtime-constant value at all exit
+// points except for IgnoreRI.  If so, return the returned value.
 //
-static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
-  Function *F = TheRI->getParent()->getParent();
+static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
+  Function *F = CI->getParent()->getParent();
   Value *ReturnedValue = 0;
 
   for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
     if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
-      if (RI != TheRI) {
+      if (RI != IgnoreRI) {
         Value *RetOp = RI->getOperand(0);
 
         // We can only perform this transformation if the value returned is
@@ -404,7 +404,7 @@
   if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
       !isa<UndefValue>(Ret->getReturnValue()) &&
       AccumulatorRecursionEliminationInitVal == 0 &&
-      !getCommonReturnValue(Ret, CI))
+      !getCommonReturnValue(0, CI))
     return false;
 
   // OK! We can transform this tail call.  If this is the first one found,
@@ -454,8 +454,8 @@
   // Ok, now that we know we have a pseudo-entry block WITH all of the
   // required PHI nodes, add entries into the PHI node for the actual
   // parameters passed into the tail-recursive call.
-  for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
-    ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);
+  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+    ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
 
   // If we are introducing an accumulator variable to eliminate the recursion,
   // do so now.  Note that we _know_ that no subsequent tail recursion
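
Two things change above: getCommonReturnValue's first parameter is now documented as a return instruction to ignore and may be null (the updated call site passes 0, and the function reaches F through CI instead), and the PHI-seeding loop switches to argument indices. The "dynamically constant" test described at the top of the section reduces to a one-liner; a sketch, with an illustrative helper name:

    #include "llvm/Argument.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Sketch: an argument is dynamically constant with respect to the
    // recursive tail call CI if the call passes it back into its own slot.
    static bool isDynamicallyConstant(const Argument *Arg, const CallInst *CI) {
      unsigned ArgNo = Arg->getArgNo();
      return ArgNo < CI->getNumArgOperands() &&
             CI->getArgOperand(ArgNo) == Arg;
    }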

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp Fri Jul  2 04:57:13 2010
@@ -381,29 +381,28 @@
                                     const TargetLowering &TLI) {
   std::vector<InlineAsm::ConstraintInfo>
   Constraints = IA->ParseConstraints();
-  
-  unsigned ArgNo = 1;   // ArgNo - The operand of the CallInst.
+
+  unsigned ArgNo = 0;   // The argument of the CallInst.
   for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
     TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-    
+
     // Compute the value type for each operand.
     switch (OpInfo.Type) {
       case InlineAsm::isOutput:
         if (OpInfo.isIndirect)
-          OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+          OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
         break;
       case InlineAsm::isInput:
-        OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+        OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
         break;
       case InlineAsm::isClobber:
         // Nothing to do.
         break;
     }
-    
+
     // Compute the constraint code and ConstraintType to use.
-    TLI.ComputeConstraintToUse(OpInfo, SDValue(),
-                             OpInfo.ConstraintType == TargetLowering::C_Memory);
-    
+    TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
     // If this asm operand is our Value*, and if it isn't an indirect memory
     // operand, we can't fold it!
     if (OpInfo.CallOperandVal == OpVal &&
@@ -411,7 +410,7 @@
          !OpInfo.isIndirect))
       return false;
   }
-  
+
   return true;
 }
 
@@ -450,7 +449,7 @@
     
     if (CallInst *CI = dyn_cast<CallInst>(U)) {
       InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
-      if (IA == 0) return true;
+      if (!IA) return true;
       
       // If this is a memory operand, we're cool, otherwise bail out.
       if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
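
With ArgNo rebased to 0, the loop advances it only for constraints that actually consume a call argument (indirect outputs and inputs), so it stays aligned with getArgOperand no matter where the callee sits in the operand list. The walk reduced to its skeleton, with an illustrative function name and no error handling:

    #include "llvm/InlineAsm.h"
    #include "llvm/Instructions.h"
    #include <vector>
    using namespace llvm;

    // Sketch: pair each asm constraint with the call argument feeding it.
    static void walkAsmOperands(CallInst *CI, InlineAsm *IA) {
      std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
      unsigned ArgNo = 0;
      for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
        switch (Constraints[i].Type) {
        case InlineAsm::isOutput:
          if (Constraints[i].isIndirect)
            (void)CI->getArgOperand(ArgNo++); // indirect outputs take an arg
          break;
        case InlineAsm::isInput:
          (void)CI->getArgOperand(ArgNo++);   // every input takes an arg
          break;
        case InlineAsm::isClobber:
          break;                              // clobbers take none
        }
      }
    }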

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/BuildLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/BuildLibCalls.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/BuildLibCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/BuildLibCalls.cpp Fri Jul  2 04:57:13 2010
@@ -420,11 +420,11 @@
         FT->getParamType(2) != TD->getIntPtrType(Context) ||
         FT->getParamType(3) != TD->getIntPtrType(Context))
       return false;
-    
-    if (isFoldable(4, 3, false)) {
-      EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+
+    if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+      EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
                  1, false, B, TD);
-      replaceCall(CI->getOperand(1));
+      replaceCall(CI->getArgOperand(0));
       return true;
     }
     return false;
@@ -443,11 +443,11 @@
         FT->getParamType(2) != TD->getIntPtrType(Context) ||
         FT->getParamType(3) != TD->getIntPtrType(Context))
       return false;
-    
-    if (isFoldable(4, 3, false)) {
-      EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+
+    if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+      EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
                   1, false, B, TD);
-      replaceCall(CI->getOperand(1));
+      replaceCall(CI->getArgOperand(0));
       return true;
     }
     return false;
@@ -461,12 +461,12 @@
         FT->getParamType(2) != TD->getIntPtrType(Context) ||
         FT->getParamType(3) != TD->getIntPtrType(Context))
       return false;
-    
-    if (isFoldable(4, 3, false)) {
-      Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
+
+    if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+      Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
                                    false);
-      EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), false, B, TD);
-      replaceCall(CI->getOperand(1));
+      EmitMemSet(CI->getArgOperand(0), Val,  CI->getArgOperand(2), false, B, TD);
+      replaceCall(CI->getArgOperand(0));
       return true;
     }
     return false;
@@ -487,8 +487,8 @@
     // st[rp]cpy_chk call which may fail at runtime if the size is too long.
     // TODO: It might be nice to get a maximum length out of the possible
     // string lengths for varying.
-    if (isFoldable(3, 2, true)) {
-      Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD,
+    if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) {
+      Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
                               Name.substr(2, 6));
       replaceCall(Ret);
       return true;
@@ -504,10 +504,10 @@
         !FT->getParamType(2)->isIntegerTy() ||
         FT->getParamType(3) != TD->getIntPtrType(Context))
       return false;
-    
-    if (isFoldable(4, 3, false)) {
-      Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2),
-                               CI->getOperand(3), B, TD, Name.substr(2, 7));
+
+    if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+      Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                               CI->getArgOperand(2), B, TD, Name.substr(2, 7));
       replaceCall(Ret);
       return true;
     }
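
The fortified-call folders still hand isFoldable absolute operand numbers, which is why the arguments are spelled 3 + CallInst::ArgOffset rather than a bare 4: if the callee ever moves out of slot 0, only ArgOffset changes. Written out for __memcpy_chk(dst, src, len, dstsize), assuming ArgOffset == 1; the helper is illustrative:

    #include "llvm/Instructions.h"
    #include <cassert>
    using namespace llvm;

    // Sketch: the operand list is 0=callee, 1=dst, 2=src, 3=len, 4=dstsize,
    // so argument index and absolute operand number differ by ArgOffset.
    static Value *chkObjSize(CallInst *CI) {
      unsigned ObjSizeOp = 3 + CallInst::ArgOffset; // dstsize, as an operand #
      assert(CI->getOperand(ObjSizeOp) == CI->getArgOperand(3));
      return CI->getArgOperand(3);
    }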

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp Fri Jul  2 04:57:13 2010
@@ -32,7 +32,7 @@
 
 // CloneBasicBlock - See comments in Cloning.h
 BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
-                                  DenseMap<const Value*, Value*> &ValueMap,
+                                  ValueToValueMapTy &VMap,
                                   const Twine &NameSuffix, Function *F,
                                   ClonedCodeInfo *CodeInfo) {
   BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
@@ -47,7 +47,7 @@
     if (II->hasName())
       NewInst->setName(II->getName()+NameSuffix);
     NewBB->getInstList().push_back(NewInst);
-    ValueMap[II] = NewInst;                // Add instruction map to value.
+    VMap[II] = NewInst;                // Add instruction map to value.
     
     hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@@ -72,7 +72,7 @@
 // ArgMap values.
 //
 void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
-                             DenseMap<const Value*, Value*> &ValueMap,
+                             ValueToValueMapTy &VMap,
                              SmallVectorImpl<ReturnInst*> &Returns,
                              const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
   assert(NameSuffix && "NameSuffix cannot be null!");
@@ -80,17 +80,17 @@
 #ifndef NDEBUG
   for (Function::const_arg_iterator I = OldFunc->arg_begin(), 
        E = OldFunc->arg_end(); I != E; ++I)
-    assert(ValueMap.count(I) && "No mapping from source argument specified!");
+    assert(VMap.count(I) && "No mapping from source argument specified!");
 #endif
 
   // Clone any attributes.
   if (NewFunc->arg_size() == OldFunc->arg_size())
     NewFunc->copyAttributesFrom(OldFunc);
   else {
-    //Some arguments were deleted with the ValueMap. Copy arguments one by one
+    // Some arguments were deleted with the VMap. Copy arguments one by one.
     for (Function::const_arg_iterator I = OldFunc->arg_begin(), 
            E = OldFunc->arg_end(); I != E; ++I)
-      if (Argument* Anew = dyn_cast<Argument>(ValueMap[I]))
+      if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
         Anew->addAttr( OldFunc->getAttributes()
                        .getParamAttributes(I->getArgNo() + 1));
     NewFunc->setAttributes(NewFunc->getAttributes()
@@ -111,43 +111,43 @@
     const BasicBlock &BB = *BI;
 
     // Create a new basic block and copy instructions into it!
-    BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc,
+    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc,
                                       CodeInfo);
-    ValueMap[&BB] = CBB;                       // Add basic block mapping.
+    VMap[&BB] = CBB;                       // Add basic block mapping.
 
     if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
       Returns.push_back(RI);
   }
 
   // Loop over all of the instructions in the function, fixing up operand
-  // references as we go.  This uses ValueMap to do all the hard work.
+  // references as we go.  This uses VMap to do all the hard work.
   //
-  for (Function::iterator BB = cast<BasicBlock>(ValueMap[OldFunc->begin()]),
+  for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
          BE = NewFunc->end(); BB != BE; ++BB)
     // Loop over all instructions, fixing each one as we find it...
     for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
-      RemapInstruction(II, ValueMap);
+      RemapInstruction(II, VMap);
 }
 
 /// CloneFunction - Return a copy of the specified function, but without
 /// embedding the function into another module.  Also, any references specified
-/// in the ValueMap are changed to refer to their mapped value instead of the
-/// original one.  If any of the arguments to the function are in the ValueMap,
-/// the arguments are deleted from the resultant function.  The ValueMap is
+/// in the VMap are changed to refer to their mapped value instead of the
+/// original one.  If any of the arguments to the function are in the VMap,
+/// the arguments are deleted from the resultant function.  The VMap is
 /// updated to include mappings from all of the instructions and basicblocks in
 /// the function from their old to new values.
 ///
 Function *llvm::CloneFunction(const Function *F,
-                              DenseMap<const Value*, Value*> &ValueMap,
+                              ValueToValueMapTy &VMap,
                               ClonedCodeInfo *CodeInfo) {
   std::vector<const Type*> ArgTypes;
 
   // The user might be deleting arguments to the function by specifying them in
-  // the ValueMap.  If so, we need to not add the arguments to the arg ty vector
+  // the VMap.  If so, we need to not add the arguments to the arg ty vector
   //
   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
        I != E; ++I)
-    if (ValueMap.count(I) == 0)  // Haven't mapped the argument to anything yet?
+    if (VMap.count(I) == 0)  // Haven't mapped the argument to anything yet?
       ArgTypes.push_back(I->getType());
 
   // Create a new function type...
@@ -161,13 +161,13 @@
   Function::arg_iterator DestI = NewF->arg_begin();
   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
        I != E; ++I)
-    if (ValueMap.count(I) == 0) {   // Is this argument preserved?
+    if (VMap.count(I) == 0) {   // Is this argument preserved?
       DestI->setName(I->getName()); // Copy the name over...
-      ValueMap[I] = DestI++;        // Add mapping to ValueMap
+      VMap[I] = DestI++;        // Add mapping to VMap
     }
 
   SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.
-  CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);
+  CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo);
   return NewF;
 }
 
@@ -179,19 +179,19 @@
   struct PruningFunctionCloner {
     Function *NewFunc;
     const Function *OldFunc;
-    DenseMap<const Value*, Value*> &ValueMap;
+    ValueToValueMapTy &VMap;
     SmallVectorImpl<ReturnInst*> &Returns;
     const char *NameSuffix;
     ClonedCodeInfo *CodeInfo;
     const TargetData *TD;
   public:
     PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
-                          DenseMap<const Value*, Value*> &valueMap,
+                          ValueToValueMapTy &valueMap,
                           SmallVectorImpl<ReturnInst*> &returns,
                           const char *nameSuffix, 
                           ClonedCodeInfo *codeInfo,
                           const TargetData *td)
-    : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns),
+    : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns),
       NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
     }
 
@@ -202,7 +202,7 @@
     
   public:
     /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-    /// mapping its operands through ValueMap if they are available.
+    /// mapping its operands through VMap if they are available.
     Constant *ConstantFoldMappedInstruction(const Instruction *I);
   };
 }
@@ -211,7 +211,7 @@
 /// anything that it can reach.
 void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
                                        std::vector<const BasicBlock*> &ToClone){
-  Value *&BBEntry = ValueMap[BB];
+  Value *&BBEntry = VMap[BB];
 
   // Have we already cloned this block?
   if (BBEntry) return;
@@ -230,7 +230,7 @@
     // If this instruction constant folds, don't bother cloning the instruction,
     // instead, just add the constant to the value map.
     if (Constant *C = ConstantFoldMappedInstruction(II)) {
-      ValueMap[II] = C;
+      VMap[II] = C;
       continue;
     }
 
@@ -238,7 +238,7 @@
     if (II->hasName())
       NewInst->setName(II->getName()+NameSuffix);
     NewBB->getInstList().push_back(NewInst);
-    ValueMap[II] = NewInst;                // Add instruction map to value.
+    VMap[II] = NewInst;                // Add instruction map to value.
     
     hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@@ -258,12 +258,12 @@
       ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
       // Or is a known constant in the caller...
       if (Cond == 0)  
-        Cond = dyn_cast_or_null<ConstantInt>(ValueMap[BI->getCondition()]);
+        Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
 
       // Constant fold to uncond branch!
       if (Cond) {
         BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
-        ValueMap[OldTI] = BranchInst::Create(Dest, NewBB);
+        VMap[OldTI] = BranchInst::Create(Dest, NewBB);
         ToClone.push_back(Dest);
         TerminatorDone = true;
       }
@@ -272,10 +272,10 @@
     // If switching on a value known constant in the caller.
     ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
     if (Cond == 0)  // Or known constant after constant prop in the callee...
-      Cond = dyn_cast_or_null<ConstantInt>(ValueMap[SI->getCondition()]);
+      Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
     if (Cond) {     // Constant fold to uncond branch!
       BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
-      ValueMap[OldTI] = BranchInst::Create(Dest, NewBB);
+      VMap[OldTI] = BranchInst::Create(Dest, NewBB);
       ToClone.push_back(Dest);
       TerminatorDone = true;
     }
@@ -286,7 +286,7 @@
     if (OldTI->hasName())
       NewInst->setName(OldTI->getName()+NameSuffix);
     NewBB->getInstList().push_back(NewInst);
-    ValueMap[OldTI] = NewInst;             // Add instruction map to value.
+    VMap[OldTI] = NewInst;             // Add instruction map to value.
     
     // Recursively clone any reachable successor blocks.
     const TerminatorInst *TI = BB->getTerminator();
@@ -307,13 +307,13 @@
 }
 
 /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-/// mapping its operands through ValueMap if they are available.
+/// mapping its operands through VMap if they are available.
 Constant *PruningFunctionCloner::
 ConstantFoldMappedInstruction(const Instruction *I) {
   SmallVector<Constant*, 8> Ops;
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
-                                                           ValueMap)))
+                                                           VMap)))
       Ops.push_back(Op);
     else
       return 0;  // All operands not constant!
@@ -363,7 +363,7 @@
 /// dead.  Since this doesn't produce an exact copy of the input, it can't be
 /// used for things like CloneFunction or CloneModule.
 void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
-                                     DenseMap<const Value*, Value*> &ValueMap,
+                                     ValueToValueMapTy &VMap,
                                      SmallVectorImpl<ReturnInst*> &Returns,
                                      const char *NameSuffix, 
                                      ClonedCodeInfo *CodeInfo,
@@ -374,10 +374,10 @@
 #ifndef NDEBUG
   for (Function::const_arg_iterator II = OldFunc->arg_begin(), 
        E = OldFunc->arg_end(); II != E; ++II)
-    assert(ValueMap.count(II) && "No mapping from source argument specified!");
+    assert(VMap.count(II) && "No mapping from source argument specified!");
 #endif
 
-  PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns,
+  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns,
                             NameSuffix, CodeInfo, TD);
 
   // Clone the entry block, and anything recursively reachable from it.
@@ -397,14 +397,14 @@
   SmallVector<const PHINode*, 16> PHIToResolve;
   for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
        BI != BE; ++BI) {
-    BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]);
+    BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
     if (NewBB == 0) continue;  // Dead block.
 
     // Add the new block to the new function.
     NewFunc->getBasicBlockList().push_back(NewBB);
     
     // Loop over all of the instructions in the block, fixing up operand
-    // references as we go.  This uses ValueMap to do all the hard work.
+    // references as we go.  This uses VMap to do all the hard work.
     //
     BasicBlock::iterator I = NewBB->begin();
 
@@ -455,7 +455,7 @@
           I->setMetadata(DbgKind, 0);
         }
       }
-      RemapInstruction(I, ValueMap);
+      RemapInstruction(I, VMap);
     }
   }
   
@@ -465,19 +465,19 @@
     const PHINode *OPN = PHIToResolve[phino];
     unsigned NumPreds = OPN->getNumIncomingValues();
     const BasicBlock *OldBB = OPN->getParent();
-    BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]);
+    BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
 
     // Map operands for blocks that are live and remove operands for blocks
     // that are dead.
     for (; phino != PHIToResolve.size() &&
          PHIToResolve[phino]->getParent() == OldBB; ++phino) {
       OPN = PHIToResolve[phino];
-      PHINode *PN = cast<PHINode>(ValueMap[OPN]);
+      PHINode *PN = cast<PHINode>(VMap[OPN]);
       for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
         if (BasicBlock *MappedBlock = 
-            cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
+            cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
           Value *InVal = MapValue(PN->getIncomingValue(pred),
-                                  ValueMap);
+                                  VMap);
           assert(InVal && "Unknown input value?");
           PN->setIncomingValue(pred, InVal);
           PN->setIncomingBlock(pred, MappedBlock);
@@ -531,15 +531,15 @@
       while ((PN = dyn_cast<PHINode>(I++))) {
         Value *NV = UndefValue::get(PN->getType());
         PN->replaceAllUsesWith(NV);
-        assert(ValueMap[OldI] == PN && "ValueMap mismatch");
-        ValueMap[OldI] = NV;
+        assert(VMap[OldI] == PN && "VMap mismatch");
+        VMap[OldI] = NV;
         PN->eraseFromParent();
         ++OldI;
       }
     }
     // NOTE: We cannot eliminate single entry phi nodes here, because of
-    // ValueMap.  Single entry phi nodes can have multiple ValueMap entries
-    // pointing at them.  Thus, deleting one would require scanning the ValueMap
+    // VMap.  Single entry phi nodes can have multiple VMap entries
+    // pointing at them.  Thus, deleting one would require scanning the VMap
     // to update any entries in it that would require that.  This would be
     // really slow.
   }
@@ -548,14 +548,14 @@
   // and zap unconditional fall-through branches.  This happens all the time when
   // specializing code: code specialization turns conditional branches into
   // uncond branches, and this code folds them.
-  Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]);
+  Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
   while (I != NewFunc->end()) {
     BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
     if (!BI || BI->isConditional()) { ++I; continue; }
     
     // Note that we can't eliminate uncond branches if the destination has
     // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
-    // require scanning the ValueMap to update any entries that point to the phi
+    // require scanning the VMap to update any entries that point to the phi
     // node.
     BasicBlock *Dest = BI->getSuccessor(0);
     if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
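
For callers the visible change is the map type. A minimal usage sketch against the post-patch signatures in this branch; the helper and the choice of replacement value are illustrative, and the pre-seeding is the argument-deletion mechanism the CloneFunction comment above describes:

    #include "llvm/Function.h"
    #include "llvm/ADT/ValueMap.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    // Sketch: clone F minus its first argument by pre-mapping that argument.
    static Function *cloneWithoutFirstArg(Function *F, Value *Replacement) {
      ValueMap<const Value*, Value*> VMap;   // i.e. ValueToValueMapTy
      VMap[F->arg_begin()] = Replacement;    // pre-mapped args are dropped
      return CloneFunction(F, VMap, /*CodeInfo=*/0);
    }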

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp Fri Jul  2 04:57:13 2010
@@ -15,7 +15,6 @@
 #include "llvm/BasicBlock.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/ADT/DenseMap.h"
 
 
 using namespace llvm;
@@ -23,13 +22,13 @@
 /// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
 /// dominance info. It is expected that basic block is already cloned.
 static void CloneDominatorInfo(BasicBlock *BB, 
-                               DenseMap<const Value *, Value *> &ValueMap,
+                               ValueMap<const Value *, Value *> &VMap,
                                DominatorTree *DT,
                                DominanceFrontier *DF) {
 
   assert (DT && "DominatorTree is not available");
-  DenseMap<const Value *, Value*>::iterator BI = ValueMap.find(BB);
-  assert (BI != ValueMap.end() && "BasicBlock clone is missing");
+  ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+  assert (BI != VMap.end() && "BasicBlock clone is missing");
   BasicBlock *NewBB = cast<BasicBlock>(BI->second);
 
   // NewBB already got dominator info.
@@ -43,11 +42,11 @@
 
   // NewBB's dominator is either BB's dominator or BB's dominator's clone.
   BasicBlock *NewBBDom = BBDom;
-  DenseMap<const Value *, Value*>::iterator BBDomI = ValueMap.find(BBDom);
-  if (BBDomI != ValueMap.end()) {
+  ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+  if (BBDomI != VMap.end()) {
     NewBBDom = cast<BasicBlock>(BBDomI->second);
     if (!DT->getNode(NewBBDom))
-      CloneDominatorInfo(BBDom, ValueMap, DT, DF);
+      CloneDominatorInfo(BBDom, VMap, DT, DF);
   }
   DT->addNewBlock(NewBB, NewBBDom);
 
@@ -60,8 +59,8 @@
         for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
              I != E; ++I) {
           BasicBlock *DB = *I;
-          DenseMap<const Value*, Value*>::iterator IDM = ValueMap.find(DB);
-          if (IDM != ValueMap.end())
+          ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
+          if (IDM != VMap.end())
             NewDFSet.insert(cast<BasicBlock>(IDM->second));
           else
             NewDFSet.insert(DB);
@@ -71,10 +70,10 @@
   }
 }
 
-/// CloneLoop - Clone Loop. Clone dominator info. Populate ValueMap
+/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
 /// using old blocks to new blocks mapping.
 Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager  *LPM, LoopInfo *LI,
-                      DenseMap<const Value *, Value *> &ValueMap, Pass *P) {
+                      ValueMap<const Value *, Value *> &VMap, Pass *P) {
   
   DominatorTree *DT = NULL;
   DominanceFrontier *DF = NULL;
@@ -104,8 +103,8 @@
     for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
          I != E; ++I) {
       BasicBlock *BB = *I;
-      BasicBlock *NewBB = CloneBasicBlock(BB, ValueMap, ".clone");
-      ValueMap[BB] = NewBB;
+      BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone");
+      VMap[BB] = NewBB;
       if (P)
         LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
       NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
@@ -117,7 +116,7 @@
       for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
            I != E; ++I) {
         BasicBlock *BB = *I;
-        CloneDominatorInfo(BB, ValueMap, DT, DF);
+        CloneDominatorInfo(BB, VMap, DT, DF);
       }
 
     // Process sub loops
@@ -125,7 +124,7 @@
       LoopNest.push_back(*I);
   } while (!LoopNest.empty());
 
-  // Remap instructions to reference operands from ValueMap.
+  // Remap instructions to reference operands from VMap.
   for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(), 
         NBE = NewBlocks.end();  NBItr != NBE; ++NBItr) {
     BasicBlock *NB = *NBItr;
@@ -135,8 +134,8 @@
       for (unsigned index = 0, num_ops = Insn->getNumOperands(); 
            index != num_ops; ++index) {
         Value *Op = Insn->getOperand(index);
-        DenseMap<const Value *, Value *>::iterator OpItr = ValueMap.find(Op);
-        if (OpItr != ValueMap.end())
+        ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+        if (OpItr != VMap.end())
           Insn->setOperand(index, OpItr->second);
       }
     }
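
Callers of CloneLoop see the same substitution. A usage sketch under the post-patch signature; the helper name is illustrative, and dominator/frontier updating is left to the pass plumbing shown above:

    #include "llvm/BasicBlock.h"
    #include "llvm/Analysis/LoopPass.h"
    #include "llvm/ADT/ValueMap.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    // Sketch: clone L, then use the populated map to locate cloned blocks.
    static Loop *cloneLoopOf(Loop *L, LPPassManager *LPM, LoopInfo *LI,
                             Pass *P, BasicBlock *&NewHeader) {
      ValueMap<const Value*, Value*> VMap;
      Loop *NewL = CloneLoop(L, LPM, LI, VMap, P);
      NewHeader = cast<BasicBlock>(VMap[L->getHeader()]);
      return NewL;
    }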

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp Fri Jul  2 04:57:13 2010
@@ -28,12 +28,12 @@
 Module *llvm::CloneModule(const Module *M) {
   // Create the value map that maps things from the old module over to the new
   // module.
-  DenseMap<const Value*, Value*> ValueMap;
-  return CloneModule(M, ValueMap);
+  ValueToValueMapTy VMap;
+  return CloneModule(M, VMap);
 }
 
 Module *llvm::CloneModule(const Module *M,
-                          DenseMap<const Value*, Value*> &ValueMap) {
+                          ValueToValueMapTy &VMap) {
   // First off, we need to create the new module...
   Module *New = new Module(M->getModuleIdentifier(), M->getContext());
   New->setDataLayout(M->getDataLayout());
@@ -51,7 +51,7 @@
     New->addLibrary(*I);
 
   // Loop over all of the global variables, making corresponding globals in the
-  // new module.  Here we add them to the ValueMap and to the new Module.  We
+  // new module.  Here we add them to the VMap and to the new Module.  We
   // don't worry about attributes or initializers, they will come later.
   //
   for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
@@ -62,7 +62,7 @@
                                             GlobalValue::ExternalLinkage, 0,
                                             I->getName());
     GV->setAlignment(I->getAlignment());
-    ValueMap[I] = GV;
+    VMap[I] = GV;
   }
 
   // Loop over the functions in the module, making external functions as before
@@ -71,13 +71,13 @@
       Function::Create(cast<FunctionType>(I->getType()->getElementType()),
                        GlobalValue::ExternalLinkage, I->getName(), New);
     NF->copyAttributesFrom(I);
-    ValueMap[I] = NF;
+    VMap[I] = NF;
   }
 
   // Loop over the aliases in the module
   for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
        I != E; ++I)
-    ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
+    VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
                                   I->getName(), NULL, New);
   
   // Now that all of the things that global variable initializer can refer to
@@ -86,10 +86,10 @@
   //
   for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
        I != E; ++I) {
-    GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
+    GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
     if (I->hasInitializer())
       GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
-                                                 ValueMap)));
+                                                 VMap)));
     GV->setLinkage(I->getLinkage());
     GV->setThreadLocal(I->isThreadLocal());
     GV->setConstant(I->isConstant());
@@ -98,17 +98,17 @@
   // Similarly, copy over function bodies now...
   //
   for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
-    Function *F = cast<Function>(ValueMap[I]);
+    Function *F = cast<Function>(VMap[I]);
     if (!I->isDeclaration()) {
       Function::arg_iterator DestI = F->arg_begin();
       for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
            ++J) {
         DestI->setName(J->getName());
-        ValueMap[J] = DestI++;
+        VMap[J] = DestI++;
       }
 
       SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.
-      CloneFunctionInto(F, I, ValueMap, Returns);
+      CloneFunctionInto(F, I, VMap, Returns);
     }
 
     F->setLinkage(I->getLinkage());
@@ -117,11 +117,37 @@
   // And aliases
   for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
        I != E; ++I) {
-    GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
+    GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
     GA->setLinkage(I->getLinkage());
     if (const Constant* C = I->getAliasee())
-      GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
+      GA->setAliasee(cast<Constant>(MapValue(C, VMap)));
   }
-  
+
+  // And named metadata....
+  for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+         E = M->named_metadata_end(); I != E; ++I) {
+    const NamedMDNode &NMD = *I;
+    SmallVector<MDNode*, 4> MDs;
+    for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+      MDs.push_back(cast<MDNode>(MapValue(NMD.getOperand(i), VMap)));
+    NamedMDNode::Create(New->getContext(), NMD.getName(),
+                        MDs.data(), MDs.size(), New);
+  }
+
+  // Update the metadata attached to instructions.
+  for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI)
+    for (Function::iterator FI = MI->begin(), FE = MI->end();
+         FI != FE; ++FI)
+      for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+           BI != BE; ++BI) {
+        SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+        BI->getAllMetadata(MDs);
+        for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator
+               MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) {
+          Value *MappedValue = MapValue(MDI->second, VMap);
+          if (MDI->second != MappedValue && MappedValue)
+            BI->setMetadata(MDI->first, cast<MDNode>(MappedValue));
+        }
+      }
   return New;
 }
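
The appended loops are the functional part of this hunk: named metadata is recreated through MapValue, so nodes that mention cloned globals point at the clones, and per-instruction attachments are remapped afterwards. A usage sketch; the metadata name is hypothetical:

    #include "llvm/Module.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    // Sketch: after this patch, any named node present in M should also be
    // present in its clone.
    static bool cloneKeepsNamedMD(const Module *M) {
      Module *New = CloneModule(M);
      bool Kept = !M->getNamedMetadata("my.markers") ||  // hypothetical name
                  New->getNamedMetadata("my.markers") != 0;
      delete New;
      return Kept;
    }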

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp Fri Jul  2 04:57:13 2010
@@ -63,7 +63,8 @@
     
     // Next, create the new invoke instruction, inserting it at the end
     // of the old basic block.
-    SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+    ImmutableCallSite CS(CI);
+    SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
     InvokeInst *II =
       InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
                          0, 0, 0, 0, // EH-FIXME!
@@ -170,7 +171,7 @@
 /// some edges of the callgraph may remain.
 static void UpdateCallGraphAfterInlining(CallSite CS,
                                          Function::iterator FirstNewBlock,
-                                       DenseMap<const Value*, Value*> &ValueMap,
+                                         ValueMap<const Value*, Value*> &VMap,
                                          InlineFunctionInfo &IFI) {
   CallGraph &CG = *IFI.CG;
   const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -193,9 +194,9 @@
   for (; I != E; ++I) {
     const Value *OrigCall = I->first;
 
-    DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
+    ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
     // Only copy the edge if the call was inlined!
-    if (VMI == ValueMap.end() || VMI->second == 0)
+    if (VMI == VMap.end() || VMI->second == 0)
       continue;
     
     // If the call was inlined, but then constant folded, there is no edge to
@@ -286,8 +287,8 @@
   ClonedCodeInfo InlinedFunctionInfo;
   Function::iterator FirstNewBlock;
 
-  { // Scope to destroy ValueMap after cloning.
-    DenseMap<const Value*, Value*> ValueMap;
+  { // Scope to destroy VMap after cloning.
+    ValueMap<const Value*, Value*> VMap;
 
     assert(CalledFunc->arg_size() == CS.arg_size() &&
            "No varargs calls can be inlined!");
@@ -358,14 +359,14 @@
         MustClearTailCallFlags = true;
       }
 
-      ValueMap[I] = ActualArg;
+      VMap[I] = ActualArg;
     }
 
     // We want the inliner to prune the code as it copies.  We would LOVE to
     // have no dead or constant instructions leftover after inlining occurs
     // (which can happen, e.g., because an argument was constant), but we'll be
     // happy with whatever the cloner can do.
-    CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
+    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i",
                               &InlinedFunctionInfo, IFI.TD, TheCall);
 
     // Remember the first block that is newly cloned over.
@@ -373,7 +374,7 @@
 
     // Update the callgraph if requested.
     if (IFI.CG)
-      UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI);
+      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
   }
 
   // If there are any alloca instructions in the block that used to be the entry
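
Building InvokeArgs through an ImmutableCallSite instead of op_begin()+1 drops the last hand-coded assumption here about where a call's arguments live. The pattern in isolation, with an illustrative helper:

    #include "llvm/Instructions.h"
    #include "llvm/Support/CallSite.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Sketch: collect a call's arguments without assuming the callee's
    // position in the operand list.
    static void collectArgs(const CallInst *CI, SmallVectorImpl<Value*> &Args) {
      ImmutableCallSite CS(CI);
      Args.append(CS.arg_begin(), CS.arg_end());
    }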

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp Fri Jul  2 04:57:13 2010
@@ -192,7 +192,7 @@
   if (!Preheader) {
     Preheader = InsertPreheaderForLoop(L);
     if (Preheader) {
-      NumInserted++;
+      ++NumInserted;
       Changed = true;
     }
   }
@@ -215,7 +215,7 @@
       // allowed.
       if (!L->contains(*PI)) {
         if (RewriteLoopExitBlock(L, ExitBlock)) {
-          NumInserted++;
+          ++NumInserted;
           Changed = true;
         }
         break;
@@ -244,7 +244,7 @@
     // loop header.
     LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
     if (LoopLatch) {
-      NumInserted++;
+      ++NumInserted;
       Changed = true;
     }
   }

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp Fri Jul  2 04:57:13 2010
@@ -37,13 +37,13 @@
 STATISTIC(NumUnrolled,    "Number of loops unrolled (completely or otherwise)");
 
 /// RemapInstruction - Convert the instruction operands from referencing the
-/// current values into those specified by ValueMap.
+/// current values into those specified by VMap.
 static inline void RemapInstruction(Instruction *I,
-                                    DenseMap<const Value *, Value*> &ValueMap) {
+                                    ValueMap<const Value *, Value*> &VMap) {
   for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
     Value *Op = I->getOperand(op);
-    DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
-    if (It != ValueMap.end())
+    ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+    if (It != VMap.end())
       I->setOperand(op, It->second);
   }
 }
@@ -183,7 +183,7 @@
 
   // For the first iteration of the loop, we should use the precloned values for
   // PHI nodes.  Insert associations now.
-  typedef DenseMap<const Value*, Value*> ValueToValueMapTy;
+  typedef ValueMap<const Value*, Value*> ValueToValueMapTy;
   ValueToValueMapTy LastValueMap;
   std::vector<PHINode*> OrigPHINode;
   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
@@ -205,26 +205,26 @@
     
     for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
          E = LoopBlocks.end(); BB != E; ++BB) {
-      ValueToValueMapTy ValueMap;
-      BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It));
+      ValueToValueMapTy VMap;
+      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
       Header->getParent()->getBasicBlockList().push_back(New);
 
       // Loop over all of the PHI nodes in the block, changing them to use the
       // incoming values from the previous block.
       if (*BB == Header)
         for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
-          PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
+          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
           Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
           if (Instruction *InValI = dyn_cast<Instruction>(InVal))
             if (It > 1 && L->contains(InValI))
               InVal = LastValueMap[InValI];
-          ValueMap[OrigPHINode[i]] = InVal;
+          VMap[OrigPHINode[i]] = InVal;
           New->getInstList().erase(NewPHI);
         }
 
       // Update our running map of newest clones
       LastValueMap[*BB] = New;
-      for (ValueToValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
+      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
            VI != VE; ++VI)
         LastValueMap[VI->first] = VI->second;
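
LastValueMap is the running union of every iteration's VMap, so instructions cloned for iteration N can be remapped to the definitions they actually use from iteration N-1. The per-block step, compressed into an illustrative helper:

    #include "llvm/BasicBlock.h"
    #include "llvm/ADT/Twine.h"
    #include "llvm/ADT/ValueMap.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    typedef ValueMap<const Value*, Value*> V2VMapTy;

    // Sketch: clone one block of iteration It and fold its map into the
    // running map, so later iterations resolve to the newest clones.
    static BasicBlock *cloneIterationBlock(BasicBlock *BB, unsigned It,
                                           V2VMapTy &LastValueMap) {
      V2VMapTy VMap;
      BasicBlock *New = CloneBasicBlock(BB, VMap, "." + Twine(It));
      LastValueMap[BB] = New;
      for (V2VMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;
      return New;
    }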
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp Fri Jul  2 04:57:13 2010
@@ -310,15 +310,15 @@
   for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
        AI != E; ++AI) {
     const Type *Ty = AI->getType();
-    // StructType can't be cast, but is a legal argument type, so we have
+    // Aggregate types can't be cast, but are legal argument types, so we have
     // to handle them differently. We use an extract/insert pair as a
     // lightweight method to achieve the same goal.
-    if (isa<StructType>(Ty)) {
-      Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsertPt);
+    if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+      Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
       Instruction *NI = InsertValueInst::Create(AI, EI, 0);
       NI->insertAfter(EI);
       AI->replaceAllUsesWith(NI);
-      // Set the struct operand of the instructions back to the AllocaInst.
+      // Set the operand of the instructions back to the AllocaInst.
       EI->setOperand(0, AI);
       NI->setOperand(0, AI);
     } else {
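
The extract/insert pair is an identity by value, but it gives the argument a use that is itself an instruction; after replaceAllUsesWith rewrites every other use, the pair's own operands are pointed back at the argument. The trick in isolation for a struct-typed argument, as an illustrative helper:

    #include "llvm/Argument.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Sketch:
    //   %e = extractvalue %agg, 0
    //   %n = insertvalue  %agg, %e, 0   ; %n == %agg by value
    static Instruction *makeIdentityCopy(Argument *AI, Instruction *InsertPt) {
      Instruction *EI = ExtractValueInst::Create(AI, 0, "", InsertPt);
      Instruction *NI = InsertValueInst::Create(AI, EI, 0);
      NI->insertAfter(EI);
      AI->replaceAllUsesWith(NI);
      EI->setOperand(0, AI);  // undo the RAUW on the pair itself
      NI->setOperand(0, AI);
      return NI;
    }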

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp Fri Jul  2 04:57:13 2010
@@ -603,9 +603,8 @@
   // To determine liveness, we must iterate through the predecessors of blocks
   // where the def is live.  Blocks are added to the worklist if we need to
   // check their predecessors.  Start with all the using blocks.
-  SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
-  LiveInBlockWorklist.insert(LiveInBlockWorklist.end(), 
-                             Info.UsingBlocks.begin(), Info.UsingBlocks.end());
+  SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+                                                   Info.UsingBlocks.end());
   
   // If any of the using blocks is also a definition block, check to see if the
   // definition occurs before or after the use.  If it happens before the use,
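
The worklist change is a small idiom swap: SmallVector's range constructor replaces default construction followed by insert. In miniature, standalone, with plain ints standing in for the block pointers:

    #include "llvm/ADT/SmallVector.h"

    // Sketch: range-construct the worklist instead of insert()-ing into an
    // empty vector; the contents are identical either way.
    static unsigned seedCount(const int *Begin, const int *End) {
      llvm::SmallVector<int, 64> Worklist(Begin, End);
      return Worklist.size();
    }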

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp Fri Jul  2 04:57:13 2010
@@ -28,7 +28,7 @@
   // DenseMap.  This includes any recursive calls to MapValue.
 
   // Global values and non-function-local metadata do not need to be seeded into
-  // the ValueMap if they are using the identity mapping.
+  // the VM if they are using the identity mapping.
   if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
       (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal()))
     return VMSlot = const_cast<Value*>(V);
@@ -125,11 +125,11 @@
 }
 
 /// RemapInstruction - Convert the instruction operands from referencing the
-/// current values into those specified by ValueMap.
+/// current values into those specified by VMap.
 ///
-void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &ValueMap) {
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
   for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
-    Value *V = MapValue(*op, ValueMap);
+    Value *V = MapValue(*op, VMap);
     assert(V && "Referenced value not in value map!");
     *op = V;
   }

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h Fri Jul  2 04:57:13 2010
@@ -15,12 +15,12 @@
 #ifndef VALUEMAPPER_H
 #define VALUEMAPPER_H
 
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
 
 namespace llvm {
   class Value;
   class Instruction;
-  typedef DenseMap<const Value *, Value *> ValueToValueMapTy;
+  typedef ValueMap<const Value *, Value *> ValueToValueMapTy;
 
   Value *MapValue(const Value *V, ValueToValueMapTy &VM);
   void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);

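The typedef switch is the substantive change in this header: DenseMap keys are raw Value pointers that silently go stale when a value is RAUW'd or deleted, while ValueMap keys are value handles that get updated in place. A toy model of the difference (ToyValue/ToyValueMap are illustrative names, not the real interface):

#include <cassert>
#include <map>

struct ToyValueMap;

// Toy "value" that tells one observer when it is replaced, standing in
// for Value::replaceAllUsesWith notifying ValueMap's value handles.
struct ToyValue {
  ToyValueMap *Observer;
  ToyValue() : Observer(0) {}
  void replaceAllUsesWith(ToyValue *New);
};

struct ToyValueMap {
  std::map<ToyValue *, int> Storage;

  void set(ToyValue *K, int V) {
    Storage[K] = V;
    K->Observer = this;
  }
  int *get(ToyValue *K) {
    std::map<ToyValue *, int>::iterator It = Storage.find(K);
    return It == Storage.end() ? 0 : &It->second;
  }
  // Re-key the entry so lookups through the replacement still hit.
  void onRAUW(ToyValue *Old, ToyValue *New) {
    std::map<ToyValue *, int>::iterator It = Storage.find(Old);
    if (It == Storage.end())
      return;
    int V = It->second;
    Storage.erase(It);
    Storage[New] = V;
    New->Observer = this;
  }
};

void ToyValue::replaceAllUsesWith(ToyValue *New) {
  if (Observer)
    Observer->onRAUW(this, New);
}

int main() {
  ToyValue A, B;
  ToyValueMap VM;
  VM.set(&A, 42);
  A.replaceAllUsesWith(&B); // a pointer-keyed DenseMap would go stale here
  assert(VM.get(&B) && *VM.get(&B) == 42);
  return 0;
}
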
Modified: llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp Fri Jul  2 04:57:13 2010
@@ -1419,6 +1419,9 @@
   case GlobalValue::ExternalLinkage: break;
   case GlobalValue::PrivateLinkage:       Out << "private ";        break;
   case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
+  case GlobalValue::LinkerPrivateWeakLinkage:
+    Out << "linker_private_weak ";
+    break;
   case GlobalValue::InternalLinkage:      Out << "internal ";       break;
   case GlobalValue::LinkOnceAnyLinkage:   Out << "linkonce ";       break;
   case GlobalValue::LinkOnceODRLinkage:   Out << "linkonce_odr ";   break;
@@ -1854,6 +1857,7 @@
     default: Out << " cc" << CI->getCallingConv(); break;
     }
 
+    Operand = CI->getCalledValue();
     const PointerType    *PTy = cast<PointerType>(Operand->getType());
     const FunctionType   *FTy = cast<FunctionType>(PTy->getElementType());
     const Type         *RetTy = FTy->getReturnType();
@@ -1877,10 +1881,10 @@
       writeOperand(Operand, true);
     }
     Out << '(';
-    for (unsigned op = 1, Eop = I.getNumOperands(); op < Eop; ++op) {
-      if (op > 1)
+    for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+      if (op > 0)
         Out << ", ";
-      writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op));
+      writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op + 1));
     }
     Out << ')';
     if (PAL.getFnAttributes() != Attribute::None)
@@ -1925,10 +1929,10 @@
       writeOperand(Operand, true);
     }
     Out << '(';
-    for (unsigned op = 0, Eop = I.getNumOperands() - 4; op < Eop; ++op) {
+    for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
       if (op)
         Out << ", ";
-      writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op + 1));
+      writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op + 1));
     }
 
     Out << ')';

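Most of the churn in this file and the ones below is the getOperand(i+1) -> getArgOperand(i) migration; the one behavioral fix is that Operand is now reloaded via CI->getCalledValue() rather than assuming the callee sits at a fixed operand index. A toy model of why the accessors insulate clients from the operand layout (ToyCall is illustrative only; the real knob is CallInst::ArgOffset, visible in the Instructions.cpp hunk below):

#include <cassert>
#include <vector>

// With ArgOffset == 1 the callee is operand 0 and the arguments follow
// it; with ArgOffset == 0 the arguments come first and the callee is
// stored last. getCalledValue()/getArgOperand() hide that choice.
struct ToyCall {
  static const unsigned ArgOffset = 1; // layout knob, fixed per build
  std::vector<int> Operands;           // ints stand in for Value*

  int getCalledValue() const {
    return ArgOffset ? Operands.front() : Operands.back();
  }
  unsigned getNumArgOperands() const {
    return static_cast<unsigned>(Operands.size()) - 1;
  }
  int getArgOperand(unsigned I) const {
    return Operands[ArgOffset ? I + 1 : I];
  }
};

int main() {
  ToyCall C;
  C.Operands.push_back(100); // callee (under ArgOffset == 1)
  C.Operands.push_back(1);   // arg 0
  C.Operands.push_back(2);   // arg 1
  assert(C.getCalledValue() == 100);
  assert(C.getNumArgOperands() == 2);
  assert(C.getArgOperand(0) == 1 && C.getArgOperand(1) == 2);
  return 0;
}
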
Modified: llvm/branches/wendling/eh/lib/VMCore/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/AutoUpgrade.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/AutoUpgrade.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/AutoUpgrade.cpp Fri Jul  2 04:57:13 2010
@@ -18,6 +18,7 @@
 #include "llvm/Module.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/IRBuilder.h"
 #include <cstring>
@@ -314,7 +315,8 @@
 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   Function *F = CI->getCalledFunction();
   LLVMContext &C = CI->getContext();
-  
+  ImmutableCallSite CS(CI);
+
   assert(F && "CallInst has no function associated with it.");
 
   if (!NewFn) {
@@ -344,11 +346,11 @@
     if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
         isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
       std::vector<Constant*> Idxs;
-      Value *Op0 = CI->getOperand(1);
+      Value *Op0 = CI->getArgOperand(0);
       ShuffleVectorInst *SI = NULL;
       if (isLoadH || isLoadL) {
         Value *Op1 = UndefValue::get(Op0->getType());
-        Value *Addr = new BitCastInst(CI->getOperand(2), 
+        Value *Addr = new BitCastInst(CI->getArgOperand(1), 
                                   Type::getDoublePtrTy(C),
                                       "upgraded.", CI);
         Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
@@ -381,7 +383,7 @@
         SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
       } else if (isMovSD ||
                  isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
-        Value *Op1 = CI->getOperand(2);
+        Value *Op1 = CI->getArgOperand(1);
         if (isMovSD) {
           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
@@ -395,8 +397,8 @@
         Value *Mask = ConstantVector::get(Idxs);
         SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
       } else if (isShufPD) {
-        Value *Op1 = CI->getOperand(2);
-        unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue();
+        Value *Op1 = CI->getArgOperand(1);
+        unsigned MaskVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
                                                ((MaskVal >> 1) & 1)+2));
@@ -416,8 +418,8 @@
       CI->eraseFromParent();
     } else if (F->getName() == "llvm.x86.sse41.pmulld") {
       // Upgrade this set of intrinsics into vector multiplies.
-      Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1),
-                                                   CI->getOperand(2),
+      Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
+                                                   CI->getArgOperand(1),
                                                    CI->getName(),
                                                    CI);
       // Fix up all the uses with our new multiply.
@@ -427,9 +429,9 @@
       // Remove upgraded multiply.
       CI->eraseFromParent();
     } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
-      Value *Op1 = CI->getOperand(1);
-      Value *Op2 = CI->getOperand(2);
-      Value *Op3 = CI->getOperand(3);
+      Value *Op1 = CI->getArgOperand(0);
+      Value *Op2 = CI->getArgOperand(1);
+      Value *Op3 = CI->getArgOperand(2);
       unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
       Value *Rep;
       IRBuilder<> Builder(C);
@@ -483,9 +485,9 @@
       CI->eraseFromParent();
       
     } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
-      Value *Op1 = CI->getOperand(1);
-      Value *Op2 = CI->getOperand(2);
-      Value *Op3 = CI->getOperand(3);
+      Value *Op1 = CI->getArgOperand(0);
+      Value *Op2 = CI->getArgOperand(1);
+      Value *Op3 = CI->getArgOperand(2);
       unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
       Value *Rep;
       IRBuilder<> Builder(C);
@@ -556,10 +558,10 @@
   case Intrinsic::x86_mmx_psrl_w: {
     Value *Operands[2];
     
-    Operands[0] = CI->getOperand(1);
+    Operands[0] = CI->getArgOperand(0);
     
     // Cast the second parameter to the correct type.
-    BitCastInst *BC = new BitCastInst(CI->getOperand(2), 
+    BitCastInst *BC = new BitCastInst(CI->getArgOperand(1), 
                                       NewFn->getFunctionType()->getParamType(1),
                                       "upgraded.", CI);
     Operands[1] = BC;
@@ -583,9 +585,8 @@
   case Intrinsic::ctlz:
   case Intrinsic::ctpop:
   case Intrinsic::cttz: {
-    //  Build a small vector of the 1..(N-1) operands, which are the 
-    //  parameters.
-    SmallVector<Value*, 8> Operands(CI->op_begin()+1, CI->op_end());
+    //  Build a small vector of the original arguments.
+    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
 
     //  Construct a new CallInst
     CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
@@ -620,7 +621,7 @@
   case Intrinsic::eh_selector:
   case Intrinsic::eh_typeid_for: {
     // Only the return type changed.
-    SmallVector<Value*, 8> Operands(CI->op_begin() + 1, CI->op_end());
+    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
     CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
                                        "upgraded." + CI->getName(), CI);
     NewCI->setTailCall(CI->isTailCall());
@@ -643,8 +644,8 @@
   case Intrinsic::memset: {
     // Add isVolatile
     const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
-    Value *Operands[5] = { CI->getOperand(1), CI->getOperand(2),
-                           CI->getOperand(3), CI->getOperand(4),
+    Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
+                           CI->getArgOperand(2), CI->getArgOperand(3),
                            llvm::ConstantInt::get(I1Ty, 0) };
     CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
                                        CI->getName(), CI);
@@ -726,7 +727,8 @@
   if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
     if (!Declare->use_empty()) {
       DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
-      if (!isa<MDNode>(DDI->getOperand(1)) ||!isa<MDNode>(DDI->getOperand(2))) {
+      if (!isa<MDNode>(DDI->getArgOperand(0)) ||
+          !isa<MDNode>(DDI->getArgOperand(1))) {
         while (!Declare->use_empty()) {
           CallInst *CI = cast<CallInst>(Declare->use_back());
           CI->eraseFromParent();

Modified: llvm/branches/wendling/eh/lib/VMCore/ConstantFold.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/ConstantFold.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/ConstantFold.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/ConstantFold.cpp Fri Jul  2 04:57:13 2010
@@ -1817,8 +1817,15 @@
     return Constant::getAllOnesValue(ResultTy);
 
   // Handle some degenerate cases first
-  if (isa<UndefValue>(C1) || isa<UndefValue>(C2))
+  if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+    // For EQ and NE, we can always pick a value for the undef to make the
+    // predicate pass or fail, so we can return undef.
+    if (ICmpInst::isEquality(ICmpInst::Predicate(pred)))
+      return UndefValue::get(ResultTy);
+    // Otherwise, pick the same value as the non-undef operand, and fold
+    // it to true or false.
     return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+  }
 
   // No compile-time operations on this type yet.
   if (C1->getType()->isPPC_FP128Ty())
@@ -2194,7 +2201,7 @@
         }
 
         NewIndices.push_back(Combined);
-        NewIndices.insert(NewIndices.end(), Idxs+1, Idxs+NumIdx);
+        NewIndices.append(Idxs+1, Idxs+NumIdx);
         return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ?
           ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0),
                                                  &NewIndices[0],

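The new undef handling is worth restating: for eq/ne an undef operand can be chosen to either satisfy or falsify the predicate, so the whole compare folds to undef; for the remaining predicates, undef is chosen equal to the other operand, folding the compare to whatever the predicate yields on equal inputs. A compilable restatement of the rule (the enums and helpers are simplified stand-ins for CmpInst's):

#include <cassert>

enum Pred { EQ, NE, ULT, ULE, SGE, SGT };
enum Fold { FoldsToUndef, FoldsToFalse, FoldsToTrue };

static bool isEquality(Pred P) { return P == EQ || P == NE; }

// What "x pred x" evaluates to, mirroring CmpInst::isTrueWhenEqual.
static bool isTrueWhenEqual(Pred P) {
  return P == EQ || P == ULE || P == SGE;
}

static Fold foldICmpWithUndef(Pred P) {
  // undef may be picked to satisfy or falsify ==/!=, so the compare
  // itself is undef.
  if (isEquality(P))
    return FoldsToUndef;
  // Otherwise pick undef equal to the other operand.
  return isTrueWhenEqual(P) ? FoldsToTrue : FoldsToFalse;
}

int main() {
  assert(foldICmpWithUndef(EQ) == FoldsToUndef);
  assert(foldICmpWithUndef(ULT) == FoldsToFalse);
  assert(foldICmpWithUndef(SGE) == FoldsToTrue);
  return 0;
}
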
Modified: llvm/branches/wendling/eh/lib/VMCore/Core.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Core.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Core.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Core.cpp Fri Jul  2 04:57:13 2010
@@ -1058,6 +1058,8 @@
     return LLVMPrivateLinkage;
   case GlobalValue::LinkerPrivateLinkage:
     return LLVMLinkerPrivateLinkage;
+  case GlobalValue::LinkerPrivateWeakLinkage:
+    return LLVMLinkerPrivateWeakLinkage;
   case GlobalValue::DLLImportLinkage:
     return LLVMDLLImportLinkage;
   case GlobalValue::DLLExportLinkage:
@@ -1108,6 +1110,9 @@
   case LLVMLinkerPrivateLinkage:
     GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
     break;
+  case LLVMLinkerPrivateWeakLinkage:
+    GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
+    break;
   case LLVMDLLImportLinkage:
     GV->setLinkage(GlobalValue::DLLImportLinkage);
     break;

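For completeness, the new enumerator is reachable through the C API; a minimal, hypothetical caller:

#include <assert.h>
#include "llvm-c/Core.h"

// Exercises both patched switches: the setter maps the C enumerator
// onto GlobalValue::LinkerPrivateWeakLinkage and the getter maps back.
void markLinkerPrivateWeak(LLVMValueRef GV) {
  LLVMSetLinkage(GV, LLVMLinkerPrivateWeakLinkage);
  assert(LLVMGetLinkage(GV) == LLVMLinkerPrivateWeakLinkage);
}
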
Modified: llvm/branches/wendling/eh/lib/VMCore/Instruction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Instruction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Instruction.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Instruction.cpp Fri Jul  2 04:57:13 2010
@@ -421,6 +421,7 @@
   case Store:
   case Ret:
   case Br:
+  case IndirectBr:
   case Switch:
   case Unwind:
   case Unreachable:

Modified: llvm/branches/wendling/eh/lib/VMCore/Instructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Instructions.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Instructions.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Instructions.cpp Fri Jul  2 04:57:13 2010
@@ -33,7 +33,9 @@
 User::op_iterator CallSite::getCallee() const {
   Instruction *II(getInstruction());
   return isCall()
-    ? cast<CallInst>(II)->op_begin()
+    ? (CallInst::ArgOffset
+       ? cast</*FIXME: CallInst*/User>(II)->op_begin()
+       : cast</*FIXME: CallInst*/User>(II)->op_end() - 1)
     : cast<InvokeInst>(II)->op_end() - 4; // Skip PersFn, BB, BB, Function
 }
 
@@ -231,8 +233,7 @@
 
 void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
   assert(NumOperands == NumParams+1 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
+  Op<ArgOffset -1>() = Func;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -245,16 +246,15 @@
     assert((i >= FTy->getNumParams() || 
             FTy->getParamType(i) == Params[i]->getType()) &&
            "Calling a function with a bad signature!");
-    OL[i+1] = Params[i];
+    OperandList[i + ArgOffset] = Params[i];
   }
 }
 
 void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
   assert(NumOperands == 3 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
-  OL[1] = Actual1;
-  OL[2] = Actual2;
+  Op<ArgOffset -1>() = Func;
+  Op<ArgOffset + 0>() = Actual1;
+  Op<ArgOffset + 1>() = Actual2;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -273,9 +273,8 @@
 
 void CallInst::init(Value *Func, Value *Actual) {
   assert(NumOperands == 2 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
-  OL[1] = Actual;
+  Op<ArgOffset -1>() = Func;
+  Op<ArgOffset + 0>() = Actual;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -291,8 +290,7 @@
 
 void CallInst::init(Value *Func) {
   assert(NumOperands == 1 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
+  Op<ArgOffset -1>() = Func;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -1479,7 +1477,7 @@
   Op<0>() = Agg;
   Op<1>() = Val;
 
-  Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+  Indices.append(Idx, Idx + NumIdx);
   setName(Name);
 }
 
@@ -1532,7 +1530,7 @@
                             const Twine &Name) {
   assert(NumOperands == 1 && "NumOperands not initialized?");
 
-  Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+  Indices.append(Idx, Idx + NumIdx);
   setName(Name);
 }
 

Modified: llvm/branches/wendling/eh/lib/VMCore/IntrinsicInst.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/IntrinsicInst.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/IntrinsicInst.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/IntrinsicInst.cpp Fri Jul  2 04:57:13 2010
@@ -54,7 +54,7 @@
 ///
 
 Value *DbgDeclareInst::getAddress() const {
-  if (MDNode* MD = cast_or_null<MDNode>(getOperand(1)))
+  if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
     return MD->getOperand(0);
   else
     return NULL;
@@ -65,9 +65,9 @@
 ///
 
 const Value *DbgValueInst::getValue() const {
-  return cast<MDNode>(getOperand(1))->getOperand(0);
+  return cast<MDNode>(getArgOperand(0))->getOperand(0);
 }
 
 Value *DbgValueInst::getValue() {
-  return cast<MDNode>(getOperand(1))->getOperand(0);
+  return cast<MDNode>(getArgOperand(0))->getOperand(0);
 }

Modified: llvm/branches/wendling/eh/lib/VMCore/Module.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Module.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Module.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Module.cpp Fri Jul  2 04:57:13 2010
@@ -312,15 +312,11 @@
 
 /// getNamedMetadata - Return the first NamedMDNode in the module with the
 /// specified name. This method returns null if a NamedMDNode with the 
-//// specified name is not found.
-NamedMDNode *Module::getNamedMetadata(StringRef Name) const {
-  return NamedMDSymTab->lookup(Name);
-}
-
-NamedMDNode *Module::getNamedMetadataUsingTwine(Twine Name) const {
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
   SmallString<256> NameData;
   StringRef NameRef = Name.toStringRef(NameData);
-   return NamedMDSymTab->lookup(NameRef);
+  return NamedMDSymTab->lookup(NameRef);
 }
 
 /// getOrInsertNamedMetadata - Return the first named MDNode in the module 

Modified: llvm/branches/wendling/eh/lib/VMCore/Pass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Pass.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Pass.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Pass.cpp Fri Jul  2 04:57:13 2010
@@ -35,6 +35,15 @@
 // Pass Implementation
 //
 
+Pass::Pass(PassKind K, intptr_t pid) : Resolver(0), PassID(pid), Kind(K) {
+  assert(pid && "pid cannot be 0");
+}
+
+Pass::Pass(PassKind K, const void *pid)
+  : Resolver(0), PassID((intptr_t)pid), Kind(K) {
+  assert(pid && "pid cannot be 0");
+}
+
 // Force out-of-line virtual method.
 Pass::~Pass() { 
   delete Resolver; 
@@ -92,6 +101,23 @@
   // By default, don't do anything.
 }
 
+void *Pass::getAdjustedAnalysisPointer(const PassInfo *) {
+  return this;
+}
+
+ImmutablePass *Pass::getAsImmutablePass() {
+  return 0;
+}
+
+PMDataManager *Pass::getAsPMDataManager() {
+  return 0;
+}
+
+void Pass::setResolver(AnalysisResolver *AR) {
+  assert(!Resolver && "Resolver is already set");
+  Resolver = AR;
+}
+
 // print - Print out the internal state of the pass.  This is called by Analyze
 // to print out the contents of an analysis.  Otherwise it is not necessary to
 // implement this method.
@@ -364,6 +390,14 @@
   getPassRegistrar()->UnregisterPass(*this);
 }
 
+Pass *PassInfo::createPass() const {
+  assert((!isAnalysisGroup() || NormalCtor) &&
+         "No default implementation found for analysis group!");
+  assert(NormalCtor &&
+         "Cannot call createPass on PassInfo without default ctor!");
+  return NormalCtor();
+}
+
 //===----------------------------------------------------------------------===//
 //                  Analysis Group Implementation Code
 //===----------------------------------------------------------------------===//
@@ -467,4 +501,15 @@
   GetCFGOnlyPasses(Preserved).enumeratePasses();
 }
 
+AnalysisUsage &AnalysisUsage::addRequiredID(AnalysisID ID) {
+  assert(ID && "Pass class not registered!");
+  Required.push_back(ID);
+  return *this;
+}
 
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(AnalysisID ID) {
+  assert(ID && "Pass class not registered!");
+  Required.push_back(ID);
+  RequiredTransitive.push_back(ID);
+  return *this;
+}

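addRequiredID is the flavor used when only a pass's ID is visible to the client. A sketch of such a client, assuming the FunctionPass boilerplate of this vintage of the tree (MyPass is hypothetical; LoopSimplifyID is the ID symbol declared in llvm/Transforms/Scalar.h):

#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

namespace {
// Hypothetical pass that wants its input in loop-simplified form.
struct MyPass : public FunctionPass {
  static char ID;
  MyPass() : FunctionPass(&ID) {}

  virtual bool runOnFunction(Function &F) { return false; }

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    // Lands in the Required list via the new out-of-line method above.
    AU.addRequiredID(LoopSimplifyID);
    AU.setPreservesAll();
  }
};
}

char MyPass::ID = 0;
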
Modified: llvm/branches/wendling/eh/lib/VMCore/PassManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/PassManager.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/PassManager.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/PassManager.cpp Fri Jul  2 04:57:13 2010
@@ -1147,6 +1147,11 @@
   llvm_unreachable("Unable to schedule pass");
 }
 
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F) {
+  assert(0 && "Unable to find on the fly pass");
+  return NULL;
+}
+
 // Destructor
 PMDataManager::~PMDataManager() {
   for (SmallVector<Pass *, 8>::iterator I = PassVector.begin(),

Modified: llvm/branches/wendling/eh/lib/VMCore/Value.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Value.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Value.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Value.cpp Fri Jul  2 04:57:13 2010
@@ -322,7 +322,13 @@
 Value *Value::stripPointerCasts() {
   if (!getType()->isPointerTy())
     return this;
+
+  // Even though we don't look through PHI nodes, we could be called on an
+  // instruction in an unreachable block, which may be on a cycle.
+  SmallPtrSet<Value *, 4> Visited;
+
   Value *V = this;
+  Visited.insert(V);
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
       if (!GEP->hasAllZeroIndices())
@@ -338,7 +344,9 @@
       return V;
     }
     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
-  } while (1);
+  } while (Visited.insert(V));
+
+  return V;
 }
 
 Value *Value::getUnderlyingObject(unsigned MaxLookup) {

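The guard added here is the standard visited-set trick for walks that may hit a cycle. A stand-alone sketch of the termination property (std::set standing in for SmallPtrSet, a Node chain standing in for cast/GEP chains):

#include <cassert>
#include <set>

// Toy link chain; the point is the termination guard, not the
// stripping logic itself.
struct Node {
  Node *Next; // what stripping one cast would yield; 0 at the end
  Node() : Next(0) {}
};

Node *stripCasts(Node *V) {
  std::set<Node *> Visited;
  Visited.insert(V);
  while (V->Next) {
    // insert(...).second is false on a revisit. SmallPtrSet::insert
    // returns a plain bool the same way, hence `while (Visited.insert(V))`
    // in the patched code; stopping makes degenerate cycles (possible in
    // unreachable blocks) terminate instead of spinning forever.
    if (!Visited.insert(V->Next).second)
      return V;
    V = V->Next;
  }
  return V;
}

int main() {
  Node A, B, C;
  A.Next = &B; B.Next = &C; C.Next = &A; // cycle, as in unreachable IR
  assert(stripCasts(&A) != 0);           // terminates
  Node D;
  assert(stripCasts(&D) == &D);          // acyclic case unchanged
  return 0;
}
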
Modified: llvm/branches/wendling/eh/lib/VMCore/Verifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/Verifier.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/Verifier.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/Verifier.cpp Fri Jul  2 04:57:13 2010
@@ -1641,16 +1641,16 @@
   default:
     break;
   case Intrinsic::dbg_declare: {  // llvm.dbg.declare
-    Assert1(CI.getOperand(1) && isa<MDNode>(CI.getOperand(1)),
+    Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
                 "invalid llvm.dbg.declare intrinsic call 1", &CI);
-    MDNode *MD = cast<MDNode>(CI.getOperand(1));
+    MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
     Assert1(MD->getNumOperands() == 1,
                 "invalid llvm.dbg.declare intrinsic call 2", &CI);
   } break;
   case Intrinsic::memcpy:
   case Intrinsic::memmove:
   case Intrinsic::memset:
-    Assert1(isa<ConstantInt>(CI.getOperand(4)),
+    Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
             "alignment argument of memory intrinsics must be a constant int",
             &CI);
     break;
@@ -1659,10 +1659,10 @@
   case Intrinsic::gcread:
     if (ID == Intrinsic::gcroot) {
       AllocaInst *AI =
-        dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts());
+        dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
       Assert1(AI && AI->getType()->getElementType()->isPointerTy(),
               "llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
-      Assert1(isa<Constant>(CI.getOperand(2)),
+      Assert1(isa<Constant>(CI.getArgOperand(1)),
               "llvm.gcroot parameter #2 must be a constant.", &CI);
     }
 
@@ -1670,32 +1670,32 @@
             "Enclosing function does not use GC.", &CI);
     break;
   case Intrinsic::init_trampoline:
-    Assert1(isa<Function>(CI.getOperand(2)->stripPointerCasts()),
+    Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
             "llvm.init_trampoline parameter #2 must resolve to a function.",
             &CI);
     break;
   case Intrinsic::prefetch:
-    Assert1(isa<ConstantInt>(CI.getOperand(2)) &&
-            isa<ConstantInt>(CI.getOperand(3)) &&
-            cast<ConstantInt>(CI.getOperand(2))->getZExtValue() < 2 &&
-            cast<ConstantInt>(CI.getOperand(3))->getZExtValue() < 4,
+    Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
+            isa<ConstantInt>(CI.getArgOperand(2)) &&
+            cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+            cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
             "invalid arguments to llvm.prefetch",
             &CI);
     break;
   case Intrinsic::stackprotector:
-    Assert1(isa<AllocaInst>(CI.getOperand(2)->stripPointerCasts()),
+    Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
             "llvm.stackprotector parameter #2 must resolve to an alloca.",
             &CI);
     break;
   case Intrinsic::lifetime_start:
   case Intrinsic::lifetime_end:
   case Intrinsic::invariant_start:
-    Assert1(isa<ConstantInt>(CI.getOperand(1)),
+    Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
             "size argument of memory use markers must be a constant integer",
             &CI);
     break;
   case Intrinsic::invariant_end:
-    Assert1(isa<ConstantInt>(CI.getOperand(2)),
+    Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
             "llvm.invariant.end parameter #2 must be a constant integer", &CI);
     break;
   }

Modified: llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/scev-aa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/scev-aa.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/scev-aa.ll (original)
+++ llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/scev-aa.ll Fri Jul  2 04:57:13 2010
@@ -1,8 +1,9 @@
 ; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \
 ; RUN:   |& FileCheck %s
 
-; At the time of this writing, -basicaa only misses the example of the form
-; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references.
+; At the time of this writing, -basicaa misses the example of the form
+; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references,
+; and the example of the form A[0] != A[i+1], where i+1 is known to be positive.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
 
@@ -189,6 +190,27 @@
   ret void
 }
 
+; TODO: This is theoretically provable to be NoAlias.
+; CHECK: Function: nonnegative: 2 pointers, 0 call sites
+; CHECK: MayAlias:  i64* %arrayidx, i64* %p
+
+define void @nonnegative(i64* %p) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] ; <i64> [#uses=2]
+  %inc = add nsw i64 %i, 1                         ; <i64> [#uses=2]
+  %arrayidx = getelementptr inbounds i64* %p, i64 %inc
+  store i64 0, i64* %arrayidx
+  %tmp6 = load i64* %p                            ; <i64> [#uses=1]
+  %cmp = icmp slt i64 %inc, %tmp6                 ; <i1> [#uses=1]
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
 ; CHECK: 13 no alias responses
-; CHECK: 26 may alias responses
+; CHECK: 27 may alias responses
 ; CHECK: 18 must alias responses

Modified: llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/trip-count10.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/trip-count10.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/trip-count10.ll (original)
+++ llvm/branches/wendling/eh/test/Analysis/ScalarEvolution/trip-count10.ll Fri Jul  2 04:57:13 2010
@@ -74,3 +74,53 @@
 return:
   ret void
 }
+
+; Trip counts for non-polynomial iterations. It's theoretically possible
+; to compute a maximum count for these, but short of that, ScalarEvolution
+; should return unknown.
+
+; PR7416
+; CHECK: Determining loop execution counts for: @nonpolynomial
+; CHECK-NEXT: Loop %loophead: Unpredictable backedge-taken count
+; CHECK-NEXT: Loop %loophead: Unpredictable max backedge-taken count
+
+declare i1 @g() nounwind
+
+define void @nonpolynomial() {
+entry:
+  br label %loophead
+loophead:
+  %x = phi i32 [0, %entry], [%x.1, %bb1], [%x.2, %bb2]
+  %y = icmp slt i32 %x, 100
+  br i1 %y, label %loopbody, label %retbb
+loopbody:
+  %z = call i1 @g()
+  br i1 %z, label %bb1, label %bb2
+bb1:
+  %x.1 = add i32 %x, 2
+  br label %loophead
+bb2:
+  %x.2 = add i32 %x, 3
+  br label %loophead
+retbb:
+  ret void
+}
+
+; PHI nodes with all constant operands.
+
+; CHECK: Determining loop execution counts for: @constant_phi_operands
+; CHECK: Loop %loop: backedge-taken count is 1
+; CHECK: Loop %loop: max backedge-taken count is 1
+
+define void @constant_phi_operands() nounwind {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 1, %loop ], [ 0, %entry ]
+  %exitcond = icmp eq i64 %i, 1
+  br i1 %exitcond, label %return, label %loop
+
+return:
+  ret void
+}

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/2009-08-23-linkerprivate.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/2009-08-23-linkerprivate.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/2009-08-23-linkerprivate.ll Fri Jul  2 04:57:13 2010
@@ -2,7 +2,7 @@
 
 ; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
 
-@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16		; <i32*> [#uses=0]
+@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16
 
 ; CHECK: .globl l_objc_msgSend_fixup_alloc
 ; CHECK: .weak_definition l_objc_msgSend_fixup_alloc

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/arm-returnaddr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/arm-returnaddr.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/arm-returnaddr.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/arm-returnaddr.ll Fri Jul  2 04:57:13 2010
@@ -1,11 +1,12 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
 ; rdar://8015977
 ; rdar://8020118
 
 define i8* @rt0(i32 %x) nounwind readnone {
 entry:
 ; CHECK: rt0:
+; CHECK: {r7, lr}
 ; CHECK: mov r0, lr
   %0 = tail call i8* @llvm.returnaddress(i32 0)
   ret i8* %0
@@ -14,6 +15,7 @@
 define i8* @rt2() nounwind readnone {
 entry:
 ; CHECK: rt2:
+; CHECK: {r7, lr}
 ; CHECK: ldr r0, [r7]
 ; CHECK: ldr r0, [r0]
 ; CHECK: ldr r0, [r0, #4]

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/call-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/call-tc.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/call-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/call-tc.ll Fri Jul  2 04:57:13 2010
@@ -7,22 +7,25 @@
 
 declare void @g(i32, i32, i32, i32)
 
-define void @f() {
+define void @t1() {
+; CHECKELF: t1:
 ; CHECKELF: PLT
         call void @g( i32 1, i32 2, i32 3, i32 4 )
         ret void
 }
 
-define void @g.upgrd.1() {
+define void @t2() {
+; CHECKV4: t2:
 ; CHECKV4: bx r0 @ TAILCALL
+; CHECKV5: t2:
 ; CHECKV5: bx r0 @ TAILCALL
         %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
         ret void
 }
 
-define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
-; CHECKV4: m_231b
+define i32* @t3(i32, i32, i32*, i32*, i32*) nounwind {
+; CHECKV4: t3:
 ; CHECKV4: bx r{{.*}}
 BB0:
   %5 = inttoptr i32 %0 to i32*                    ; <i32*> [#uses=1]

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/crash-O0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/crash-O0.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/crash-O0.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/crash-O0.ll Fri Jul  2 04:57:13 2010
@@ -10,3 +10,19 @@
   %asmtmp = call %struct0 asm sideeffect "...", "=&r,=&r,r,Ir,r,~{cc},~{memory}"(i32* undef, i32 undef, i32 1) nounwind ; <%0> [#uses=0]
   unreachable
 }
+
+@.str523 = private constant [256 x i8] c"<Unknown>\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4 ; <[256 x i8]*> [#uses=1]
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+; This function uses the scavenger for an ADDri instruction.
+; ARMBaseRegisterInfo::estimateRSStackSizeLimit must return a 255 limit.
+define arm_apcscc void @scavence_ADDri() nounwind {
+entry:
+  %letter = alloca i8                             ; <i8*> [#uses=0]
+  %prodvers = alloca [256 x i8]                   ; <[256 x i8]*> [#uses=1]
+  %buildver = alloca [256 x i8]                   ; <[256 x i8]*> [#uses=0]
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+  %prodvers2 = bitcast [256 x i8]* %prodvers to i8* ; <i8*> [#uses=1]
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+  unreachable
+}

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt2.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt2.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt2.ll Fri Jul  2 04:57:13 2010
@@ -1,10 +1,8 @@
-; RUN: llc < %s -march=arm > %t
-; RUN: grep bxlt %t | count 1
-; RUN: grep bxgt %t | count 1
-; RUN: not grep bxge %t
-; RUN: not grep bxle %t
+; RUN: llc < %s -march=arm | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: bxlt lr
 	%tmp2 = icmp sgt i32 %c, 10
 	%tmp5 = icmp slt i32 %d, 4
 	%tmp8 = or i1 %tmp5, %tmp2
@@ -21,6 +19,13 @@
 }
 
 define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t2:
+; CHECK: bxgt lr
+; CHECK: cmp
+; CHECK: addge
+; CHECK: subge
+; CHECK-NOT: bxge lr
+; CHECK: bx lr
 	%tmp2 = icmp sgt i32 %c, 10
 	%tmp5 = icmp slt i32 %d, 4
 	%tmp8 = and i1 %tmp5, %tmp2

Removed: llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt6-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt6-tc.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt6-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/ifcvt6-tc.ll (removed)
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep cmpne | count 1
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep bhi | count 1
-; Here, tail call wins over eliminating branches.  It is 1 fewer instruction
-; and removes all stack accesses, so seems like a win.
-
-define void @foo(i32 %X, i32 %Y) {
-entry:
-	%tmp1 = icmp ult i32 %X, 4		; <i1> [#uses=1]
-	%tmp4 = icmp eq i32 %Y, 0		; <i1> [#uses=1]
-	%tmp7 = or i1 %tmp4, %tmp1		; <i1> [#uses=1]
-	br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
-
-cond_true:		; preds = %entry
-	%tmp10 = tail call i32 (...)* @bar( )		; <i32> [#uses=0]
-	ret void
-
-UnifiedReturnBlock:		; preds = %entry
-	ret void
-}
-
-declare i32 @bar(...)

Removed: llvm/branches/wendling/eh/test/CodeGen/ARM/insn-sched1-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/insn-sched1-tc.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/insn-sched1-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/insn-sched1-tc.ll (removed)
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+v6
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
-; RUN:   grep mov | count 2
-
-define i32 @test(i32 %x) {
-        %tmp = trunc i32 %x to i16              ; <i16> [#uses=1]
-        %tmp2 = tail call i32 @f( i32 1, i16 %tmp )             ; <i32> [#uses=1]
-        ret i32 %tmp2
-}
-
-declare i32 @f(i32, i16)

Removed: llvm/branches/wendling/eh/test/CodeGen/ARM/ldm-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/ldm-tc.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/ldm-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/ldm-tc.ll (removed)
@@ -1,37 +0,0 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-
-@X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
-
-define i32 @t1() {
-; CHECK: t1:
-; CHECK: ldmia
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
-        %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 )                ; <i32> [#uses=1]
-        ret i32 %tmp4
-}
-
-define i32 @t2() {
-; CHECK: t2:
-; CHECK: ldmia
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
-        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
-        %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
-        ret i32 %tmp6
-}
-
-define i32 @t3() {
-; CHECK: t3:
-; CHECK: ldmib
-; CHECK: b.w _f2 @ TAILCALL
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
-        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
-        %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
-        ret i32 %tmp6
-}
-
-declare i32 @f1(i32, i32)
-
-declare i32 @f2(i32, i32, i32)

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/long_shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/long_shift.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/long_shift.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/long_shift.ll Fri Jul  2 04:57:13 2010
@@ -23,10 +23,10 @@
 define i32 @f2(i64 %x, i64 %y) {
 ; CHECK: f2
 ; CHECK:      mov     r0, r0, lsr r2
-; CHECK-NEXT: rsb     r12, r2, #32
+; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: sub     r2, r2, #32
 ; CHECK-NEXT: cmp     r2, #0
-; CHECK-NEXT: orr     r0, r0, r1, lsl r12
+; CHECK-NEXT: orr     r0, r0, r1, lsl r3
 ; CHECK-NEXT: movge   r0, r1, asr r2
 	%a = ashr i64 %x, %y
 	%b = trunc i64 %a to i32
@@ -36,10 +36,10 @@
 define i32 @f3(i64 %x, i64 %y) {
 ; CHECK: f3
 ; CHECK:      mov     r0, r0, lsr r2
-; CHECK-NEXT: rsb     r12, r2, #32
+; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: sub     r2, r2, #32
 ; CHECK-NEXT: cmp     r2, #0
-; CHECK-NEXT: orr     r0, r0, r1, lsl r12
+; CHECK-NEXT: orr     r0, r0, r1, lsl r3
 ; CHECK-NEXT: movge   r0, r1, lsr r2
 	%a = lshr i64 %x, %y
 	%b = trunc i64 %a to i32

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-code-insertion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-code-insertion.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-code-insertion.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-code-insertion.ll Fri Jul  2 04:57:13 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& grep {38.*Number of machine instrs printed}
 ; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
 ; This test really wants to check that the resultant "cond_true" block only 
 ; has a single store in it, and that cond_true55 only has code to materialize 

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-on-unrolled-loops.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-on-unrolled-loops.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/lsr-on-unrolled-loops.ll Fri Jul  2 04:57:13 2010
@@ -626,9 +626,11 @@
 ; LSR should use count-down iteration to avoid requiring the trip count
 ; in a register, and it shouldn't require any reloads here.
 
-; CHECK:      subs  r3, #1
-; CHECK-NEXT: cmp   r3, #0
-; CHECK-NEXT: bne.w   
+;      CHECK: @ %bb24
+; CHECK-NEXT: @   in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT: sub{{.*}} [[REGISTER:r[0-9]+]], #1
+; CHECK-NEXT: cmp{{.*}} [[REGISTER]], #0
+; CHECK-NEXT: bne.w
 
   %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
   %indvar.next79 = add i32 %indvar78, 1           ; <i32> [#uses=1]

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/reg_sequence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/reg_sequence.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/reg_sequence.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/reg_sequence.ll Fri Jul  2 04:57:13 2010
@@ -250,13 +250,13 @@
   br label %8
 
 ; <label>:6                                       ; preds = %8
-  br i1 undef, label %7, label %10
+  br label %7
 
 ; <label>:7                                       ; preds = %6
   br label %8
 
 ; <label>:8                                       ; preds = %7, %2
-  br i1 undef, label %6, label %9
+  br label %6
 
 ; <label>:9                                       ; preds = %8
   ret float undef

Modified: llvm/branches/wendling/eh/test/CodeGen/ARM/vget_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/ARM/vget_lane.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/ARM/vget_lane.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/ARM/vget_lane.ll Fri Jul  2 04:57:13 2010
@@ -204,8 +204,8 @@
 
 define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
 ;CHECK: test_vset_lanef32:
-;CHECK: vmov.f32
-;CHECK: vmov.f32
+;CHECK: vmov.f32 s3, s0
+;CHECK: vmov d0, d1
 entry:
   %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
   ret <2 x float> %0

Modified: llvm/branches/wendling/eh/test/CodeGen/CellSPU/call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/CellSPU/call.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/CellSPU/call.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/CellSPU/call.ll Fri Jul  2 04:57:13 2010
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=cellspu > %t1.s
+; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s
 ; RUN: grep brsl    %t1.s | count 1
 ; RUN: grep brasl   %t1.s | count 1
 ; RUN: grep stqd    %t1.s | count 80
+; RUN: llc < %s -march=cellspu | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
@@ -16,6 +17,8 @@
 declare void @extern_stub_1(i32, i32)
 
 define i32 @stub_1(i32 %x, float %y) {
+ ; CHECK: il $3, 0
+ ; CHECK: bi $lr 
 entry:
   ret i32 0
 }

Modified: llvm/branches/wendling/eh/test/CodeGen/CellSPU/call_indirect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/CellSPU/call_indirect.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/CellSPU/call_indirect.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/CellSPU/call_indirect.ll Fri Jul  2 04:57:13 2010
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=cellspu -asm-verbose=0 > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 > %t2.s
+; RUN: llc < %s -march=cellspu -asm-verbose=0 -regalloc=linearscan > %t1.s
+; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 -regalloc=linearscan > %t2.s
 ; RUN: grep bisl    %t1.s | count 7
 ; RUN: grep ila     %t1.s | count 1
 ; RUN: grep rotqby  %t1.s | count 5

Modified: llvm/branches/wendling/eh/test/CodeGen/CellSPU/jumptable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/CellSPU/jumptable.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/CellSPU/jumptable.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/CellSPU/jumptable.ll Fri Jul  2 04:57:13 2010
@@ -2,9 +2,9 @@
 ; This is to check that emitting jumptables doesn't crash llc
 define i32 @test(i32 %param) {
 entry:
-;CHECK:        ai      $4, $3, -1
-;CHECK:        clgti   $5, $4, 3
-;CHECK:        brnz    $5,.LBB0_2
+;CHECK:        ai      {{\$.}}, $3, -1
+;CHECK:        clgti   {{\$., \$.}}, 3
+;CHECK:        brnz    {{\$.}},.LBB0_2
   switch i32 %param, label %bb1 [
     i32 1, label %bb3
     i32 2, label %bb2

Modified: llvm/branches/wendling/eh/test/CodeGen/CellSPU/loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/CellSPU/loads.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/CellSPU/loads.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/CellSPU/loads.ll Fri Jul  2 04:57:13 2010
@@ -22,17 +22,19 @@
 
 declare <4 x i32>* @getv4f32ptr()
 define <4 x i32> @func() {
-        ;CHECK: brasl
-        ;CHECK: lr	{{\$[0-9]*, \$3}}
-        ;CHECK: brasl
-        %rv1 = call <4 x i32>* @getv4f32ptr()
-        %rv2 = call <4 x i32>* @getv4f32ptr()
-        %rv3 = load <4 x i32>* %rv1
-        ret <4 x i32> %rv3
+	;CHECK: brasl
+	; we need to have some instruction to move the result to safety.
+	; which instruction (lr, stqd...) depends on the regalloc
+	;CHECK: {{.*}}
+	;CHECK: brasl
+	%rv1 = call <4 x i32>* @getv4f32ptr()
+	%rv2 = call <4 x i32>* @getv4f32ptr()
+	%rv3 = load <4 x i32>* %rv1
+	ret <4 x i32> %rv3
 }
 
 define <4 x float> @load_undef(){
-	;CHECK lqd	$3, 0($3)
+	; CHECK: lqd	$3, 0($3)
 	%val = load <4 x float>* undef
 	ret <4 x float> %val
 }

Removed: llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen.ll (removed)
@@ -1,8 +0,0 @@
-; RUN: llc -march=x86 < %s
-
-%vec = type <9 x float>
-define %vec @vecdiv( %vec %p1, %vec %p2)
-{
-  %result = fdiv %vec %p1, %p2
-  ret %vec %result
-}

Removed: llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen2.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen2.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Generic/v-binop-widen2.ll (removed)
@@ -1,37 +0,0 @@
-; RUN: llvm-as < %s | lli
-
-%vec = type <6 x float>
-
-define %vec @vecdiv( %vec %p1, %vec %p2)
-{
-  %result = fdiv %vec %p1, %p2
-  ret %vec %result
-}
-
-@a = constant %vec < float 2.0, float 4.0, float 8.0, float 16.0, float 32.0, float 64.0 >
-@b = constant %vec < float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0 >
-
-; Expected result: < 1.0, 2.0, 4.0, ..., 2.0^(n-1) >
-; main() returns 0 if the result is expected and 1 otherwise
-define i32 @main() nounwind {
-entry:
-  %avec = load %vec* @a
-  %bvec = load %vec* @b
-
-  %res = call %vec @vecdiv(%vec %avec, %vec %bvec)
-  br label %loop
-loop:
-  %idx = phi i32 [0, %entry], [%nextInd, %looptail]
-  %expected = phi float [1.0, %entry], [%nextExpected, %looptail]
-  %elem = extractelement %vec %res, i32 %idx
-  %expcmp = fcmp oeq float %elem, %expected
-  br i1 %expcmp, label %looptail, label %return
-looptail:
-  %nextExpected = fmul float %expected, 2.0
-  %nextInd = add i32 %idx, 1
-  %cmp = icmp slt i32 %nextInd, 6
-  br i1 %cmp, label %loop, label %return
-return:
-  %retval = phi i32 [0, %looptail], [1, %loop]
-  ret i32 %retval
-}

Removed: llvm/branches/wendling/eh/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/PowerPC/2008-03-06-KillInfo.ll (removed)
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
-@.str242 = external constant [3 x i8]		; <[3 x i8]*> [#uses=1]
-
-define fastcc void @ParseContent(i8* %buf, i32 %bufsize) {
-entry:
-	%items = alloca [10000 x i8*], align 16		; <[10000 x i8*]*> [#uses=0]
-	%tmp86 = add i32 0, -1		; <i32> [#uses=1]
-	br i1 false, label %cond_true94, label %cond_next99
-cond_true94:		; preds = %entry
-	%tmp98 = call i32 (i8*, ...)* @printf( i8* getelementptr ([3 x i8]* @.str242, i32 0, i32 0), i8* null )		; <i32> [#uses=0]
-	%tmp20971 = icmp sgt i32 %tmp86, 0		; <i1> [#uses=1]
-	br i1 %tmp20971, label %bb101, label %bb212
-cond_next99:		; preds = %entry
-	ret void
-bb101:		; preds = %cond_true94
-	ret void
-bb212:		; preds = %cond_true94
-	ret void
-}
-
-declare i32 @printf(i8*, ...)

Modified: llvm/branches/wendling/eh/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll Fri Jul  2 04:57:13 2010
@@ -2,7 +2,7 @@
 
 ; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
 
-@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16		; <i32*> [#uses=0]
+@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16
 
 ; CHECK: .globl l_objc_msgSend_fixup_alloc
 ; CHECK: .weak_definition l_objc_msgSend_fixup_alloc

Removed: llvm/branches/wendling/eh/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll (removed)
@@ -1,433 +0,0 @@
-; RUN: llc < %s -O3 | FileCheck %s
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
-target triple = "powerpc-apple-darwin9.6"
-
-; There should be no stfs spills
-; CHECK: main:
-; CHECK-NOT: stfs
-; CHECK: .section
-
-@.str66 = external constant [3 x i8], align 4     ; <[3 x i8]*> [#uses=1]
-@.str31 = external constant [6 x i8], align 4     ; <[6 x i8]*> [#uses=1]
-@.str61 = external constant [21 x i8], align 4    ; <[21 x i8]*> [#uses=1]
-@.str101 = external constant [61 x i8], align 4   ; <[61 x i8]*> [#uses=1]
-@.str104 = external constant [31 x i8], align 4   ; <[31 x i8]*> [#uses=1]
-@.str105 = external constant [45 x i8], align 4   ; <[45 x i8]*> [#uses=1]
-@.str112 = external constant [38 x i8], align 4   ; <[38 x i8]*> [#uses=1]
-@.str121 = external constant [36 x i8], align 4   ; <[36 x i8]*> [#uses=1]
-@.str12293 = external constant [67 x i8], align 4 ; <[67 x i8]*> [#uses=1]
-@.str123 = external constant [68 x i8], align 4   ; <[68 x i8]*> [#uses=1]
-@.str124 = external constant [52 x i8], align 4   ; <[52 x i8]*> [#uses=1]
-@.str125 = external constant [51 x i8], align 4   ; <[51 x i8]*> [#uses=1]
-
-define i32 @main(i32 %argc, i8** %argv) noreturn nounwind {
-entry:
-  br i1 undef, label %bb4.i1, label %my_fopen.exit
-
-bb4.i1:                                           ; preds = %entry
-  unreachable
-
-my_fopen.exit:                                    ; preds = %entry
-  br i1 undef, label %bb.i, label %bb1.i
-
-bb.i:                                             ; preds = %my_fopen.exit
-  unreachable
-
-bb1.i:                                            ; preds = %my_fopen.exit
-  br label %bb134.i
-
-bb2.i:                                            ; preds = %bb134.i
-  %0 = icmp eq i32 undef, 0                       ; <i1> [#uses=1]
-  br i1 %0, label %bb20.i, label %bb21.i
-
-bb20.i:                                           ; preds = %bb2.i
-  br label %bb134.i
-
-bb21.i:                                           ; preds = %bb2.i
-  %1 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([6 x i8]* @.str31, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
-  br i1 undef, label %bb30.i, label %bb31.i
-
-bb30.i:                                           ; preds = %bb21.i
-  br label %bb134.i
-
-bb31.i:                                           ; preds = %bb21.i
-  br i1 undef, label %bb41.i, label %bb44.i
-
-bb41.i:                                           ; preds = %bb31.i
-  %2 = icmp slt i32 undef, %argc                  ; <i1> [#uses=1]
-  br i1 %2, label %bb1.i77.i, label %bb2.i78.i
-
-bb1.i77.i:                                        ; preds = %bb41.i
-  %3 = load float* undef, align 4                 ; <float> [#uses=2]
-  %4 = fcmp ugt float %3, 0.000000e+00            ; <i1> [#uses=1]
-  br i1 %4, label %bb43.i, label %bb42.i
-
-bb2.i78.i:                                        ; preds = %bb41.i
-  unreachable
-
-bb42.i:                                           ; preds = %bb1.i77.i
-  unreachable
-
-bb43.i:                                           ; preds = %bb1.i77.i
-  br label %bb134.i
-
-bb44.i:                                           ; preds = %bb31.i
-  br i1 undef, label %bb45.i, label %bb49.i
-
-bb45.i:                                           ; preds = %bb44.i
-  %5 = icmp slt i32 undef, %argc                  ; <i1> [#uses=1]
-  br i1 %5, label %bb1.i72.i, label %bb2.i73.i
-
-bb1.i72.i:                                        ; preds = %bb45.i
-  %6 = load float* undef, align 4                 ; <float> [#uses=3]
-  %7 = fcmp ult float %6, 1.000000e+00            ; <i1> [#uses=1]
-  %or.cond.i = and i1 undef, %7                   ; <i1> [#uses=1]
-  br i1 %or.cond.i, label %bb48.i, label %bb47.i
-
-bb2.i73.i:                                        ; preds = %bb45.i
-  unreachable
-
-bb47.i:                                           ; preds = %bb1.i72.i
-  unreachable
-
-bb48.i:                                           ; preds = %bb1.i72.i
-  br label %bb134.i
-
-bb49.i:                                           ; preds = %bb44.i
-  br i1 undef, label %bb50.i, label %bb53.i
-
-bb50.i:                                           ; preds = %bb49.i
-  br i1 false, label %bb1.i67.i, label %bb2.i68.i
-
-bb1.i67.i:                                        ; preds = %bb50.i
-  br i1 false, label %read_float_option.exit69.i, label %bb1.i67.bb2.i68_crit_edge.i
-
-bb1.i67.bb2.i68_crit_edge.i:                      ; preds = %bb1.i67.i
-  br label %bb2.i68.i
-
-bb2.i68.i:                                        ; preds = %bb1.i67.bb2.i68_crit_edge.i, %bb50.i
-  unreachable
-
-read_float_option.exit69.i:                       ; preds = %bb1.i67.i
-  br i1 undef, label %bb52.i, label %bb51.i
-
-bb51.i:                                           ; preds = %read_float_option.exit69.i
-  unreachable
-
-bb52.i:                                           ; preds = %read_float_option.exit69.i
-  br label %bb134.i
-
-bb53.i:                                           ; preds = %bb49.i
-  %8 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([21 x i8]* @.str61, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
-  br i1 false, label %bb89.i, label %bb92.i
-
-bb89.i:                                           ; preds = %bb53.i
-  br i1 undef, label %bb1.i27.i, label %bb2.i28.i
-
-bb1.i27.i:                                        ; preds = %bb89.i
-  unreachable
-
-bb2.i28.i:                                        ; preds = %bb89.i
-  unreachable
-
-bb92.i:                                           ; preds = %bb53.i
-  br i1 undef, label %bb93.i, label %bb96.i
-
-bb93.i:                                           ; preds = %bb92.i
-  br i1 undef, label %bb1.i22.i, label %bb2.i23.i
-
-bb1.i22.i:                                        ; preds = %bb93.i
-  br i1 undef, label %bb95.i, label %bb94.i
-
-bb2.i23.i:                                        ; preds = %bb93.i
-  unreachable
-
-bb94.i:                                           ; preds = %bb1.i22.i
-  unreachable
-
-bb95.i:                                           ; preds = %bb1.i22.i
-  br label %bb134.i
-
-bb96.i:                                           ; preds = %bb92.i
-  br i1 undef, label %bb97.i, label %bb100.i
-
-bb97.i:                                           ; preds = %bb96.i
-  %9 = icmp slt i32 undef, %argc                  ; <i1> [#uses=1]
-  br i1 %9, label %bb1.i17.i, label %bb2.i18.i
-
-bb1.i17.i:                                        ; preds = %bb97.i
-  %10 = call i32 (i8*, i8*, ...)* @"\01_sscanf$LDBL128"(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str66, i32 0, i32 0), float* undef) nounwind ; <i32> [#uses=1]
-  %phitmp.i16.i = icmp eq i32 %10, 1              ; <i1> [#uses=1]
-  br i1 %phitmp.i16.i, label %read_float_option.exit19.i, label %bb1.i17.bb2.i18_crit_edge.i
-
-bb1.i17.bb2.i18_crit_edge.i:                      ; preds = %bb1.i17.i
-  br label %bb2.i18.i
-
-bb2.i18.i:                                        ; preds = %bb1.i17.bb2.i18_crit_edge.i, %bb97.i
-  unreachable
-
-read_float_option.exit19.i:                       ; preds = %bb1.i17.i
-  br i1 false, label %bb99.i, label %bb98.i
-
-bb98.i:                                           ; preds = %read_float_option.exit19.i
-  unreachable
-
-bb99.i:                                           ; preds = %read_float_option.exit19.i
-  br label %bb134.i
-
-bb100.i:                                          ; preds = %bb96.i
-  br i1 false, label %bb101.i, label %bb104.i
-
-bb101.i:                                          ; preds = %bb100.i
-  br i1 false, label %bb1.i12.i, label %bb2.i13.i
-
-bb1.i12.i:                                        ; preds = %bb101.i
-  br i1 undef, label %bb102.i, label %bb103.i
-
-bb2.i13.i:                                        ; preds = %bb101.i
-  unreachable
-
-bb102.i:                                          ; preds = %bb1.i12.i
-  unreachable
-
-bb103.i:                                          ; preds = %bb1.i12.i
-  br label %bb134.i
-
-bb104.i:                                          ; preds = %bb100.i
-  unreachable
-
-bb134.i:                                          ; preds = %bb103.i, %bb99.i, %bb95.i, %bb52.i, %bb48.i, %bb43.i, %bb30.i, %bb20.i, %bb1.i
-  %annealing_sched.1.0 = phi float [ 1.000000e+01, %bb1.i ], [ %annealing_sched.1.0, %bb20.i ], [ 1.000000e+00, %bb30.i ], [ %annealing_sched.1.0, %bb43.i ], [ %annealing_sched.1.0, %bb48.i ], [ %annealing_sched.1.0, %bb52.i ], [ %annealing_sched.1.0, %bb95.i ], [ %annealing_sched.1.0, %bb99.i ], [ %annealing_sched.1.0, %bb103.i ] ; <float> [#uses=8]
-  %annealing_sched.2.0 = phi float [ 1.000000e+02, %bb1.i ], [ %annealing_sched.2.0, %bb20.i ], [ %annealing_sched.2.0, %bb30.i ], [ %3, %bb43.i ], [ %annealing_sched.2.0, %bb48.i ], [ %annealing_sched.2.0, %bb52.i ], [ %annealing_sched.2.0, %bb95.i ], [ %annealing_sched.2.0, %bb99.i ], [ %annealing_sched.2.0, %bb103.i ] ; <float> [#uses=8]
-  %annealing_sched.3.0 = phi float [ 0x3FE99999A0000000, %bb1.i ], [ %annealing_sched.3.0, %bb20.i ], [ %annealing_sched.3.0, %bb30.i ], [ %annealing_sched.3.0, %bb43.i ], [ %6, %bb48.i ], [ %annealing_sched.3.0, %bb52.i ], [ %annealing_sched.3.0, %bb95.i ], [ %annealing_sched.3.0, %bb99.i ], [ %annealing_sched.3.0, %bb103.i ] ; <float> [#uses=8]
-  %annealing_sched.4.0 = phi float [ 0x3F847AE140000000, %bb1.i ], [ %annealing_sched.4.0, %bb20.i ], [ %annealing_sched.4.0, %bb30.i ], [ %annealing_sched.4.0, %bb43.i ], [ %annealing_sched.4.0, %bb48.i ], [ 0.000000e+00, %bb52.i ], [ %annealing_sched.4.0, %bb95.i ], [ %annealing_sched.4.0, %bb99.i ], [ %annealing_sched.4.0, %bb103.i ] ; <float> [#uses=8]
-  %router_opts.0.0 = phi float [ 0.000000e+00, %bb1.i ], [ %router_opts.0.0, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %router_opts.0.0, %bb43.i ], [ %router_opts.0.0, %bb48.i ], [ %router_opts.0.0, %bb52.i ], [ %router_opts.0.0, %bb95.i ], [ %router_opts.0.0, %bb99.i ], [ %router_opts.0.0, %bb103.i ] ; <float> [#uses=8]
-  %router_opts.1.0 = phi float [ 5.000000e-01, %bb1.i ], [ %router_opts.1.0, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %router_opts.1.0, %bb43.i ], [ %router_opts.1.0, %bb48.i ], [ %router_opts.1.0, %bb52.i ], [ undef, %bb95.i ], [ %router_opts.1.0, %bb99.i ], [ %router_opts.1.0, %bb103.i ] ; <float> [#uses=7]
-  %router_opts.2.0 = phi float [ 1.500000e+00, %bb1.i ], [ %router_opts.2.0, %bb20.i ], [ %router_opts.2.0, %bb30.i ], [ %router_opts.2.0, %bb43.i ], [ %router_opts.2.0, %bb48.i ], [ %router_opts.2.0, %bb52.i ], [ %router_opts.2.0, %bb95.i ], [ undef, %bb99.i ], [ %router_opts.2.0, %bb103.i ] ; <float> [#uses=8]
-  %router_opts.3.0 = phi float [ 0x3FC99999A0000000, %bb1.i ], [ %router_opts.3.0, %bb20.i ], [ %router_opts.3.0, %bb30.i ], [ %router_opts.3.0, %bb43.i ], [ %router_opts.3.0, %bb48.i ], [ %router_opts.3.0, %bb52.i ], [ %router_opts.3.0, %bb95.i ], [ %router_opts.3.0, %bb99.i ], [ 0.000000e+00, %bb103.i ] ; <float> [#uses=8]
-  %11 = phi float [ 0x3FC99999A0000000, %bb1.i ], [ %11, %bb20.i ], [ %11, %bb30.i ], [ %11, %bb43.i ], [ %11, %bb48.i ], [ %11, %bb52.i ], [ %11, %bb95.i ], [ %11, %bb99.i ], [ 0.000000e+00, %bb103.i ] ; <float> [#uses=8]
-  %12 = phi float [ 1.500000e+00, %bb1.i ], [ %12, %bb20.i ], [ %12, %bb30.i ], [ %12, %bb43.i ], [ %12, %bb48.i ], [ %12, %bb52.i ], [ %12, %bb95.i ], [ undef, %bb99.i ], [ %12, %bb103.i ] ; <float> [#uses=8]
-  %13 = phi float [ 5.000000e-01, %bb1.i ], [ %13, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %13, %bb43.i ], [ %13, %bb48.i ], [ %13, %bb52.i ], [ undef, %bb95.i ], [ %13, %bb99.i ], [ %13, %bb103.i ] ; <float> [#uses=7]
-  %14 = phi float [ 0.000000e+00, %bb1.i ], [ %14, %bb20.i ], [ 1.000000e+04, %bb30.i ], [ %14, %bb43.i ], [ %14, %bb48.i ], [ %14, %bb52.i ], [ %14, %bb95.i ], [ %14, %bb99.i ], [ %14, %bb103.i ] ; <float> [#uses=8]
-  %15 = phi float [ 0x3FE99999A0000000, %bb1.i ], [ %15, %bb20.i ], [ %15, %bb30.i ], [ %15, %bb43.i ], [ %6, %bb48.i ], [ %15, %bb52.i ], [ %15, %bb95.i ], [ %15, %bb99.i ], [ %15, %bb103.i ] ; <float> [#uses=8]
-  %16 = phi float [ 0x3F847AE140000000, %bb1.i ], [ %16, %bb20.i ], [ %16, %bb30.i ], [ %16, %bb43.i ], [ %16, %bb48.i ], [ 0.000000e+00, %bb52.i ], [ %16, %bb95.i ], [ %16, %bb99.i ], [ %16, %bb103.i ] ; <float> [#uses=8]
-  %17 = phi float [ 1.000000e+01, %bb1.i ], [ %17, %bb20.i ], [ 1.000000e+00, %bb30.i ], [ %17, %bb43.i ], [ %17, %bb48.i ], [ %17, %bb52.i ], [ %17, %bb95.i ], [ %17, %bb99.i ], [ %17, %bb103.i ] ; <float> [#uses=8]
-  %18 = icmp slt i32 undef, %argc                 ; <i1> [#uses=1]
-  br i1 %18, label %bb2.i, label %bb135.i
-
-bb135.i:                                          ; preds = %bb134.i
-  br i1 undef, label %bb141.i, label %bb142.i
-
-bb141.i:                                          ; preds = %bb135.i
-  unreachable
-
-bb142.i:                                          ; preds = %bb135.i
-  br i1 undef, label %bb145.i, label %bb144.i
-
-bb144.i:                                          ; preds = %bb142.i
-  unreachable
-
-bb145.i:                                          ; preds = %bb142.i
-  br i1 undef, label %bb146.i, label %bb147.i
-
-bb146.i:                                          ; preds = %bb145.i
-  unreachable
-
-bb147.i:                                          ; preds = %bb145.i
-  br i1 undef, label %bb148.i, label %bb155.i
-
-bb148.i:                                          ; preds = %bb147.i
-  br label %bb155.i
-
-bb155.i:                                          ; preds = %bb148.i, %bb147.i
-  br i1 undef, label %bb156.i, label %bb161.i
-
-bb156.i:                                          ; preds = %bb155.i
-  unreachable
-
-bb161.i:                                          ; preds = %bb155.i
-  br i1 undef, label %bb162.i, label %bb163.i
-
-bb162.i:                                          ; preds = %bb161.i
-  %19 = fpext float %17 to double                 ; <double> [#uses=1]
-  %20 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([61 x i8]* @.str101, i32 0, i32 0), double %19) nounwind ; <i32> [#uses=0]
-  unreachable
-
-bb163.i:                                          ; preds = %bb161.i
-  %21 = fpext float %16 to double                 ; <double> [#uses=1]
-  %22 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([31 x i8]* @.str104, i32 0, i32 0), double %21) nounwind ; <i32> [#uses=0]
-  %23 = fpext float %15 to double                 ; <double> [#uses=1]
-  %24 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([45 x i8]* @.str105, i32 0, i32 0), double %23) nounwind ; <i32> [#uses=0]
-  %25 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([38 x i8]* @.str112, i32 0, i32 0), double undef) nounwind ; <i32> [#uses=0]
-  br i1 undef, label %parse_command.exit, label %bb176.i
-
-bb176.i:                                          ; preds = %bb163.i
-  br i1 undef, label %bb177.i, label %bb178.i
-
-bb177.i:                                          ; preds = %bb176.i
-  unreachable
-
-bb178.i:                                          ; preds = %bb176.i
-  %26 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([36 x i8]* @.str121, i32 0, i32 0), double undef) nounwind ; <i32> [#uses=0]
-  %27 = fpext float %14 to double                 ; <double> [#uses=1]
-  %28 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([67 x i8]* @.str12293, i32 0, i32 0), double %27) nounwind ; <i32> [#uses=0]
-  %29 = fpext float %13 to double                 ; <double> [#uses=1]
-  %30 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([68 x i8]* @.str123, i32 0, i32 0), double %29) nounwind ; <i32> [#uses=0]
-  %31 = fpext float %12 to double                 ; <double> [#uses=1]
-  %32 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([52 x i8]* @.str124, i32 0, i32 0), double %31) nounwind ; <i32> [#uses=0]
-  %33 = fpext float %11 to double                 ; <double> [#uses=1]
-  %34 = call i32 (i8*, ...)* @"\01_printf$LDBL128"(i8* getelementptr inbounds ([51 x i8]* @.str125, i32 0, i32 0), double %33) nounwind ; <i32> [#uses=0]
-  unreachable
-
-parse_command.exit:                               ; preds = %bb163.i
-  br i1 undef, label %bb4.i152.i, label %my_fopen.exit.i
-
-bb4.i152.i:                                       ; preds = %parse_command.exit
-  unreachable
-
-my_fopen.exit.i:                                  ; preds = %parse_command.exit
-  br i1 undef, label %bb.i6.i99, label %bb49.preheader.i.i
-
-bb.i6.i99:                                        ; preds = %my_fopen.exit.i
-  br i1 undef, label %bb3.i.i100, label %bb1.i8.i
-
-bb1.i8.i:                                         ; preds = %bb.i6.i99
-  unreachable
-
-bb3.i.i100:                                       ; preds = %bb.i6.i99
-  unreachable
-
-bb49.preheader.i.i:                               ; preds = %my_fopen.exit.i
-  br i1 undef, label %bb7.i11.i, label %bb50.i.i
-
-bb7.i11.i:                                        ; preds = %bb49.preheader.i.i
-  unreachable
-
-bb50.i.i:                                         ; preds = %bb49.preheader.i.i
-  br i1 undef, label %bb.i.i.i20.i, label %my_calloc.exit.i.i.i
-
-bb.i.i.i20.i:                                     ; preds = %bb50.i.i
-  unreachable
-
-my_calloc.exit.i.i.i:                             ; preds = %bb50.i.i
-  br i1 undef, label %bb.i.i37.i.i, label %alloc_hash_table.exit.i21.i
-
-bb.i.i37.i.i:                                     ; preds = %my_calloc.exit.i.i.i
-  unreachable
-
-alloc_hash_table.exit.i21.i:                      ; preds = %my_calloc.exit.i.i.i
-  br i1 undef, label %bb51.i.i, label %bb3.i23.i.i
-
-bb51.i.i:                                         ; preds = %alloc_hash_table.exit.i21.i
-  unreachable
-
-bb3.i23.i.i:                                      ; preds = %alloc_hash_table.exit.i21.i
-  br i1 undef, label %bb.i8.i.i, label %bb.nph.i.i
-
-bb.nph.i.i:                                       ; preds = %bb3.i23.i.i
-  unreachable
-
-bb.i8.i.i:                                        ; preds = %bb3.i.i34.i, %bb3.i23.i.i
-  br i1 undef, label %bb3.i.i34.i, label %bb1.i.i32.i
-
-bb1.i.i32.i:                                      ; preds = %bb.i8.i.i
-  unreachable
-
-bb3.i.i34.i:                                      ; preds = %bb.i8.i.i
-  br i1 undef, label %free_hash_table.exit.i.i, label %bb.i8.i.i
-
-free_hash_table.exit.i.i:                         ; preds = %bb3.i.i34.i
-  br i1 undef, label %check_netlist.exit.i, label %bb59.i.i
-
-bb59.i.i:                                         ; preds = %free_hash_table.exit.i.i
-  unreachable
-
-check_netlist.exit.i:                             ; preds = %free_hash_table.exit.i.i
-  br label %bb.i.i3.i
-
-bb.i.i3.i:                                        ; preds = %bb3.i.i4.i, %check_netlist.exit.i
-  br i1 false, label %bb3.i.i4.i, label %bb1.i.i.i122
-
-bb1.i.i.i122:                                     ; preds = %bb1.i.i.i122, %bb.i.i3.i
-  br i1 false, label %bb3.i.i4.i, label %bb1.i.i.i122
-
-bb3.i.i4.i:                                       ; preds = %bb1.i.i.i122, %bb.i.i3.i
-  br i1 undef, label %read_net.exit, label %bb.i.i3.i
-
-read_net.exit:                                    ; preds = %bb3.i.i4.i
-  br i1 undef, label %bb.i44, label %bb3.i47
-
-bb.i44:                                           ; preds = %read_net.exit
-  unreachable
-
-bb3.i47:                                          ; preds = %read_net.exit
-  br i1 false, label %bb9.i50, label %bb8.i49
-
-bb8.i49:                                          ; preds = %bb3.i47
-  unreachable
-
-bb9.i50:                                          ; preds = %bb3.i47
-  br i1 undef, label %bb11.i51, label %bb12.i52
-
-bb11.i51:                                         ; preds = %bb9.i50
-  unreachable
-
-bb12.i52:                                         ; preds = %bb9.i50
-  br i1 undef, label %bb.i.i53, label %my_malloc.exit.i54
-
-bb.i.i53:                                         ; preds = %bb12.i52
-  unreachable
-
-my_malloc.exit.i54:                               ; preds = %bb12.i52
-  br i1 undef, label %bb.i2.i55, label %my_malloc.exit3.i56
-
-bb.i2.i55:                                        ; preds = %my_malloc.exit.i54
-  unreachable
-
-my_malloc.exit3.i56:                              ; preds = %my_malloc.exit.i54
-  br i1 undef, label %bb.i.i.i57, label %my_malloc.exit.i.i
-
-bb.i.i.i57:                                       ; preds = %my_malloc.exit3.i56
-  unreachable
-
-my_malloc.exit.i.i:                               ; preds = %my_malloc.exit3.i56
-  br i1 undef, label %bb, label %bb10
-
-bb:                                               ; preds = %my_malloc.exit.i.i
-  unreachable
-
-bb10:                                             ; preds = %my_malloc.exit.i.i
-  br i1 false, label %bb12, label %bb11
-
-bb11:                                             ; preds = %bb10
-  unreachable
-
-bb12:                                             ; preds = %bb10
-  store float %annealing_sched.1.0, float* null, align 4
-  store float %annealing_sched.2.0, float* undef, align 8
-  store float %annealing_sched.3.0, float* undef, align 4
-  store float %annealing_sched.4.0, float* undef, align 8
-  store float %router_opts.0.0, float* undef, align 8
-  store float %router_opts.1.0, float* undef, align 4
-  store float %router_opts.2.0, float* null, align 8
-  store float %router_opts.3.0, float* undef, align 4
-  br i1 undef, label %place_and_route.exit, label %bb7.i22
-
-bb7.i22:                                          ; preds = %bb12
-  br i1 false, label %bb8.i23, label %bb9.i26
-
-bb8.i23:                                          ; preds = %bb7.i22
-  unreachable
-
-bb9.i26:                                          ; preds = %bb7.i22
-  unreachable
-
-place_and_route.exit:                             ; preds = %bb12
-  unreachable
-}
-
-declare i32 @"\01_printf$LDBL128"(i8*, ...) nounwind
-
-declare i32 @strcmp(i8* nocapture, i8* nocapture) nounwind readonly
-
-declare i32 @"\01_sscanf$LDBL128"(i8*, i8*, ...) nounwind

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb/push.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb/push.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb/push.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb/push.ll Fri Jul  2 04:57:13 2010
@@ -2,8 +2,8 @@
 ; rdar://7268481
 
 define void @t() nounwind {
-; CHECK:       t:
-; CHECK-NEXT : push {r7}
+; CHECK: t:
+; CHECK: push {r7}
 entry:
   call void asm sideeffect ".long 0xe7ffdefe", ""() nounwind
   ret void

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll Fri Jul  2 04:57:13 2010
@@ -12,8 +12,6 @@
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
 ; CHECK-NEXT: subeq.w r0, r6, r8
-; CHECK-NEXT: %bb
-; CHECK-NEXT: %bb1
 ; CHECK-NEXT: ldmia.w sp, {r4, r5, r6, r8, r9, pc}
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/crash.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/crash.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/crash.ll Fri Jul  2 04:57:13 2010
@@ -19,3 +19,31 @@
 }
 
 declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+
+@sbuf = common global [16 x i32] zeroinitializer, align 16 ; <[16 x i32]*> [#uses=5]
+@dbuf = common global [16 x i32] zeroinitializer  ; <[16 x i32]*> [#uses=2]
+
+; This function creates 4 chained INSERT_SUBREGS and then invokes the register scavenger.
+; The first INSERT_SUBREG needs an <undef> use operand for that to work.
+define arm_apcscc i32 @main() nounwind {
+bb.nph:
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+  %0 = phi i32 [ 0, %bb.nph ], [ %1, %bb ]        ; <i32> [#uses=4]
+  %scevgep = getelementptr [16 x i32]* @sbuf, i32 0, i32 %0 ; <i32*> [#uses=1]
+  %scevgep5 = getelementptr [16 x i32]* @dbuf, i32 0, i32 %0 ; <i32*> [#uses=1]
+  store i32 %0, i32* %scevgep, align 4
+  store i32 -1, i32* %scevgep5, align 4
+  %1 = add nsw i32 %0, 1                          ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %1, 16                  ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb2, label %bb
+
+bb2:                                              ; preds = %bb
+  %2 = load <4 x i32>* bitcast ([16 x i32]* @sbuf to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+  %3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+  %4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+  %5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+  tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5) nounwind
+  ret i32 0
+}

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/ldr-str-imm12.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/ldr-str-imm12.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/ldr-str-imm12.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/ldr-str-imm12.ll Fri Jul  2 04:57:13 2010
@@ -25,10 +25,7 @@
 ; CHECK:       ldr.w	r9, [r7, #28]
   %xgaps.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   %ycomp.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
-  br i1 false, label %bb, label %bb20
-
-bb:                                               ; preds = %entry
-  unreachable
+  br label %bb20
 
 bb20:                                             ; preds = %entry
   switch i32 undef, label %bb1287 [

Removed: llvm/branches/wendling/eh/test/CodeGen/Thumb2/sign_extend_inreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/sign_extend_inreg.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/sign_extend_inreg.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/sign_extend_inreg.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-A8
-; RUN: llc < %s -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK-M3
-
-target triple = "thumbv7-apple-darwin10"
-
-define i32 @f1(i16* %ptr) nounwind {
-; CHECK-A8: f1
-; CHECK-A8: sxth
-; CHECK-M3: f1
-; CHECK-M3-NOT: sxth
-; CHECK-M3: bx lr
-  %1 = load i16* %ptr
-  %2 = icmp eq i16 %1, 1
-  %3 = sext i16 %1 to i32
-  br i1 %2, label %.next, label %.exit
-
-.next:
-  br label %.exit
-
-.exit:
-  ret i32 %3
-}

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-call-tc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-call-tc.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-call-tc.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-call-tc.ll Fri Jul  2 04:57:13 2010
@@ -11,7 +11,7 @@
 
 ; LINUX: f:
 ; LINUX: bl g
-        call void @g( i32 1, i32 2, i32 3, i32 4 )
+        tail call void @g( i32 1, i32 2, i32 3, i32 4 )
         ret void
 }
 

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-cbnz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-cbnz.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-cbnz.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-cbnz.ll Fri Jul  2 04:57:13 2010
@@ -21,7 +21,7 @@
 
 bb9:                                              ; preds = %bb7
 ; CHECK:      cmp r0, #0
-; CHECK-NEXT: cmp r0, #0
+; CHECK:      cmp r0, #0
 ; CHECK-NEXT: cbnz
   %0 = tail call  double @floor(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-eor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-eor.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-eor.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-eor.ll Fri Jul  2 04:57:13 2010
@@ -9,11 +9,18 @@
 
 define i32 @f2(i32 %a, i32 %b) {
 ; CHECK: f2:
-; CHECK: eor.w r0, r1, r0
+; CHECK: eors r0, r1
     %tmp = xor i32 %b, %a
     ret i32 %tmp
 }
 
+define i32 @f2b(i32 %a, i32 %b, i32 %c) {
+; CHECK: f2b:
+; CHECK: eor.w r0, r1, r2
+    %tmp = xor i32 %b, %c
+    ret i32 %tmp
+}
+
 define i32 @f3(i32 %a, i32 %b) {
 ; CHECK: f3:
 ; CHECK: eor.w r0, r0, r1, lsl #5

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt2.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt2.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt2.ll Fri Jul  2 04:57:13 2010
@@ -31,7 +31,8 @@
 ; CHECK: CountTree:
 ; CHECK: it eq
 ; CHECK: cmpeq
-; CHECK: beq
+; CHECK: bne
+; CHECK: cmp
 ; CHECK: itt eq
 ; CHECK: moveq
 ; CHECK: popeq

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt3.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt3.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-ifcvt3.ll Fri Jul  2 04:57:13 2010
@@ -23,7 +23,7 @@
 ; CHECK: movne
 ; CHECK: moveq
 ; CHECK: pop
-; CHECK-NEXT: LBB0_1:
+; CHECK-NEXT: @ BB#1:
   %0 = load i64* @posed, align 4                  ; <i64> [#uses=3]
   %1 = sub i64 %0, %.reload78                     ; <i64> [#uses=1]
   %2 = ashr i64 %1, 1                             ; <i64> [#uses=3]

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-select_xform.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-select_xform.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-select_xform.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-select_xform.ll Fri Jul  2 04:57:13 2010
@@ -3,8 +3,8 @@
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK: t1
 ; CHECK: sub.w r0, r1, #-2147483648
+; CHECK: subs r0, #1
 ; CHECK: cmp r2, #10
-; CHECK: sub.w r0, r0, #1
 ; CHECK: it  gt
 ; CHECK: movgt r0, r1
         %tmp1 = icmp sgt i32 %c, 10

Modified: llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-uxtb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-uxtb.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-uxtb.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/Thumb2/thumb2-uxtb.ll Fri Jul  2 04:57:13 2010
@@ -1,47 +1,72 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARMv7A
+; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=ARMv7M
 
 define i32 @test1(i32 %x) {
-; CHECK: test1
-; CHECK: uxtb16  r0, r0
+; ARMv7A: test1
+; ARMv7A: uxtb16 r0, r0
+
+; ARMv7M: test1
+; ARMv7M: and r0, r0, #16711935
 	%tmp1 = and i32 %x, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
+; PR7503
 define i32 @test2(i32 %x) {
-; CHECK: test2
-; CHECK: uxtb16  r0, r0, ror #8
+; ARMv7A: test2
+; ARMv7A: uxtb16  r0, r0, ror #8
+
+; ARMv7M: test2
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, lsr #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
 
 define i32 @test3(i32 %x) {
-; CHECK: test3
-; CHECK: uxtb16  r0, r0, ror #8
+; ARMv7A: test3
+; ARMv7A: uxtb16  r0, r0, ror #8
+
+; ARMv7M: test3
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, lsr #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
 
 define i32 @test4(i32 %x) {
-; CHECK: test4
-; CHECK: uxtb16  r0, r0, ror #8
+; ARMv7A: test4
+; ARMv7A: uxtb16  r0, r0, ror #8
+
+; ARMv7M: test4
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, lsr #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp6 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp6
 }
 
 define i32 @test5(i32 %x) {
-; CHECK: test5
-; CHECK: uxtb16  r0, r0, ror #8
+; ARMv7A: test5
+; ARMv7A: uxtb16  r0, r0, ror #8
+
+; ARMv7M: test5
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, lsr #8
 	%tmp1 = lshr i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711935		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
 
 define i32 @test6(i32 %x) {
-; CHECK: test6
-; CHECK: uxtb16  r0, r0, ror #16
+; ARMv7A: test6
+; ARMv7A: uxtb16  r0, r0, ror #16
+
+; ARMv7M: test6
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, ror #16
 	%tmp1 = lshr i32 %x, 16		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 255		; <i32> [#uses=1]
 	%tmp4 = shl i32 %x, 16		; <i32> [#uses=1]
@@ -51,8 +76,12 @@
 }
 
 define i32 @test7(i32 %x) {
-; CHECK: test7
-; CHECK: uxtb16  r0, r0, ror #16
+; ARMv7A: test7
+; ARMv7A: uxtb16  r0, r0, ror #16
+
+; ARMv7M: test7
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, ror #16
 	%tmp1 = lshr i32 %x, 16		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 255		; <i32> [#uses=1]
 	%tmp4 = shl i32 %x, 16		; <i32> [#uses=1]
@@ -62,8 +91,12 @@
 }
 
 define i32 @test8(i32 %x) {
-; CHECK: test8
-; CHECK: uxtb16  r0, r0, ror #24
+; ARMv7A: test8
+; ARMv7A: uxtb16  r0, r0, ror #24
+
+; ARMv7M: test8
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, ror #24
 	%tmp1 = shl i32 %x, 8		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16711680		; <i32> [#uses=1]
 	%tmp5 = lshr i32 %x, 24		; <i32> [#uses=1]
@@ -72,8 +105,12 @@
 }
 
 define i32 @test9(i32 %x) {
-; CHECK: test9
-; CHECK: uxtb16  r0, r0, ror #24
+; ARMv7A: test9
+; ARMv7A: uxtb16  r0, r0, ror #24
+
+; ARMv7M: test9
+; ARMv7M: mov.w r1, #16711935
+; ARMv7M: and.w r0, r1, r0, ror #24
 	%tmp1 = lshr i32 %x, 24		; <i32> [#uses=1]
 	%tmp4 = shl i32 %x, 8		; <i32> [#uses=1]
 	%tmp5 = and i32 %tmp4, 16711680		; <i32> [#uses=1]
@@ -82,13 +119,19 @@
 }
 
 define i32 @test10(i32 %p0) {
-; CHECK: test10
-; CHECK: mov.w r1, #16253176
-; CHECK: and.w r0, r1, r0, lsr #7
-; CHECK: lsrs  r1, r0, #5
-; CHECK: uxtb16  r1, r1
-; CHECK: orr.w r0, r1, r0
-
+; ARMv7A: test10
+; ARMv7A: mov.w r1, #16253176
+; ARMv7A: and.w r0, r1, r0, lsr #7
+; ARMv7A: lsrs  r1, r0, #5
+; ARMv7A: uxtb16  r1, r1
+; ARMv7A: orrs r0, r1
+
+; ARMv7M: test10
+; ARMv7M: mov.w r1, #16253176
+; ARMv7M: and.w r0, r1, r0, lsr #7
+; ARMv7M: mov.w r1, #458759
+; ARMv7M: and.w r1, r1, r0, lsr #5
+; ARMv7M: orrs r0, r1
 	%tmp1 = lshr i32 %p0, 7		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16253176		; <i32> [#uses=2]
 	%tmp4 = lshr i32 %tmp2, 5		; <i32> [#uses=1]

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2006-11-17-IllegalMove.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2006-11-17-IllegalMove.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2006-11-17-IllegalMove.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2006-11-17-IllegalMove.ll Fri Jul  2 04:57:13 2010
@@ -15,14 +15,14 @@
 	%tmp99 = udiv i64 0, 0		; <i64> [#uses=1]
 	%tmp = load i8* null		; <i8> [#uses=1]
 	%tmp114 = icmp eq i64 0, 0		; <i1> [#uses=1]
-	br i1 %tmp114, label %cond_true115, label %cond_next136
+	br label %cond_true115
 
 bb84:		; preds = %entry
 	ret void
 
 cond_true115:		; preds = %bb77
 	%tmp118 = load i8* null		; <i8> [#uses=1]
-	br i1 false, label %cond_next129, label %cond_true120
+	br label %cond_true120
 
 cond_true120:		; preds = %cond_true115
 	%tmp127 = udiv i8 %tmp, %tmp118		; <i8> [#uses=1]
@@ -30,7 +30,7 @@
 	br label %cond_next129
 
 cond_next129:		; preds = %cond_true120, %cond_true115
-	%iftmp.30.0 = phi i64 [ %tmp127.upgrd.1, %cond_true120 ], [ 0, %cond_true115 ]		; <i64> [#uses=1]
+	%iftmp.30.0 = phi i64 [ %tmp127.upgrd.1, %cond_true120 ]		; <i64> [#uses=1]
 	%tmp132 = icmp eq i64 %iftmp.30.0, %tmp99		; <i1> [#uses=1]
 	br i1 %tmp132, label %cond_false148, label %cond_next136
 

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2007-01-08-InstrSched.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2007-01-08-InstrSched.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2007-01-08-InstrSched.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2007-01-08-InstrSched.ll Fri Jul  2 04:57:13 2010
@@ -11,12 +11,12 @@
     %tmp14 = fadd float %tmp12, %tmp7
     ret float %tmp14
 
-; CHECK: mulss	LCPI0_0(%rip)
-; CHECK: mulss	LCPI0_1(%rip)
+; CHECK: mulss
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss	LCPI0_2(%rip)
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss	LCPI0_3(%rip)
+; CHECK: mulss
 ; CHECK: addss
 ; CHECK: ret
 }

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll Fri Jul  2 04:57:13 2010
@@ -9,10 +9,7 @@
 	%tmp149 = mul i32 0, %v.1		; <i32> [#uses=0]
 	%tmp254 = and i32 0, 15		; <i32> [#uses=1]
 	%tmp256 = and i32 0, 15		; <i32> [#uses=2]
-	br i1 false, label %cond_true267, label %cond_next391
-
-cond_true267:		; preds = %cond_next127
-	ret i16 0
+	br label %cond_next391
 
 cond_next391:		; preds = %cond_next127
 	%tmp393 = load i32* %ss, align 4		; <i32> [#uses=1]

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll Fri Jul  2 04:57:13 2010
@@ -1,7 +1,7 @@
-; RUN: llc < %s -relocation-model=static | grep {foo _str$}
+; RUN: llc < %s -relocation-model=static | grep {foo str$}
 ; PR1761
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin8"
+target triple = "x86_64-pc-linux"
 @str = internal constant [12 x i8] c"init/main.c\00"		; <[12 x i8]*> [#uses=1]
 
 define i32 @unknown_bootoption() {

Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll (removed)
@@ -1,68 +0,0 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=att | grep movl | count 2
-; RUN: llc < %s -march=x86 -x86-asm-syntax=att | not grep movb
-
-	%struct.double_int = type { i64, i64 }
-	%struct.tree_common = type <{ i8, [3 x i8] }>
-	%struct.tree_int_cst = type { %struct.tree_common, %struct.double_int }
-	%struct.tree_node = type { %struct.tree_int_cst }
-@tree_code_type = external constant [0 x i32]		; <[0 x i32]*> [#uses=1]
-
-define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind {
-entry:
-	%tmp2526 = bitcast %struct.tree_node* %t1 to i32*		; <i32*> [#uses=1]
-	br i1 false, label %UnifiedReturnBlock, label %bb21
-
-bb21:		; preds = %entry
-	%tmp27 = load i32* %tmp2526, align 4		; <i32> [#uses=1]
-	%tmp29 = and i32 %tmp27, 255		; <i32> [#uses=3]
-	%tmp2930 = trunc i32 %tmp29 to i8		; <i8> [#uses=1]
-	%tmp37 = load i32* null, align 4		; <i32> [#uses=1]
-	%tmp39 = and i32 %tmp37, 255		; <i32> [#uses=2]
-	%tmp3940 = trunc i32 %tmp39 to i8		; <i8> [#uses=1]
-	%tmp43 = add i32 %tmp29, -3		; <i32> [#uses=1]
-	%tmp44 = icmp ult i32 %tmp43, 3		; <i1> [#uses=1]
-	br i1 %tmp44, label %bb47.split, label %bb76
-
-bb47.split:		; preds = %bb21
-	ret i32 0
-
-bb76:		; preds = %bb21
-	br i1 false, label %bb82, label %bb146.split
-
-bb82:		; preds = %bb76
-	%tmp94 = getelementptr [0 x i32]* @tree_code_type, i32 0, i32 %tmp39		; <i32*> [#uses=1]
-	%tmp95 = load i32* %tmp94, align 4		; <i32> [#uses=1]
-	%tmp9596 = trunc i32 %tmp95 to i8		; <i8> [#uses=1]
-	%tmp98 = add i8 %tmp9596, -4		; <i8> [#uses=1]
-	%tmp99 = icmp ugt i8 %tmp98, 5		; <i1> [#uses=1]
-	br i1 %tmp99, label %bb102, label %bb106
-
-bb102:		; preds = %bb82
-	ret i32 0
-
-bb106:		; preds = %bb82
-	ret i32 0
-
-bb146.split:		; preds = %bb76
-	%tmp149 = icmp eq i8 %tmp2930, %tmp3940		; <i1> [#uses=1]
-	br i1 %tmp149, label %bb153, label %UnifiedReturnBlock
-
-bb153:		; preds = %bb146.split
-	switch i32 %tmp29, label %UnifiedReturnBlock [
-		 i32 0, label %bb155
-		 i32 1, label %bb187
-	]
-
-bb155:		; preds = %bb153
-	ret i32 0
-
-bb187:		; preds = %bb153
-	%tmp198 = icmp eq %struct.tree_node* %t1, %t2		; <i1> [#uses=1]
-	br i1 %tmp198, label %bb201, label %UnifiedReturnBlock
-
-bb201:		; preds = %bb187
-	ret i32 0
-
-UnifiedReturnBlock:		; preds = %bb187, %bb153, %bb146.split, %entry
-	ret i32 0
-}

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-03-18-CoalescerBug.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-03-18-CoalescerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-03-18-CoalescerBug.ll Fri Jul  2 04:57:13 2010
@@ -19,7 +19,7 @@
 	br i1 %tmp35, label %bb38, label %bb87.preheader
 bb38:		; preds = %bb33
 	%tmp53 = add i32 %tmp19, %delta		; <i32> [#uses=2]
-	br i1 false, label %bb50, label %bb43
+	br label %bb43
 bb43:		; preds = %bb38
 	store i32 %tmp53, i32* null, align 4
 	ret void

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2008-04-09-BranchFolding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-04-09-BranchFolding.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-04-09-BranchFolding.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-04-09-BranchFolding.ll Fri Jul  2 04:57:13 2010
@@ -10,7 +10,7 @@
 define fastcc %struct.tree_node* @pushdecl(%struct.tree_node* %x) nounwind  {
 entry:
 	%tmp3.i40 = icmp eq %struct.binding_level* null, null		; <i1> [#uses=2]
-	br i1 false, label %bb143, label %bb140
+	br label %bb140
 bb140:		; preds = %entry
 	br i1 %tmp3.i40, label %bb160, label %bb17.i
 bb17.i:		; preds = %bb140

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll Fri Jul  2 04:57:13 2010
@@ -7,7 +7,7 @@
 
 define double @_Z7qstrtodPKcPS0_Pb(i8* %s00, i8** %se, i8* %ok) nounwind {
 entry:
-	br i1 false, label %bb151, label %bb163
+	br label %bb163
 
 bb151:		; preds = %entry
 	br label %bb163
@@ -19,13 +19,13 @@
 	br label %bb5.i
 
 bb5.i:		; preds = %bb5.i57.i, %bb163
-	%b.0.i = phi %struct.Bigint* [ null, %bb163 ], [ %tmp9.i.i41.i, %bb5.i57.i ]		; <%struct.Bigint*> [#uses=1]
+	%b.0.i = phi %struct.Bigint* [ null, %bb163 ]		; <%struct.Bigint*> [#uses=1]
 	%tmp3.i7.i728 = load i32* null, align 4		; <i32> [#uses=1]
 	br label %bb.i27.i
 
 bb.i27.i:		; preds = %bb.i27.i, %bb5.i
 	%tmp23.i20.i = lshr i32 0, 16		; <i32> [#uses=1]
-	br i1 false, label %bb.i27.i, label %bb5.i57.i
+	br label %bb5.i57.i
 
 bb5.i57.i:		; preds = %bb.i27.i
 	%tmp50.i35.i = load i32* null, align 4		; <i32> [#uses=1]
@@ -41,7 +41,7 @@
 	store i32 %tmp23.i20.i, i32* null, align 4
 	%tmp74.i61.i = add i32 %tmp3.i7.i728, 1		; <i32> [#uses=1]
 	store i32 %tmp74.i61.i, i32* null, align 4
-	br i1 false, label %bb5.i, label %bb7.i
+	br label %bb7.i
 
 bb7.i:		; preds = %bb5.i57.i
 	%tmp514 = load i32* null, align 4		; <i32> [#uses=1]

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-09-18-inline-asm-2.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-09-18-inline-asm-2.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-09-18-inline-asm-2.ll Fri Jul  2 04:57:13 2010
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | grep "#%ebp %esi %edi 8(%edx) %eax (%ebx)"
-; RUN: llc < %s -march=x86 -regalloc=fast  | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)"
+; RUN: llc < %s -march=x86 | grep "#%ebp %edi %ebx 8(%esi) %eax %dl"
+; RUN: llc < %s -march=x86 -regalloc=fast  | grep "#%ebx %esi %edi 8(%ebp) %eax %dl"
 
 ; The 1st, 2nd, 3rd and 5th registers above must all be different.  The registers
 ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th

Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2008-10-16-SpillerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2008-10-16-SpillerBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2008-10-16-SpillerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2008-10-16-SpillerBug.ll (removed)
@@ -1,160 +0,0 @@
-; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 41
-; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s
-
-	%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
-	%struct.XXDAlphaTest = type { float, i16, i8, i8 }
-	%struct.XXDArrayRange = type { i8, i8, i8, i8 }
-	%struct.XXDBlendMode = type { i16, i16, i16, i16, %struct.XXTColor4, i16, i16, i8, i8, i8, i8 }
-	%struct.XXDClearColor = type { double, %struct.XXTColor4, %struct.XXTColor4, float, i32 }
-	%struct.XXDClipPlane = type { i32, [6 x %struct.XXTColor4] }
-	%struct.XXDColorBuffer = type { i16, i8, i8, [8 x i16], i8, i8, i8, i8 }
-	%struct.XXDColorMatrix = type { [16 x float]*, %struct.XXDImagingCC }
-	%struct.XXDConvolution = type { %struct.XXTColor4, %struct.XXDImagingCC, i16, i16, [0 x i32], float*, i32, i32 }
-	%struct.XXDDepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
-	%struct.XXDFixedFunction = type { %struct.YYToken* }
-	%struct.XXDFogMode = type { %struct.XXTColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
-	%struct.XXDHintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
-	%struct.XXDHistogram = type { %struct.XXTFixedColor4*, i32, i16, i8, i8 }
-	%struct.XXDImagingCC = type { { float, float }, { float, float }, { float, float }, { float, float } }
-	%struct.XXDImagingSubset = type { %struct.XXDConvolution, %struct.XXDConvolution, %struct.XXDConvolution, %struct.XXDColorMatrix, %struct.XXDMinmax, %struct.XXDHistogram, %struct.XXDImagingCC, %struct.XXDImagingCC, %struct.XXDImagingCC, %struct.XXDImagingCC, i32, [0 x i32] }
-	%struct.XXDLight = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTCoord3, float, float, float, float, float, %struct.XXTCoord3, float, %struct.XXTCoord3, float, %struct.XXTCoord3, float, float, float, float, float }
-	%struct.XXDLightModel = type { %struct.XXTColor4, [8 x %struct.XXDLight], [2 x %struct.XXDMaterial], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 }
-	%struct.XXDLightProduct = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4 }
-	%struct.XXDLineMode = type { float, i32, i16, i16, i8, i8, i8, i8 }
-	%struct.XXDLogicOp = type { i16, i8, i8 }
-	%struct.XXDMaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
-	%struct.XXDMaterial = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, float, float, float, float, [8 x %struct.XXDLightProduct], %struct.XXTColor4, [8 x i32] }
-	%struct.XXDMinmax = type { %struct.XXDMinmaxTable*, i16, i8, i8, [0 x i32] }
-	%struct.XXDMinmaxTable = type { %struct.XXTColor4, %struct.XXTColor4 }
-	%struct.XXDMultisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
-	%struct.XXDPipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.XXTColor4* }
-	%struct.XXDPixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.XXDPixelMode = type { float, float, %struct.XXDPixelStore, %struct.XXDPixelTransfer, %struct.XXDPixelMap, %struct.XXDImagingSubset, i32, i32 }
-	%struct.XXDPixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
-	%struct.XXDPixelStore = type { %struct.XXDPixelPack, %struct.XXDPixelPack }
-	%struct.XXDPixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }
-	%struct.XXDPointMode = type { float, float, float, float, %struct.XXTCoord3, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 }
-	%struct.XXDPolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }
-	%struct.XXDRegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.XXTColor4], [8 x %struct.XXDRegisterCombinersPerStageState], %struct.XXDRegisterCombinersFinalStageState }
-	%struct.XXDRegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.XXDRegisterCombinersPerVariableState] }
-	%struct.XXDRegisterCombinersPerPortionState = type { [4 x %struct.XXDRegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
-	%struct.XXDRegisterCombinersPerStageState = type { [2 x %struct.XXDRegisterCombinersPerPortionState], [2 x %struct.XXTColor4] }
-	%struct.XXDRegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
-	%struct.XXDScissorTest = type { %struct.XXTFixedColor4, i8, i8, i8, i8 }
-	%struct.XXDState = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.XXTColor4], [128 x %struct.XXTColor4], %struct.XXDViewport, %struct.XXDTransform, %struct.XXDLightModel, %struct.XXDActiveTextureTargets, %struct.XXDAlphaTest, %struct.XXDBlendMode, %struct.XXDClearColor, %struct.XXDColorBuffer, %struct.XXDDepthTest, %struct.XXDArrayRange, %struct.XXDFogMode, %struct.XXDHintMode, %struct.XXDLineMode, %struct.XXDLogicOp, %struct.XXDMaskMode, %struct.XXDPixelMode, %struct.XXDPointMode, %struct.XXDPolygonMode, %struct.XXDScissorTest, i32, %struct.XXDStencilTest, [8 x %struct.XXDTextureMode], [16 x %struct.XXDTextureImageMode], %struct.XXDArrayRange, [8 x %struct.XXDTextureCoordGen], %struct.XXDClipPlane, %struct.XXDMultisample, %struct.XXDRegisterCombiners, %struct.XXDArrayRange, %struct.XXDArrayRange, [3 x %struct.XXDPipelineProgramState], %struct.XXDArrayRange, %struct.XXDTransformFeedback, i32*, %struct.XXDFixedFunction, [3 x i32], [2 x i32] }>
-	%struct.XXDStencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
-	%struct.XXDTextureCoordGen = type { { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, i8, i8, i8, i8 }
-	%struct.XXDTextureImageMode = type { float }
-	%struct.XXDTextureMode = type { %struct.XXTColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
-	%struct.XXDTextureRec = type opaque
-	%struct.XXDTransform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
-	%struct.XXDTransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
-	%struct.XXDViewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
-	%struct.XXTColor4 = type { float, float, float, float }
-	%struct.XXTCoord3 = type { float, float, float }
-	%struct.XXTFixedColor4 = type { i32, i32, i32, i32 }
-	%struct.XXVMTextures = type { [16 x %struct.XXDTextureRec*] }
-	%struct.XXVMVPContext = type { i32 }
-	%struct.XXVMVPStack = type { i32, i32 }
-	%struct.YYToken = type { { i16, i16, i32 } }
-	%struct._XXVMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [4096 x i8], [8 x float], [48 x float], [128 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.XXDState*, <4 x float>*, <4 x float>**, %struct._XXVMConstants*, %struct.YYToken*, %struct.XXVMVPContext*, %struct.XXVMTextures*, %struct.XXVMVPStack*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, [4 x <4 x float>]*, i32*, <4 x i32>*, i64)* @t to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
-entry:
-; CHECK: t:
-	%0 = trunc i64 %key_token to i32		; <i32> [#uses=1]
-	%1 = getelementptr %struct.YYToken* %pstrm, i32 %0		; <%struct.YYToken*> [#uses=5]
-	br label %bb1132
-
-bb51:		; preds = %bb1132
-; CHECK: .align 4
-; CHECK: xorl %ecx, %ecx
-; CHECK: andl $7
-	%2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0		; <i16*> [#uses=1]
-	%3 = load i16* %2, align 1		; <i16> [#uses=3]
-	%4 = lshr i16 %3, 6		; <i16> [#uses=1]
-	%5 = trunc i16 %4 to i8		; <i8> [#uses=1]
-	%6 = zext i8 %5 to i32		; <i32> [#uses=1]
-	%7 = trunc i16 %3 to i8		; <i8> [#uses=1]
-	%8 = and i8 %7, 7		; <i8> [#uses=1]
-	%mask5556 = zext i8 %8 to i32		; <i32> [#uses=3]
-	%.sum1324 = add i32 %mask5556, 2		; <i32> [#uses=1]
-	%.rec = add i32 %operation.0.rec, %.sum1324		; <i32> [#uses=1]
-	%9 = bitcast %struct.YYToken* %operation.0 to i32*		; <i32*> [#uses=1]
-	%10 = load i32* %9, align 1		; <i32> [#uses=1]
-	%11 = lshr i32 %10, 16		; <i32> [#uses=2]
-	%12 = trunc i32 %11 to i8		; <i8> [#uses=1]
-	%13 = and i8 %12, 1		; <i8> [#uses=1]
-	%14 = lshr i16 %3, 15		; <i16> [#uses=1]
-	%15 = trunc i16 %14 to i8		; <i8> [#uses=1]
-	%16 = or i8 %13, %15		; <i8> [#uses=1]
-	%17 = icmp eq i8 %16, 0		; <i1> [#uses=1]
-	br i1 %17, label %bb94, label %bb75
-
-bb75:		; preds = %bb51
-	%18 = getelementptr %struct.YYToken* %1, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
-	%19 = load i16* %18, align 4		; <i16> [#uses=1]
-	%20 = load i16* null, align 2		; <i16> [#uses=1]
-	%21 = zext i16 %19 to i64		; <i64> [#uses=1]
-	%22 = zext i16 %20 to i64		; <i64> [#uses=1]
-	%23 = shl i64 %22, 16		; <i64> [#uses=1]
-	%.ins1177 = or i64 %23, %21		; <i64> [#uses=1]
-	%.ins1175 = or i64 %.ins1177, 0		; <i64> [#uses=1]
-	%24 = and i32 %11, 1		; <i32> [#uses=1]
-	%.neg1333 = sub i32 %mask5556, %24		; <i32> [#uses=1]
-	%.neg1335 = sub i32 %.neg1333, 0		; <i32> [#uses=1]
-	%25 = sub i32 %.neg1335, 0		; <i32> [#uses=1]
-	br label %bb94
-
-bb94:		; preds = %bb75, %bb51
-	%extraToken.0 = phi i64 [ %.ins1175, %bb75 ], [ %extraToken.1, %bb51 ]		; <i64> [#uses=1]
-	%argCount.0 = phi i32 [ %25, %bb75 ], [ %mask5556, %bb51 ]		; <i32> [#uses=1]
-	%operation.0.sum1392 = add i32 %operation.0.rec, 1		; <i32> [#uses=2]
-	%26 = getelementptr %struct.YYToken* %1, i32 %operation.0.sum1392, i32 0, i32 0		; <i16*> [#uses=1]
-	%27 = load i16* %26, align 4		; <i16> [#uses=1]
-	%28 = getelementptr %struct.YYToken* %1, i32 %operation.0.sum1392, i32 0, i32 1		; <i16*> [#uses=1]
-	%29 = load i16* %28, align 2		; <i16> [#uses=1]
-	store i16 %27, i16* null, align 8
-	store i16 %29, i16* null, align 2
-	br i1 false, label %bb1132, label %bb110
-
-bb110:		; preds = %bb94
-	switch i32 %6, label %bb1078 [
-		i32 30, label %bb960
-		i32 32, label %bb801
-		i32 38, label %bb809
-		i32 78, label %bb1066
-	]
-
-bb801:		; preds = %bb110
-	unreachable
-
-bb809:		; preds = %bb110
-	unreachable
-
-bb960:		; preds = %bb110
-	%30 = icmp eq i32 %argCount.0, 1		; <i1> [#uses=1]
-	br i1 %30, label %bb962, label %bb965
-
-bb962:		; preds = %bb960
-	unreachable
-
-bb965:		; preds = %bb960
-	unreachable
-
-bb1066:		; preds = %bb110
-	unreachable
-
-bb1078:		; preds = %bb110
-	unreachable
-
-bb1132:		; preds = %bb94, %entry
-	%extraToken.1 = phi i64 [ undef, %entry ], [ %extraToken.0, %bb94 ]		; <i64> [#uses=1]
-	%operation.0.rec = phi i32 [ 0, %entry ], [ %.rec, %bb94 ]		; <i32> [#uses=4]
-	%operation.0 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec		; <%struct.YYToken*> [#uses=1]
-	br i1 false, label %bb1134, label %bb51
-
-bb1134:		; preds = %bb1132
-	ret void
-}

Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-01-12-CoalescerBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-01-12-CoalescerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-01-12-CoalescerBug.ll (removed)
@@ -1,84 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2
-; PR3311
-
-	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.VEC_basic_block_base = type { i32, i32, [1 x %struct.basic_block_def*] }
-	%struct.VEC_basic_block_gc = type { %struct.VEC_basic_block_base }
-	%struct.VEC_edge_base = type { i32, i32, [1 x %struct.edge_def*] }
-	%struct.VEC_edge_gc = type { %struct.VEC_edge_base }
-	%struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] }
-	%struct.VEC_rtx_gc = type { %struct.VEC_rtx_base }
-	%struct.VEC_temp_slot_p_base = type { i32, i32, [1 x %struct.temp_slot*] }
-	%struct.VEC_temp_slot_p_gc = type { %struct.VEC_temp_slot_p_base }
-	%struct.VEC_tree_base = type { i32, i32, [1 x %struct.tree_node*] }
-	%struct.VEC_tree_gc = type { %struct.VEC_tree_base }
-	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
-	%struct.basic_block_def = type { %struct.tree_node*, %struct.VEC_edge_gc*, %struct.VEC_edge_gc*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_il_dependent, %struct.tree_node*, %struct.edge_prediction*, i64, i32, i32, i32, i32 }
-	%struct.basic_block_il_dependent = type { %struct.rtl_bb_info* }
-	%struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [2 x i64] }
-	%struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
-	%struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
-	%struct.block_symbol = type { [3 x %struct.rtunion], %struct.object_block*, i64 }
-	%struct.c_arg_info = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i8 }
-	%struct.c_language_function = type { %struct.stmt_tree_s }
-	%struct.c_switch = type opaque
-	%struct.control_flow_graph = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.VEC_basic_block_gc*, i32, i32, i32, %struct.VEC_basic_block_gc*, i32 }
-	%struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
-	%struct.edge_def_insns = type { %struct.rtx_def* }
-	%struct.edge_prediction = type opaque
-	%struct.eh_status = type opaque
-	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
-	%struct.et_node = type opaque
-	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
-	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.control_flow_graph*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.VEC_temp_slot_p_gc*, %struct.temp_slot*, %struct.var_refs_queue*, i32, i32, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.htab*, %struct.rtx_def*, i32, i32, i32, %struct.location_t, %struct.VEC_tree_gc*, %struct.tree_node*, i8*, i8*, i8*, i8*, i8*, %struct.tree_node*, i8, i8, i8, i8, i8, i8 }
-	%struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32 }
-	%struct.initial_value_struct = type opaque
-	%struct.lang_decl = type { i8 }
-	%struct.language_function = type { %struct.c_language_function, %struct.tree_node*, %struct.tree_node*, %struct.c_switch*, %struct.c_arg_info*, i32, i32, i32, i32 }
-	%struct.location_t = type { i8*, i32 }
-	%struct.loop = type opaque
-	%struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, [4 x i32], i32, i32, i32 }
-	%struct.object_block = type { %struct.section*, i32, i64, %struct.VEC_rtx_gc*, %struct.VEC_rtx_gc* }
-	%struct.obstack = type { i64, %struct._obstack_chunk*, i8*, i8*, i8*, i64, i32, %struct._obstack_chunk* (i8*, i64)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
-	%struct.omp_clause_subcode = type { i32 }
-	%struct.rtl_bb_info = type { %struct.rtx_def*, %struct.rtx_def*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, %struct.rtx_def*, %struct.rtx_def*, i32 }
-	%struct.rtunion = type { i8* }
-	%struct.rtx_def = type { i16, i8, i8, %struct.u }
-	%struct.section = type { %struct.unnamed_section }
-	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
-	%struct.stack_local_entry = type opaque
-	%struct.stmt_tree_s = type { %struct.tree_node*, i32 }
-	%struct.temp_slot = type opaque
-	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
-	%struct.tree_decl_common = type { %struct.tree_decl_minimal, %struct.tree_node*, i8, i8, i8, i8, i8, i32, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
-	%struct.tree_decl_minimal = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, %struct.tree_node* }
-	%struct.tree_decl_non_common = type { %struct.tree_decl_with_vis, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
-	%struct.tree_decl_with_rtl = type { %struct.tree_decl_common, %struct.rtx_def*, i32 }
-	%struct.tree_decl_with_vis = type { %struct.tree_decl_with_rtl, %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 }
-	%struct.tree_function_decl = type { %struct.tree_decl_non_common, i32, i8, i8, i64, %struct.function* }
-	%struct.tree_node = type { %struct.tree_function_decl }
-	%struct.u = type { %struct.block_symbol }
-	%struct.unnamed_section = type { %struct.omp_clause_subcode, void (i8*)*, i8*, %struct.section* }
-	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
-	%struct.varasm_status = type opaque
-	%union.tree_ann_d = type opaque
-@.str1 = external constant [31 x i8]		; <[31 x i8]*> [#uses=1]
-@integer_types = external global [11 x %struct.tree_node*]		; <[11 x %struct.tree_node*]*> [#uses=1]
-@__FUNCTION__.31164 = external constant [23 x i8], align 16		; <[23 x i8]*> [#uses=1]
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i32, i32)* @c_common_type_for_size to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define i32 @c_common_type_for_size(i32 %bits, i32 %unsignedp) nounwind {
-entry:
-	%0 = load %struct.tree_node** getelementptr ([11 x %struct.tree_node*]* @integer_types, i32 0, i64 5), align 8		; <%struct.tree_node*> [#uses=1]
-	br i1 false, label %bb16, label %bb
-
-bb:		; preds = %entry
-	tail call void @tree_class_check_failed(%struct.tree_node* %0, i32 2, i8* getelementptr ([31 x i8]* @.str1, i32 0, i64 0), i32 1785, i8* getelementptr ([23 x i8]* @__FUNCTION__.31164, i32 0, i32 0)) noreturn nounwind
-	unreachable
-
-bb16:		; preds = %entry
-	%tmp = add i32 %bits, %unsignedp		; <i32> [#uses=1]
-	ret i32 %tmp
-}
-
-declare void @tree_class_check_failed(%struct.tree_node*, i32, i8*, i32, i8*) noreturn

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2009-08-23-linkerprivate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-08-23-linkerprivate.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-08-23-linkerprivate.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-08-23-linkerprivate.ll Fri Jul  2 04:57:13 2010
@@ -2,7 +2,7 @@
 
 ; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
 
-@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16		; <i32*> [#uses=0]
+@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16
 
 ; CHECK: .globl l_objc_msgSend_fixup_alloc
 ; CHECK: .weak_definition l_objc_msgSend_fixup_alloc

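A note on the linkage swap above: plain linker_private gives a symbol the
Darwin "l" prefix so the static linker can strip it from the final image,
while linker_private_weak keeps that behavior and additionally makes the
definition weak, letting duplicate copies (here an Objective-C message-ref
fixup in __objc_msgrefs) be coalesced across translation units. The CHECK
lines verify that the symbol keeps its l-prefixed name and carries a
.weak_definition. A minimal sketch in the era's IR (hypothetical symbol;
this linkage was removed from LLVM in later releases):

	@"\01l_example" = linker_private_weak global i32 0
	; expected Darwin directives:
	;   .globl l_example
	;   .weak_definition l_example
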
Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-07-CoalescerBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-07-CoalescerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-07-CoalescerBug.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-freebsd7.2 -code-model=kernel | FileCheck %s
-; PR4689
-
-%struct.__s = type { [8 x i8] }
-%struct.pcb = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i16, i8* }
-%struct.pcpu = type { i32*, i32*, i32*, i32*, %struct.pcb*, i64, i32, i32, i32, i32 }
-
-define i64 @hammer_time(i64 %modulep, i64 %physfree) nounwind ssp noredzone noimplicitfloat {
-; CHECK: hammer_time:
-; CHECK: movq $Xrsvd, %rax
-; CHECK: movq $Xrsvd, %rcx
-entry:
-  br i1 undef, label %if.then, label %if.end
-
-if.then:                                          ; preds = %entry
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.inc, %if.end
-  switch i32 undef, label %if.then76 [
-    i32 9, label %for.inc
-    i32 10, label %for.inc
-    i32 11, label %for.inc
-    i32 12, label %for.inc
-  ]
-
-if.then76:                                        ; preds = %for.body
-  unreachable
-
-for.inc:                                          ; preds = %for.body, %for.body, %for.body, %for.body
-  br i1 undef, label %for.end, label %for.body
-
-for.end:                                          ; preds = %for.inc
-  call void asm sideeffect "mov $1,%gs:$0", "=*m,r,~{dirflag},~{fpsr},~{flags}"(%struct.__s* bitcast (%struct.pcb** getelementptr (%struct.pcpu* null, i32 0, i32 4) to %struct.__s*), i64 undef) nounwind
-  br label %for.body170
-
-for.body170:                                      ; preds = %for.body170, %for.end
-  store i64 or (i64 and (i64 or (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 2097152), i64 2162687), i64 or (i64 or (i64 and (i64 shl (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 32), i64 -281474976710656), i64 140737488355328), i64 15393162788864)), i64* undef
-  br i1 undef, label %for.end175, label %for.body170
-
-for.end175:                                       ; preds = %for.body170
-  unreachable
-}
-
-declare void @Xrsvd(i32, i32, i32, i32) ssp noredzone noimplicitfloat

Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll (removed)
@@ -1,29 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -post-RA-scheduler=true | FileCheck %s
-
-; PR4958
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: main:
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  br label %bb
-
-bb:                                               ; preds = %bb1, %entry
-; CHECK:      addl $1
-; CHECK-NEXT: movl %e
-; CHECK-NEXT: adcl $0
-  %i.0 = phi i64 [ 0, %entry ], [ %0, %bb1 ]      ; <i64> [#uses=1]
-  %0 = add nsw i64 %i.0, 1                        ; <i64> [#uses=2]
-  %1 = icmp sgt i32 0, 0                          ; <i1> [#uses=1]
-  br i1 %1, label %bb2, label %bb1
-
-bb1:                                              ; preds = %bb
-  %2 = icmp sle i64 %0, 1                         ; <i1> [#uses=1]
-  br i1 %2, label %bb, label %bb2
-
-bb2:                                              ; preds = %bb1, %bb
-  br label %return
-
-return:                                           ; preds = %bb2
-  ret i32 0
-}

Removed: llvm/branches/wendling/eh/test/CodeGen/X86/2009-12-12-CoalescerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2009-12-12-CoalescerBug.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2009-12-12-CoalescerBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2009-12-12-CoalescerBug.ll (removed)
@@ -1,40 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
-
-define i32 @do_loop(i32* nocapture %sdp, i32* nocapture %ddp, i8* %mdp, i8* nocapture %cdp, i32 %w) nounwind readonly optsize ssp {
-entry:
-  br label %bb
-
-bb:                                               ; preds = %bb5, %entry
-  %mask.1.in = load i8* undef, align 1            ; <i8> [#uses=3]
-  %0 = icmp eq i8 %mask.1.in, 0                   ; <i1> [#uses=1]
-  br i1 %0, label %bb5, label %bb1
-
-bb1:                                              ; preds = %bb
-  br i1 undef, label %bb2, label %bb3
-
-bb2:                                              ; preds = %bb1
-; CHECK: %bb2
-; CHECK: movb %ch, %al
-  %1 = zext i8 %mask.1.in to i32                  ; <i32> [#uses=1]
-  %2 = zext i8 undef to i32                       ; <i32> [#uses=1]
-  %3 = mul i32 %2, %1                             ; <i32> [#uses=1]
-  %4 = add i32 %3, 1                              ; <i32> [#uses=1]
-  %5 = add i32 %4, 0                              ; <i32> [#uses=1]
-  %6 = lshr i32 %5, 8                             ; <i32> [#uses=1]
-  %retval12.i = trunc i32 %6 to i8                ; <i8> [#uses=1]
-  br label %bb3
-
-bb3:                                              ; preds = %bb2, %bb1
-  %mask.0.in = phi i8 [ %retval12.i, %bb2 ], [ %mask.1.in, %bb1 ] ; <i8> [#uses=1]
-  %7 = icmp eq i8 %mask.0.in, 0                   ; <i1> [#uses=1]
-  br i1 %7, label %bb5, label %bb4
-
-bb4:                                              ; preds = %bb3
-  br label %bb5
-
-bb5:                                              ; preds = %bb4, %bb3, %bb
-  br i1 undef, label %bb6, label %bb
-
-bb6:                                              ; preds = %bb5
-  ret i32 undef
-}

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll Fri Jul  2 04:57:13 2010
@@ -9,11 +9,11 @@
 ; lowering of arguments potentially overwrites the value.
 ;
 ; Move return address (76(%esp)) to a temporary register (%ebp)
-; CHECK: movl 76(%esp), %ebp
+; CHECK: movl 76(%esp), [[REGISTER:%[a-z]+]]
 ; Overwrite return address
-; CHECK: movl %ecx, 76(%esp)
+; CHECK: movl %ebx, 76(%esp)
 ; Move return address from temporary register (%ebp) to new stack location (60(%esp))
-; CHECK: movl %ebp, 60(%esp)
+; CHECK: movl [[REGISTER]], 60(%esp)
 
 %tupl_p = type [9 x i32]*
 

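Two of the rewritten CHECK lines above use a FileCheck pattern variable
instead of a hard-coded register: [[REGISTER:%[a-z]+]] binds whatever text
matched the regex, and the later bare [[REGISTER]] must match exactly the
same text. The test therefore still proves the return address leaves through
the same temporary it arrived in, without caring which register the
allocator happens to pick. A generic illustration (hypothetical
instructions, not from this test):

	; CHECK: movl 76(%esp), [[REG:%[a-z]+]]
	; CHECK: movl [[REG]], 60(%esp)
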
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/break-sse-dep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/break-sse-dep.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/break-sse-dep.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/break-sse-dep.ll Fri Jul  2 04:57:13 2010
@@ -4,7 +4,7 @@
 entry:
 ; CHECK: t1:
 ; CHECK: movss (%rdi), %xmm0
-; CHECK; cvtss2sd %xmm0, %xmm0
+; CHECK: cvtss2sd %xmm0, %xmm0
 
   %0 = load float* %x, align 4
   %1 = fpext float %0 to double
@@ -14,7 +14,7 @@
 define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
 entry:
 ; CHECK: t2:
-; CHECK; cvtsd2ss (%rdi), %xmm0
+; CHECK: cvtsd2ss (%rdi), %xmm0
   %0 = load double* %x, align 8
   %1 = fptrunc double %0 to float
   ret float %1

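The hunks above are a one-character fix with real effect: FileCheck only
recognizes directives written with a colon, so a line reading "; CHECK;
cvtss2sd ..." is an ordinary comment that silently checks nothing, and both
conversions had gone unverified. The same class of typo (a ";CHECK-NEXT"
missing its colon) is fixed in the DebugInfo test later in this patch.
Illustrated on hypothetical content:

	; CHECK; addl %eax, %ebx    <- inert: not parsed as a directive
	; CHECK: addl %eax, %ebx    <- active: must appear in the output
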
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-gep.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-gep.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-gep.ll Fri Jul  2 04:57:13 2010
@@ -51,3 +51,22 @@
 ; X64:  	ret
 
 }
+
+define double @test4(i64 %x, double* %p) nounwind {
+entry:
+  %x.addr = alloca i64, align 8                   ; <i64*> [#uses=2]
+  %p.addr = alloca double*, align 8               ; <double**> [#uses=2]
+  store i64 %x, i64* %x.addr
+  store double* %p, double** %p.addr
+  %tmp = load i64* %x.addr                        ; <i64> [#uses=1]
+  %add = add nsw i64 %tmp, 16                     ; <i64> [#uses=1]
+  %tmp1 = load double** %p.addr                   ; <double*> [#uses=1]
+  %arrayidx = getelementptr inbounds double* %tmp1, i64 %add ; <double*> [#uses=1]
+  %tmp2 = load double* %arrayidx                  ; <double> [#uses=1]
+  ret double %tmp2
+
+; X32: test4:
+; X32: 128(%e{{.*}},%e{{.*}},8)
+; X64: test4:
+; X64: 128(%r{{.*}},%r{{.*}},8)
+}

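The new test4 pins down constant-offset folding in fast instruction
selection's address computation: the add of 16 to the index folds into the
displacement, scaled by the 8-byte element size, giving disp = 16 * 8 = 128
with scale 8 -- exactly the 128(%base,%index,8) operand the X32 and X64
CHECK lines expect. A reduced sketch of the same pattern (hypothetical
function name, era syntax):

	define double @sketch(i64 %x, double* %p) nounwind {
	  %add = add nsw i64 %x, 16
	  %gep = getelementptr inbounds double* %p, i64 %add
	  %val = load double* %gep		; lowers to 128(%base,%index,8)
	  ret double %val
	}
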
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-shift-imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-shift-imm.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-shift-imm.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/fast-isel-shift-imm.ll Fri Jul  2 04:57:13 2010
@@ -1,7 +1,8 @@
 ; RUN: llc < %s -march=x86 -O0 | grep {sarl	\$80, %eax}
 ; PR3242
 
-define i32 @foo(i32 %x) nounwind {
+define void @foo(i32 %x, i32* %p) nounwind {
   %y = ashr i32 %x, 50000
-  ret i32 %y
+  store i32 %y, i32* %p
+  ret void
 }

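Two details are worth spelling out. The grep still matches because the sarl
immediate field is a single byte and 50000 = 195 * 256 + 80, so the emitted
immediate is 50000 mod 256 = 80. And a shift amount of 50000 exceeds the
32-bit width, making %y an undefined value, so storing it through %p rather
than returning it plausibly keeps this crash regression test (PR3242) from
depending on what an undefined return value happens to contain.
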
Removed: llvm/branches/wendling/eh/test/CodeGen/X86/imp-def-copies.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/imp-def-copies.ll?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/imp-def-copies.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/imp-def-copies.ll (removed)
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=x86 | not grep mov
-
-	%struct.active_line = type { %struct.gs_fixed_point, %struct.gs_fixed_point, i32, i32, i32, %struct.line_segment*, i32, i16, i16, %struct.active_line*, %struct.active_line* }
-	%struct.gs_fixed_point = type { i32, i32 }
-	%struct.line_list = type { %struct.active_line*, i32, i16, %struct.active_line*, %struct.active_line*, %struct.active_line*, %struct.active_line, i32 }
-	%struct.line_segment = type { %struct.line_segment*, %struct.line_segment*, i32, %struct.gs_fixed_point }
-	%struct.subpath = type { %struct.line_segment*, %struct.line_segment*, i32, %struct.gs_fixed_point, %struct.line_segment*, i32, i32, i8 }
-
-define fastcc void @add_y_list(%struct.subpath* %ppath.0.4.val, i16 signext  %tag, %struct.line_list* %ll, i32 %pbox.0.0.1.val, i32 %pbox.0.1.0.val, i32 %pbox.0.1.1.val) nounwind  {
-entry:
-	br i1 false, label %return, label %bb
-bb:		; preds = %bb280, %entry
-	%psub.1.reg2mem.0 = phi %struct.subpath* [ %psub.0.reg2mem.0, %bb280 ], [ undef, %entry ]		; <%struct.subpath*> [#uses=1]
-	%plast.1.reg2mem.0 = phi %struct.line_segment* [ %plast.0.reg2mem.0, %bb280 ], [ undef, %entry ]		; <%struct.line_segment*> [#uses=1]
-	%prev_dir.0.reg2mem.0 = phi i32 [ %dir.0.reg2mem.0, %bb280 ], [ undef, %entry ]		; <i32> [#uses=1]
-	br i1 false, label %bb280, label %bb109
-bb109:		; preds = %bb
-	%tmp113 = icmp sgt i32 0, %prev_dir.0.reg2mem.0		; <i1> [#uses=1]
-	br i1 %tmp113, label %bb116, label %bb280
-bb116:		; preds = %bb109
-	ret void
-bb280:		; preds = %bb109, %bb
-	%psub.0.reg2mem.0 = phi %struct.subpath* [ null, %bb ], [ %psub.1.reg2mem.0, %bb109 ]		; <%struct.subpath*> [#uses=1]
-	%plast.0.reg2mem.0 = phi %struct.line_segment* [ null, %bb ], [ %plast.1.reg2mem.0, %bb109 ]		; <%struct.line_segment*> [#uses=1]
-	%dir.0.reg2mem.0 = phi i32 [ 0, %bb ], [ 0, %bb109 ]		; <i32> [#uses=1]
-	br i1 false, label %return, label %bb
-return:		; preds = %bb280, %entry
-	ret void
-}

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/ins_subreg_coalesce-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/ins_subreg_coalesce-3.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/ins_subreg_coalesce-3.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/ins_subreg_coalesce-3.ll Fri Jul  2 04:57:13 2010
@@ -39,8 +39,7 @@
 	%tmp659 = icmp eq i8 %tmp658, 0		; <i1> [#uses=1]
 	br i1 %tmp659, label %bb650, label %bb662
 bb662:		; preds = %bb650
-	%tmp685 = icmp eq %struct.rec* null, null		; <i1> [#uses=1]
-	br i1 %tmp685, label %bb761, label %bb688
+	br label %bb761
 bb688:		; preds = %bb662
 	ret void
 bb761:		; preds = %bb662

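This hunk and the loop-strength-reduce6 hunk below are the same kind of
cleanup: a branch whose condition is a compile-time constant is rewritten as
an unconditional branch to the successor actually taken. Here the condition
is always true:

	%tmp685 = icmp eq %struct.rec* null, null	; always true
	br i1 %tmp685, label %bb761, label %bb688	; always goes to %bb761
	; folds to:
	br label %bb761

In loop-strength-reduce6 the conditions are the literal i1 false, which
always selects the second label, hence each br i1 false, label %A, label %B
becoming br label %B.
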
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/loop-strength-reduce6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/loop-strength-reduce6.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/loop-strength-reduce6.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/loop-strength-reduce6.ll Fri Jul  2 04:57:13 2010
@@ -2,22 +2,22 @@
 
 define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind {
 entry:
-	br i1 false, label %cond_next191, label %cond_true189
+	br label %cond_true189
 
 cond_true189:		; preds = %entry
 	ret i32 0
 
 cond_next191:		; preds = %entry
-	br i1 false, label %cond_next37.i, label %cond_false.i9
+	br label %cond_false.i9
 
 cond_false.i9:		; preds = %cond_next191
 	ret i32 0
 
 cond_next37.i:		; preds = %cond_next191
-	br i1 false, label %cond_false50.i, label %cond_true44.i
+	br label %cond_true44.i
 
 cond_true44.i:		; preds = %cond_next37.i
-	br i1 false, label %cond_true11.i.i, label %bb414.preheader.i
+	br label %bb414.preheader.i
 
 cond_true11.i.i:		; preds = %cond_true44.i
 	ret i32 0
@@ -26,19 +26,19 @@
 	ret i32 0
 
 bb414.preheader.i:		; preds = %cond_true44.i
-	br i1 false, label %bb.i18, label %do_layer3.exit
+	br label %do_layer3.exit
 
 bb.i18:		; preds = %bb414.preheader.i
-	br i1 false, label %bb358.i, label %cond_true79.i
+	br label %cond_true79.i
 
 cond_true79.i:		; preds = %bb.i18
 	ret i32 0
 
 bb331.i:		; preds = %bb358.i, %cond_true.i149.i
-	br i1 false, label %cond_true.i149.i, label %cond_false.i151.i
+	br label %cond_false.i151.i
 
 cond_true.i149.i:		; preds = %bb331.i
-	br i1 false, label %bb178.preheader.i.i, label %bb331.i
+	br label %bb331.i
 
 cond_false.i151.i:		; preds = %bb331.i
 	ret i32 0
@@ -56,7 +56,7 @@
 	br label %bb163.i.i
 
 bb358.i:		; preds = %bb.i18
-	br i1 false, label %bb331.i, label %bb406.i
+	br label %bb406.i
 
 bb406.i:		; preds = %bb358.i
 	ret i32 0

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/lsr-reuse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/lsr-reuse.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/lsr-reuse.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/lsr-reuse.ll Fri Jul  2 04:57:13 2010
@@ -440,3 +440,312 @@
   %s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1]
   ret i32 %s.1.lcssa
 }
+
+; Two loops here are of particular interest; the one at %bb21, where
+; we don't want to leave extra induction variables around, or use an
+; lea to compute an exit condition inside the loop:
+
+; CHECK: test:
+
+; CHECK:      BB10_4:
+; CHECK-NEXT:   movaps  %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   addss   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   mulss   (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   movss   %xmm{{.*}}, (%r{{[^,]*}})
+; CHECK-NEXT:   addq    $4, %r{{.*}}
+; CHECK-NEXT:   decq    %r{{.*}}
+; CHECK-NEXT:   addq    $4, %r{{.*}}
+; CHECK-NEXT:   movaps  %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: BB10_2:
+; CHECK-NEXT:   testq   %r{{.*}}, %r{{.*}}
+; CHECK-NEXT:   jle
+; CHECK-NEXT:   testb   $15, %r{{.*}}
+; CHECK-NEXT:   jne
+
+; And the one at %bb68, where we want to be sure to use superhero mode:
+
+; CHECK:      BB10_10:
+; CHECK-NEXT:   movaps  48(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   mulps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   movaps  32(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   mulps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   movaps  16(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   mulps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   movaps  (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT:   mulps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   movaps  %xmm{{.*}}, (%r{{[^,]*}})
+; CHECK-NEXT:   movaps  %xmm{{.*}}, 16(%r{{[^,]*}})
+; CHECK-NEXT:   movaps  %xmm{{.*}}, 32(%r{{[^,]*}})
+; CHECK-NEXT:   movaps  %xmm{{.*}}, 48(%r{{[^,]*}})
+; CHECK-NEXT:   addps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   addps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   addps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   addps   %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT:   addq    $64, %r{{.*}}
+; CHECK-NEXT:   addq    $64, %r{{.*}}
+; CHECK-NEXT:   addq    $-16, %r{{.*}}
+; CHECK-NEXT: BB10_11:
+; CHECK-NEXT:   cmpq    $15, %r{{.*}}
+; CHECK-NEXT:   jg
+
+define void @test(float* %arg, i64 %arg1, float* nocapture %arg2, float* nocapture %arg3, float* %arg4, i64 %arg5, i64 %arg6) nounwind {
+bb:
+  %t = alloca float, align 4                      ; <float*> [#uses=3]
+  %t7 = alloca float, align 4                     ; <float*> [#uses=2]
+  %t8 = load float* %arg3                         ; <float> [#uses=8]
+  %t9 = ptrtoint float* %arg to i64               ; <i64> [#uses=1]
+  %t10 = ptrtoint float* %arg4 to i64             ; <i64> [#uses=1]
+  %t11 = xor i64 %t10, %t9                        ; <i64> [#uses=1]
+  %t12 = and i64 %t11, 15                         ; <i64> [#uses=1]
+  %t13 = icmp eq i64 %t12, 0                      ; <i1> [#uses=1]
+  %t14 = xor i64 %arg1, 1                         ; <i64> [#uses=1]
+  %t15 = xor i64 %arg5, 1                         ; <i64> [#uses=1]
+  %t16 = or i64 %t15, %t14                        ; <i64> [#uses=1]
+  %t17 = trunc i64 %t16 to i32                    ; <i32> [#uses=1]
+  %t18 = icmp eq i32 %t17, 0                      ; <i1> [#uses=1]
+  br i1 %t18, label %bb19, label %bb213
+
+bb19:                                             ; preds = %bb
+  %t20 = load float* %arg2                        ; <float> [#uses=1]
+  br label %bb21
+
+bb21:                                             ; preds = %bb32, %bb19
+  %t22 = phi i64 [ %t36, %bb32 ], [ 0, %bb19 ]    ; <i64> [#uses=21]
+  %t23 = phi float [ %t35, %bb32 ], [ %t20, %bb19 ] ; <float> [#uses=6]
+  %t24 = sub i64 %arg6, %t22                      ; <i64> [#uses=4]
+  %t25 = getelementptr float* %arg4, i64 %t22     ; <float*> [#uses=4]
+  %t26 = getelementptr float* %arg, i64 %t22      ; <float*> [#uses=3]
+  %t27 = icmp sgt i64 %t24, 0                     ; <i1> [#uses=1]
+  br i1 %t27, label %bb28, label %bb37
+
+bb28:                                             ; preds = %bb21
+  %t29 = ptrtoint float* %t25 to i64              ; <i64> [#uses=1]
+  %t30 = and i64 %t29, 15                         ; <i64> [#uses=1]
+  %t31 = icmp eq i64 %t30, 0                      ; <i1> [#uses=1]
+  br i1 %t31, label %bb37, label %bb32
+
+bb32:                                             ; preds = %bb28
+  %t33 = load float* %t26                         ; <float> [#uses=1]
+  %t34 = fmul float %t23, %t33                    ; <float> [#uses=1]
+  store float %t34, float* %t25
+  %t35 = fadd float %t23, %t8                     ; <float> [#uses=1]
+  %t36 = add i64 %t22, 1                          ; <i64> [#uses=1]
+  br label %bb21
+
+bb37:                                             ; preds = %bb28, %bb21
+  %t38 = fmul float %t8, 4.000000e+00             ; <float> [#uses=1]
+  store float %t38, float* %t
+  %t39 = fmul float %t8, 1.600000e+01             ; <float> [#uses=1]
+  store float %t39, float* %t7
+  %t40 = fmul float %t8, 0.000000e+00             ; <float> [#uses=1]
+  %t41 = fadd float %t23, %t40                    ; <float> [#uses=1]
+  %t42 = insertelement <4 x float> undef, float %t41, i32 0 ; <<4 x float>> [#uses=1]
+  %t43 = fadd float %t23, %t8                     ; <float> [#uses=1]
+  %t44 = insertelement <4 x float> %t42, float %t43, i32 1 ; <<4 x float>> [#uses=1]
+  %t45 = fmul float %t8, 2.000000e+00             ; <float> [#uses=1]
+  %t46 = fadd float %t23, %t45                    ; <float> [#uses=1]
+  %t47 = insertelement <4 x float> %t44, float %t46, i32 2 ; <<4 x float>> [#uses=1]
+  %t48 = fmul float %t8, 3.000000e+00             ; <float> [#uses=1]
+  %t49 = fadd float %t23, %t48                    ; <float> [#uses=1]
+  %t50 = insertelement <4 x float> %t47, float %t49, i32 3 ; <<4 x float>> [#uses=5]
+  %t51 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=3]
+  %t52 = fadd <4 x float> %t50, %t51              ; <<4 x float>> [#uses=3]
+  %t53 = fadd <4 x float> %t52, %t51              ; <<4 x float>> [#uses=3]
+  %t54 = fadd <4 x float> %t53, %t51              ; <<4 x float>> [#uses=2]
+  %t55 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t7) nounwind ; <<4 x float>> [#uses=8]
+  %t56 = icmp sgt i64 %t24, 15                    ; <i1> [#uses=2]
+  br i1 %t13, label %bb57, label %bb118
+
+bb57:                                             ; preds = %bb37
+  br i1 %t56, label %bb61, label %bb112
+
+bb58:                                             ; preds = %bb68
+  %t59 = getelementptr float* %arg, i64 %t78      ; <float*> [#uses=1]
+  %t60 = getelementptr float* %arg4, i64 %t78     ; <float*> [#uses=1]
+  br label %bb112
+
+bb61:                                             ; preds = %bb57
+  %t62 = add i64 %t22, 16                         ; <i64> [#uses=1]
+  %t63 = add i64 %t22, 4                          ; <i64> [#uses=1]
+  %t64 = add i64 %t22, 8                          ; <i64> [#uses=1]
+  %t65 = add i64 %t22, 12                         ; <i64> [#uses=1]
+  %t66 = add i64 %arg6, -16                       ; <i64> [#uses=1]
+  %t67 = sub i64 %t66, %t22                       ; <i64> [#uses=1]
+  br label %bb68
+
+bb68:                                             ; preds = %bb68, %bb61
+  %t69 = phi i64 [ 0, %bb61 ], [ %t111, %bb68 ]   ; <i64> [#uses=3]
+  %t70 = phi <4 x float> [ %t54, %bb61 ], [ %t107, %bb68 ] ; <<4 x float>> [#uses=2]
+  %t71 = phi <4 x float> [ %t50, %bb61 ], [ %t103, %bb68 ] ; <<4 x float>> [#uses=2]
+  %t72 = phi <4 x float> [ %t53, %bb61 ], [ %t108, %bb68 ] ; <<4 x float>> [#uses=2]
+  %t73 = phi <4 x float> [ %t52, %bb61 ], [ %t109, %bb68 ] ; <<4 x float>> [#uses=2]
+  %t74 = shl i64 %t69, 4                          ; <i64> [#uses=5]
+  %t75 = add i64 %t22, %t74                       ; <i64> [#uses=2]
+  %t76 = getelementptr float* %arg, i64 %t75      ; <float*> [#uses=1]
+  %t77 = bitcast float* %t76 to <4 x float>*      ; <<4 x float>*> [#uses=1]
+  %t78 = add i64 %t62, %t74                       ; <i64> [#uses=2]
+  %t79 = add i64 %t63, %t74                       ; <i64> [#uses=2]
+  %t80 = getelementptr float* %arg, i64 %t79      ; <float*> [#uses=1]
+  %t81 = bitcast float* %t80 to <4 x float>*      ; <<4 x float>*> [#uses=1]
+  %t82 = add i64 %t64, %t74                       ; <i64> [#uses=2]
+  %t83 = getelementptr float* %arg, i64 %t82      ; <float*> [#uses=1]
+  %t84 = bitcast float* %t83 to <4 x float>*      ; <<4 x float>*> [#uses=1]
+  %t85 = add i64 %t65, %t74                       ; <i64> [#uses=2]
+  %t86 = getelementptr float* %arg, i64 %t85      ; <float*> [#uses=1]
+  %t87 = bitcast float* %t86 to <4 x float>*      ; <<4 x float>*> [#uses=1]
+  %t88 = getelementptr float* %arg4, i64 %t75     ; <float*> [#uses=1]
+  %t89 = bitcast float* %t88 to <4 x float>*      ; <<4 x float>*> [#uses=1]
+  %t90 = getelementptr float* %arg4, i64 %t79     ; <float*> [#uses=1]
+  %t91 = bitcast float* %t90 to <4 x float>*      ; <<4 x float>*> [#uses=1]
+  %t92 = getelementptr float* %arg4, i64 %t82     ; <float*> [#uses=1]
+  %t93 = bitcast float* %t92 to <4 x float>*      ; <<4 x float>*> [#uses=1]
+  %t94 = getelementptr float* %arg4, i64 %t85     ; <float*> [#uses=1]
+  %t95 = bitcast float* %t94 to <4 x float>*      ; <<4 x float>*> [#uses=1]
+  %t96 = mul i64 %t69, -16                        ; <i64> [#uses=1]
+  %t97 = add i64 %t67, %t96                       ; <i64> [#uses=2]
+  %t98 = load <4 x float>* %t77                   ; <<4 x float>> [#uses=1]
+  %t99 = load <4 x float>* %t81                   ; <<4 x float>> [#uses=1]
+  %t100 = load <4 x float>* %t84                  ; <<4 x float>> [#uses=1]
+  %t101 = load <4 x float>* %t87                  ; <<4 x float>> [#uses=1]
+  %t102 = fmul <4 x float> %t98, %t71             ; <<4 x float>> [#uses=1]
+  %t103 = fadd <4 x float> %t71, %t55             ; <<4 x float>> [#uses=2]
+  %t104 = fmul <4 x float> %t99, %t73             ; <<4 x float>> [#uses=1]
+  %t105 = fmul <4 x float> %t100, %t72            ; <<4 x float>> [#uses=1]
+  %t106 = fmul <4 x float> %t101, %t70            ; <<4 x float>> [#uses=1]
+  store <4 x float> %t102, <4 x float>* %t89
+  store <4 x float> %t104, <4 x float>* %t91
+  store <4 x float> %t105, <4 x float>* %t93
+  store <4 x float> %t106, <4 x float>* %t95
+  %t107 = fadd <4 x float> %t70, %t55             ; <<4 x float>> [#uses=1]
+  %t108 = fadd <4 x float> %t72, %t55             ; <<4 x float>> [#uses=1]
+  %t109 = fadd <4 x float> %t73, %t55             ; <<4 x float>> [#uses=1]
+  %t110 = icmp sgt i64 %t97, 15                   ; <i1> [#uses=1]
+  %t111 = add i64 %t69, 1                         ; <i64> [#uses=1]
+  br i1 %t110, label %bb68, label %bb58
+
+bb112:                                            ; preds = %bb58, %bb57
+  %t113 = phi float* [ %t59, %bb58 ], [ %t26, %bb57 ] ; <float*> [#uses=1]
+  %t114 = phi float* [ %t60, %bb58 ], [ %t25, %bb57 ] ; <float*> [#uses=1]
+  %t115 = phi <4 x float> [ %t103, %bb58 ], [ %t50, %bb57 ] ; <<4 x float>> [#uses=1]
+  %t116 = phi i64 [ %t97, %bb58 ], [ %t24, %bb57 ] ; <i64> [#uses=1]
+  %t117 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=0]
+  br label %bb194
+
+bb118:                                            ; preds = %bb37
+  br i1 %t56, label %bb122, label %bb194
+
+bb119:                                            ; preds = %bb137
+  %t120 = getelementptr float* %arg, i64 %t145    ; <float*> [#uses=1]
+  %t121 = getelementptr float* %arg4, i64 %t145   ; <float*> [#uses=1]
+  br label %bb194
+
+bb122:                                            ; preds = %bb118
+  %t123 = add i64 %t22, -1                        ; <i64> [#uses=1]
+  %t124 = getelementptr inbounds float* %arg, i64 %t123 ; <float*> [#uses=1]
+  %t125 = bitcast float* %t124 to <4 x float>*    ; <<4 x float>*> [#uses=1]
+  %t126 = load <4 x float>* %t125                 ; <<4 x float>> [#uses=1]
+  %t127 = add i64 %t22, 16                        ; <i64> [#uses=1]
+  %t128 = add i64 %t22, 3                         ; <i64> [#uses=1]
+  %t129 = add i64 %t22, 7                         ; <i64> [#uses=1]
+  %t130 = add i64 %t22, 11                        ; <i64> [#uses=1]
+  %t131 = add i64 %t22, 15                        ; <i64> [#uses=1]
+  %t132 = add i64 %t22, 4                         ; <i64> [#uses=1]
+  %t133 = add i64 %t22, 8                         ; <i64> [#uses=1]
+  %t134 = add i64 %t22, 12                        ; <i64> [#uses=1]
+  %t135 = add i64 %arg6, -16                      ; <i64> [#uses=1]
+  %t136 = sub i64 %t135, %t22                     ; <i64> [#uses=1]
+  br label %bb137
+
+bb137:                                            ; preds = %bb137, %bb122
+  %t138 = phi i64 [ 0, %bb122 ], [ %t193, %bb137 ] ; <i64> [#uses=3]
+  %t139 = phi <4 x float> [ %t54, %bb122 ], [ %t189, %bb137 ] ; <<4 x float>> [#uses=2]
+  %t140 = phi <4 x float> [ %t50, %bb122 ], [ %t185, %bb137 ] ; <<4 x float>> [#uses=2]
+  %t141 = phi <4 x float> [ %t53, %bb122 ], [ %t190, %bb137 ] ; <<4 x float>> [#uses=2]
+  %t142 = phi <4 x float> [ %t52, %bb122 ], [ %t191, %bb137 ] ; <<4 x float>> [#uses=2]
+  %t143 = phi <4 x float> [ %t126, %bb122 ], [ %t175, %bb137 ] ; <<4 x float>> [#uses=1]
+  %t144 = shl i64 %t138, 4                        ; <i64> [#uses=9]
+  %t145 = add i64 %t127, %t144                    ; <i64> [#uses=2]
+  %t146 = add i64 %t128, %t144                    ; <i64> [#uses=1]
+  %t147 = getelementptr float* %arg, i64 %t146    ; <float*> [#uses=1]
+  %t148 = bitcast float* %t147 to <4 x float>*    ; <<4 x float>*> [#uses=1]
+  %t149 = add i64 %t129, %t144                    ; <i64> [#uses=1]
+  %t150 = getelementptr float* %arg, i64 %t149    ; <float*> [#uses=1]
+  %t151 = bitcast float* %t150 to <4 x float>*    ; <<4 x float>*> [#uses=1]
+  %t152 = add i64 %t130, %t144                    ; <i64> [#uses=1]
+  %t153 = getelementptr float* %arg, i64 %t152    ; <float*> [#uses=1]
+  %t154 = bitcast float* %t153 to <4 x float>*    ; <<4 x float>*> [#uses=1]
+  %t155 = add i64 %t131, %t144                    ; <i64> [#uses=1]
+  %t156 = getelementptr float* %arg, i64 %t155    ; <float*> [#uses=1]
+  %t157 = bitcast float* %t156 to <4 x float>*    ; <<4 x float>*> [#uses=1]
+  %t158 = add i64 %t22, %t144                     ; <i64> [#uses=1]
+  %t159 = getelementptr float* %arg4, i64 %t158   ; <float*> [#uses=1]
+  %t160 = bitcast float* %t159 to <4 x float>*    ; <<4 x float>*> [#uses=1]
+  %t161 = add i64 %t132, %t144                    ; <i64> [#uses=1]
+  %t162 = getelementptr float* %arg4, i64 %t161   ; <float*> [#uses=1]
+  %t163 = bitcast float* %t162 to <4 x float>*    ; <<4 x float>*> [#uses=1]
+  %t164 = add i64 %t133, %t144                    ; <i64> [#uses=1]
+  %t165 = getelementptr float* %arg4, i64 %t164   ; <float*> [#uses=1]
+  %t166 = bitcast float* %t165 to <4 x float>*    ; <<4 x float>*> [#uses=1]
+  %t167 = add i64 %t134, %t144                    ; <i64> [#uses=1]
+  %t168 = getelementptr float* %arg4, i64 %t167   ; <float*> [#uses=1]
+  %t169 = bitcast float* %t168 to <4 x float>*    ; <<4 x float>*> [#uses=1]
+  %t170 = mul i64 %t138, -16                      ; <i64> [#uses=1]
+  %t171 = add i64 %t136, %t170                    ; <i64> [#uses=2]
+  %t172 = load <4 x float>* %t148                 ; <<4 x float>> [#uses=2]
+  %t173 = load <4 x float>* %t151                 ; <<4 x float>> [#uses=2]
+  %t174 = load <4 x float>* %t154                 ; <<4 x float>> [#uses=2]
+  %t175 = load <4 x float>* %t157                 ; <<4 x float>> [#uses=2]
+  %t176 = shufflevector <4 x float> %t143, <4 x float> %t172, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %t177 = shufflevector <4 x float> %t176, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %t178 = shufflevector <4 x float> %t172, <4 x float> %t173, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %t179 = shufflevector <4 x float> %t178, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %t180 = shufflevector <4 x float> %t173, <4 x float> %t174, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %t181 = shufflevector <4 x float> %t180, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %t182 = shufflevector <4 x float> %t174, <4 x float> %t175, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %t183 = shufflevector <4 x float> %t182, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %t184 = fmul <4 x float> %t177, %t140           ; <<4 x float>> [#uses=1]
+  %t185 = fadd <4 x float> %t140, %t55            ; <<4 x float>> [#uses=2]
+  %t186 = fmul <4 x float> %t179, %t142           ; <<4 x float>> [#uses=1]
+  %t187 = fmul <4 x float> %t181, %t141           ; <<4 x float>> [#uses=1]
+  %t188 = fmul <4 x float> %t183, %t139           ; <<4 x float>> [#uses=1]
+  store <4 x float> %t184, <4 x float>* %t160
+  store <4 x float> %t186, <4 x float>* %t163
+  store <4 x float> %t187, <4 x float>* %t166
+  store <4 x float> %t188, <4 x float>* %t169
+  %t189 = fadd <4 x float> %t139, %t55            ; <<4 x float>> [#uses=1]
+  %t190 = fadd <4 x float> %t141, %t55            ; <<4 x float>> [#uses=1]
+  %t191 = fadd <4 x float> %t142, %t55            ; <<4 x float>> [#uses=1]
+  %t192 = icmp sgt i64 %t171, 15                  ; <i1> [#uses=1]
+  %t193 = add i64 %t138, 1                        ; <i64> [#uses=1]
+  br i1 %t192, label %bb137, label %bb119
+
+bb194:                                            ; preds = %bb119, %bb118, %bb112
+  %t195 = phi i64 [ %t116, %bb112 ], [ %t171, %bb119 ], [ %t24, %bb118 ] ; <i64> [#uses=2]
+  %t196 = phi <4 x float> [ %t115, %bb112 ], [ %t185, %bb119 ], [ %t50, %bb118 ] ; <<4 x float>> [#uses=1]
+  %t197 = phi float* [ %t114, %bb112 ], [ %t121, %bb119 ], [ %t25, %bb118 ] ; <float*> [#uses=1]
+  %t198 = phi float* [ %t113, %bb112 ], [ %t120, %bb119 ], [ %t26, %bb118 ] ; <float*> [#uses=1]
+  %t199 = extractelement <4 x float> %t196, i32 0 ; <float> [#uses=2]
+  %t200 = icmp sgt i64 %t195, 0                   ; <i1> [#uses=1]
+  br i1 %t200, label %bb201, label %bb211
+
+bb201:                                            ; preds = %bb201, %bb194
+  %t202 = phi i64 [ %t209, %bb201 ], [ 0, %bb194 ] ; <i64> [#uses=3]
+  %t203 = phi float [ %t208, %bb201 ], [ %t199, %bb194 ] ; <float> [#uses=2]
+  %t204 = getelementptr float* %t198, i64 %t202   ; <float*> [#uses=1]
+  %t205 = getelementptr float* %t197, i64 %t202   ; <float*> [#uses=1]
+  %t206 = load float* %t204                       ; <float> [#uses=1]
+  %t207 = fmul float %t203, %t206                 ; <float> [#uses=1]
+  store float %t207, float* %t205
+  %t208 = fadd float %t203, %t8                   ; <float> [#uses=2]
+  %t209 = add i64 %t202, 1                        ; <i64> [#uses=2]
+  %t210 = icmp eq i64 %t209, %t195                ; <i1> [#uses=1]
+  br i1 %t210, label %bb211, label %bb201
+
+bb211:                                            ; preds = %bb201, %bb194
+  %t212 = phi float [ %t199, %bb194 ], [ %t208, %bb201 ] ; <float> [#uses=1]
+  store float %t212, float* %arg2
+  ret void
+
+bb213:                                            ; preds = %bb
+  ret void
+}

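For readers skimming the large new test: the kernel in all of its loops is a
ramp multiply -- out[i] = in[i] * x, with x stepped by a constant each
iteration -- done a scalar at a time in %bb21 and %bb201 and four vectors at
a time in %bb68 and %bb137. A reduced scalar form of the idiom (hypothetical
function, not part of the commit) shows what LSR is given to work with: one
induction variable should address both arrays and drive the exit test, with
no extra IV or lea materialized inside the loop.

	define void @ramp(float* %in, float* %out, float %x0, float %dx, i64 %n) nounwind {
	entry:
	  br label %loop
	loop:
	  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
	  %x = phi float [ %x0, %entry ], [ %x.next, %loop ]
	  %inp = getelementptr float* %in, i64 %i
	  %v = load float* %inp
	  %m = fmul float %x, %v
	  %outp = getelementptr float* %out, i64 %i
	  store float %m, float* %outp
	  %x.next = fadd float %x, %dx
	  %i.next = add i64 %i, 1
	  %done = icmp eq i64 %i.next, %n
	  br i1 %done, label %exit, label %loop
	exit:
	  ret void
	}
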
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/optimize-max-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/optimize-max-3.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/optimize-max-3.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/optimize-max-3.ll Fri Jul  2 04:57:13 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
 
 ; LSR's OptimizeMax should eliminate the select (max).
 
@@ -30,3 +30,47 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 }
+
+; In this case, one of the max operands is another max, which folds,
+; leaving a two-operand max which doesn't fit the usual pattern.
+; OptimizeMax should handle this case.
+; PR7454
+
+;      CHECK: _Z18GenerateStatusPagei:
+
+;      CHECK:         jle
+;  CHECK-NOT:         cmov
+;      CHECK:         xorl    %edi, %edi
+; CHECK-NEXT:         align
+; CHECK-NEXT: BB1_2:
+; CHECK-NEXT:         callq
+; CHECK-NEXT:         incl    %ebx
+; CHECK-NEXT:         cmpl    %r14d, %ebx
+; CHECK-NEXT:         movq    %rax, %rdi
+; CHECK-NEXT:         jl
+
+define void @_Z18GenerateStatusPagei(i32 %jobs_to_display) nounwind {
+entry:
+  %cmp.i = icmp sgt i32 %jobs_to_display, 0       ; <i1> [#uses=1]
+  %tmp = select i1 %cmp.i, i32 %jobs_to_display, i32 0 ; <i32> [#uses=3]
+  %cmp8 = icmp sgt i32 %tmp, 0                    ; <i1> [#uses=1]
+  br i1 %cmp8, label %bb.nph, label %for.end
+
+bb.nph:                                           ; preds = %entry
+  %tmp11 = icmp sgt i32 %tmp, 1                   ; <i1> [#uses=1]
+  %smax = select i1 %tmp11, i32 %tmp, i32 1       ; <i32> [#uses=1]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %bb.nph
+  %i.010 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] ; <i32> [#uses=1]
+  %it.0.09 = phi float* [ null, %bb.nph ], [ %call.i, %for.body ] ; <float*> [#uses=1]
+  %call.i = call float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float* %it.0.09) ; <float*> [#uses=1]
+  %inc = add nsw i32 %i.010, 1                    ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %inc, %smax             ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float*)

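The shape under test, in isolation: %tmp is smax(%jobs_to_display, 0) and
%smax is smax(%tmp, 1). Since max(max(a, 0), 1) = max(a, max(0, 1)) =
max(a, 1), the inner max folds away, leaving the two-operand max feeding the
trip count that PR7454 reported OptimizeMax mishandling. In the selects from
the test:

	%cmp.i = icmp sgt i32 %jobs_to_display, 0
	%tmp   = select i1 %cmp.i, i32 %jobs_to_display, i32 0	; smax(jobs, 0)
	%tmp11 = icmp sgt i32 %tmp, 1
	%smax  = select i1 %tmp11, i32 %tmp, i32 1		; smax(tmp, 1) == smax(jobs, 1)
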
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/pic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/pic.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/pic.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/pic.ll Fri Jul  2 04:57:13 2010
@@ -189,7 +189,7 @@
 ; LINUX:   call	.L7$pb
 ; LINUX: .L7$pb:
 ; LINUX:   addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
-; LINUX:   addl	.LJTI7_0 at GOTOFF(
+; LINUX:   .LJTI7_0 at GOTOFF(
 ; LINUX:   jmpl	*
 
 ; LINUX: .LJTI7_0:

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/pr2659.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/pr2659.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/pr2659.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/pr2659.ll Fri Jul  2 04:57:13 2010
@@ -17,7 +17,7 @@
 ; CHECK: %forcond.preheader.forbody_crit_edge
 ; CHECK: movl $1
 ; CHECK-NOT: xorl
-; CHECK-NEXT: movl $1
+; CHECK-NEXT: movl
 
 ifthen:         ; preds = %entry
   ret i32 0

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/sse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/sse3.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/sse3.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/sse3.ll Fri Jul  2 04:57:13 2010
@@ -63,10 +63,10 @@
 	ret <8 x i16> %tmp
 ; X64: t4:
 ; X64: 	pextrw	$7, %xmm0, %eax
-; X64: 	pshufhw	$100, %xmm0, %xmm2
-; X64: 	pinsrw	$1, %eax, %xmm2
+; X64: 	pshufhw	$100, %xmm0, %xmm1
+; X64: 	pinsrw	$1, %eax, %xmm1
 ; X64: 	pextrw	$1, %xmm0, %eax
-; X64: 	movdqa	%xmm2, %xmm0
+; X64: 	movdqa	%xmm1, %xmm0
 ; X64: 	pinsrw	$4, %eax, %xmm0
 ; X64: 	ret
 }

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/stack-align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/stack-align.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/stack-align.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/stack-align.ll Fri Jul  2 04:57:13 2010
@@ -9,14 +9,15 @@
 
 define void @test({ double, double }* byval  %z, double* %P) {
 entry:
+	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
+	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
+        volatile store double %tmp4, double* %P
 	%tmp = getelementptr { double, double }* %z, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp1 = load double* %tmp, align 8		; <double> [#uses=1]
+	%tmp1 = volatile load double* %tmp, align 8		; <double> [#uses=1]
 	%tmp2 = tail call double @fabs( double %tmp1 )		; <double> [#uses=1]
     ; CHECK: andpd{{.*}}4(%esp), %xmm
-	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
-	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
 	%tmp6 = fadd double %tmp4, %tmp2		; <double> [#uses=1]
-	store double %tmp6, double* %P, align 8
+	volatile store double %tmp6, double* %P, align 8
 	ret void
 }
 

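The volatile qualifiers are presumably there to pin the schedule: volatile
loads and stores may not be deleted, merged, or reordered relative to one
another, so the two fabs computations keep a fixed order and the CHECK line's
andpd-with-a-stack-operand pattern stays matchable. In the era's syntax the
qualifier prefixes the operation:

	%v = volatile load double* @G, align 16	; cannot be removed or moved past other volatiles
	volatile store double %v, double* %P	; likewise pinned
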
Modified: llvm/branches/wendling/eh/test/CodeGen/X86/tailcallstack64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/tailcallstack64.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/tailcallstack64.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/tailcallstack64.ll Fri Jul  2 04:57:13 2010
@@ -2,9 +2,11 @@
 
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Move param %in1 to temp register (%eax).
-; CHECK: movl  %edi, %eax
+; CHECK: movl  32(%rsp), %eax
 ; Move param %in2 to temp register (%r10d).
 ; CHECK: movl  40(%rsp), %r10d
+; Add %in1 %p1 to a different temporary register (%eax).
+; CHECK: addl %edi, %eax
 ; Move param %in2 to stack.
 ; CHECK: movl  %r10d, 32(%rsp)
 ; Move result of addition to stack.

Modified: llvm/branches/wendling/eh/test/CodeGen/X86/vec_shuffle-6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/CodeGen/X86/vec_shuffle-6.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/CodeGen/X86/vec_shuffle-6.ll (original)
+++ llvm/branches/wendling/eh/test/CodeGen/X86/vec_shuffle-6.ll Fri Jul  2 04:57:13 2010
@@ -4,7 +4,7 @@
 ; RUN: grep movups %t | count 2
 
 target triple = "i686-apple-darwin"
-@x = global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ]		; <[4 x i32]*> [#uses=4]
+@x = external global [4 x i32]
 
 define <2 x i64> @test1() {
 	%tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0)		; <i32> [#uses=1]

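Making @x external is presumably what keeps this test honest: with a known
constant initializer the element loads could be constant-folded away, while
an external global forces real loads to survive, so the grep counts on
movups still measure how the shuffle itself is lowered.
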
Modified: llvm/branches/wendling/eh/test/DebugInfo/2010-05-28-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/DebugInfo/2010-05-28-Crash.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/DebugInfo/2010-05-28-Crash.ll (original)
+++ llvm/branches/wendling/eh/test/DebugInfo/2010-05-28-Crash.ll Fri Jul  2 04:57:13 2010
@@ -41,4 +41,4 @@
 
 ;CHECK:	        DEBUG_VALUE: bar:x <- EBX+0
 ;CHECK-NEXT:Ltmp
-;CHECK-NEXT	DEBUG_VALUE: foo:y <- 1+0
+;CHECK-NEXT:	DEBUG_VALUE: foo:y <- 1+0

Removed: llvm/branches/wendling/eh/test/FrontendC++/thunk-weak-odr.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/FrontendC%2B%2B/thunk-weak-odr.cpp?rev=107464&view=auto
==============================================================================
--- llvm/branches/wendling/eh/test/FrontendC++/thunk-weak-odr.cpp (original)
+++ llvm/branches/wendling/eh/test/FrontendC++/thunk-weak-odr.cpp (removed)
@@ -1,33 +0,0 @@
-// RUN: %llvmgxx %s -S -o - | FileCheck %s
-// <rdar://problem/7929157>
-
-struct A {
-  virtual int f() { return 1; }
-};
-
-struct B {
-  virtual int f() { return 2; }
-};
-
-struct C : A, B {
-  virtual int f() { return 3; }
-};
-
-struct D : C {
-  virtual int f() { return 4; }
-};
-
-static int f(D* d) {
-  B* b = d;
-  return b->f();
-};
-
-int g() {
-  D d;
-  return f(&d);
-}
-
-// Thunks should be marked as "weak ODR", not just "weak".
-//
-// CHECK: define weak_odr i32 @_ZThn{{[48]}}_N1C1fEv
-// CHECK: define weak_odr i32 @_ZThn{{[48]}}_N1D1fEv

Modified: llvm/branches/wendling/eh/test/FrontendC/2010-06-17-asmcrash.c
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/FrontendC/2010-06-17-asmcrash.c?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/FrontendC/2010-06-17-asmcrash.c (original)
+++ llvm/branches/wendling/eh/test/FrontendC/2010-06-17-asmcrash.c Fri Jul  2 04:57:13 2010
@@ -12,5 +12,5 @@
      :"+g"(h), "+S"(pixels), "+D"(block)
      :"r" ((x86_reg)line_size)         
      :"%""rax", "memory");
-// CHECK: # (%rsp) %rsi %rdi %rcx
+// CHECK: # %ecx %rsi %rdi %rdx
  }

Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-encoding.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-encoding.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-encoding.s Fri Jul  2 04:57:13 2010
@@ -9967,12 +9967,6 @@
 // CHECK:  encoding: [0x66,0x0f,0x3a,0xdf,0x14,0x82,0x80]
 		aeskeygenassist $128, %xmm1, %xmm2
 
-// rdar://7840289
-// CHECK: pshufb	CPI1_0(%rip), %xmm1
-// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]
-// CHECK:  fixup A - offset: 5, value: CPI1_0-4
-pshufb	CPI1_0(%rip), %xmm1
-
 // rdar://7910087
 // CHECK: bsfw	%bx, %bx
 // CHECK:  encoding: [0x66,0x0f,0xbc,0xdb]
@@ -10212,3 +10206,1411 @@
 // CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc]
           vminsd  -4(%ebx,%ecx,8), %xmm2, %xmm5
 
+// CHECK: vmaxps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2]
+          vmaxps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2]
+          vmaxpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vminps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2]
+          vminps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vminpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2]
+          vminpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc]
+          vmaxps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmaxpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc]
+          vmaxpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc]
+          vminps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc]
+          vminpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x54,0xf2]
+          vandps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vandpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x54,0xf2]
+          vandpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vandps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc]
+          vandps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc]
+          vandpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vorps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x56,0xf2]
+          vorps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vorpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x56,0xf2]
+          vorpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc]
+          vorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc]
+          vorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vxorps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x57,0xf2]
+          vxorps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vxorpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x57,0xf2]
+          vxorpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vxorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc]
+          vxorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vxorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc]
+          vxorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandnps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x55,0xf2]
+          vandnps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vandnpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x55,0xf2]
+          vandnpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vandnps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc]
+          vandnps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandnpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc]
+          vandnpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmovss  -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc]
+          vmovss  -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovss  %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x10,0xec]
+          vmovss  %xmm4, %xmm2, %xmm5
+
+// CHECK: vmovsd  -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc]
+          vmovsd  -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovsd  %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x10,0xec]
+          vmovsd  %xmm4, %xmm2, %xmm5
+
+// CHECK: vunpckhps  %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe8,0x15,0xe1]
+          vunpckhps  %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpckhpd  %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe9,0x15,0xe1]
+          vunpckhpd  %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpcklps  %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe8,0x14,0xe1]
+          vunpcklps  %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpcklpd  %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe9,0x14,0xe1]
+          vunpcklpd  %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpckhps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc]
+          vunpckhps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpckhpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc]
+          vunpckhpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpcklps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc]
+          vunpcklps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpcklpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc]
+          vunpcklpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vcmpps  $0, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00]
+          vcmpps  $0, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmpps  $0, (%eax), %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00]
+          vcmpps  $0, (%eax), %xmm6, %xmm1
+
+// CHECK: vcmpps  $7, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07]
+          vcmpps  $7, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmppd  $0, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00]
+          vcmppd  $0, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmppd  $0, (%eax), %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00]
+          vcmppd  $0, (%eax), %xmm6, %xmm1
+
+// CHECK: vcmppd  $7, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07]
+          vcmppd  $7, %xmm0, %xmm6, %xmm1
+
+// CHECK: vshufps  $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08]
+          vshufps  $8, %xmm1, %xmm2, %xmm3
+
+// CHECK: vshufps  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08]
+          vshufps  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vshufpd  $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08]
+          vshufpd  $8, %xmm1, %xmm2, %xmm3
+
+// CHECK: vshufpd  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08]
+          vshufpd  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00]
+          vcmpeqps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02]
+          vcmpleps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01]
+          vcmpltps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04]
+          vcmpneqps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06]
+          vcmpnleps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05]
+          vcmpnltps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07]
+          vcmpordps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03]
+          vcmpunordps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmpleps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnleps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordps   -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00]
+          vcmpeqpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02]
+          vcmplepd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01]
+          vcmpltpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04]
+          vcmpneqpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06]
+          vcmpnlepd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05]
+          vcmpnltpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07]
+          vcmpordpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03]
+          vcmpunordpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmplepd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnlepd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordpd   -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vmovmskps  %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
+          vmovmskps  %xmm2, %eax
+
+// CHECK: vmovmskpd  %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
+          vmovmskpd  %xmm2, %eax
+
+// CHECK: vcmpss  $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00]
+          vcmpeqss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02]
+          vcmpless   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01]
+          vcmpltss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04]
+          vcmpneqss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06]
+          vcmpnless   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05]
+          vcmpnltss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07]
+          vcmpordss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03]
+          vcmpunordss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmpless   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnless   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordss   -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00]
+          vcmpeqsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02]
+          vcmplesd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01]
+          vcmpltsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04]
+          vcmpneqsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06]
+          vcmpnlesd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05]
+          vcmpnltsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07]
+          vcmpordsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03]
+          vcmpunordsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmplesd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnlesd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordsd   -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vucomiss  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1]
+          vucomiss  %xmm1, %xmm2
+
+// CHECK: vucomiss  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2e,0x10]
+          vucomiss  (%eax), %xmm2
+
+// CHECK: vcomiss  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1]
+          vcomiss  %xmm1, %xmm2
+
+// CHECK: vcomiss  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2f,0x10]
+          vcomiss  (%eax), %xmm2
+
+// CHECK: vucomisd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1]
+          vucomisd  %xmm1, %xmm2
+
+// CHECK: vucomisd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2e,0x10]
+          vucomisd  (%eax), %xmm2
+
+// CHECK: vcomisd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1]
+          vcomisd  %xmm1, %xmm2
+
+// CHECK: vcomisd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2f,0x10]
+          vcomisd  (%eax), %xmm2
+
+// CHECK: vcvttss2si  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1]
+          vcvttss2si  %xmm1, %eax
+
+// CHECK: vcvttss2si  (%ecx), %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
+          vcvttss2si  (%ecx), %eax
+
+// CHECK: vcvtsi2ss  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+          vcvtsi2ss  (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2ss  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+          vcvtsi2ss  (%eax), %xmm1, %xmm2
+
+// CHECK: vcvttsd2si  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1]
+          vcvttsd2si  %xmm1, %eax
+
+// CHECK: vcvttsd2si  (%ecx), %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
+          vcvttsd2si  (%ecx), %eax
+
+// CHECK: vcvtsi2sd  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+          vcvtsi2sd  (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2sd  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+          vcvtsi2sd  (%eax), %xmm1, %xmm2
+
+// CHECK: vmovaps  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0x10]
+          vmovaps  (%eax), %xmm2
+
+// CHECK: vmovaps  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0xd1]
+          vmovaps  %xmm1, %xmm2
+
+// CHECK: vmovaps  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x29,0x08]
+          vmovaps  %xmm1, (%eax)
+
+// CHECK: vmovapd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0x10]
+          vmovapd  (%eax), %xmm2
+
+// CHECK: vmovapd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0xd1]
+          vmovapd  %xmm1, %xmm2
+
+// CHECK: vmovapd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x29,0x08]
+          vmovapd  %xmm1, (%eax)
+
+// CHECK: vmovups  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0x10]
+          vmovups  (%eax), %xmm2
+
+// CHECK: vmovups  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0xd1]
+          vmovups  %xmm1, %xmm2
+
+// CHECK: vmovups  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x11,0x08]
+          vmovups  %xmm1, (%eax)
+
+// CHECK: vmovupd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0x10]
+          vmovupd  (%eax), %xmm2
+
+// CHECK: vmovupd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0xd1]
+          vmovupd  %xmm1, %xmm2
+
+// CHECK: vmovupd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x11,0x08]
+          vmovupd  %xmm1, (%eax)
+
+// CHECK: vmovlps  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x13,0x08]
+          vmovlps  %xmm1, (%eax)
+
+// CHECK: vmovlps  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0x18]
+          vmovlps  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlpd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x13,0x08]
+          vmovlpd  %xmm1, (%eax)
+
+// CHECK: vmovlpd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x12,0x18]
+          vmovlpd  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhps  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x17,0x08]
+          vmovhps  %xmm1, (%eax)
+
+// CHECK: vmovhps  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0x18]
+          vmovhps  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhpd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x17,0x08]
+          vmovhpd  %xmm1, (%eax)
+
+// CHECK: vmovhpd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x16,0x18]
+          vmovhpd  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlhps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0xd9]
+          vmovlhps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vmovhlps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0xd9]
+          vmovhlps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vcvtss2sil  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1]
+          vcvtss2si  %xmm1, %eax
+
+// CHECK: vcvtss2sil  (%eax), %ebx
+// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
+          vcvtss2si  (%eax), %ebx
+
+// CHECK: vcvtdq2ps  %xmm5, %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5]
+          vcvtdq2ps  %xmm5, %xmm6
+
+// CHECK: vcvtdq2ps  (%eax), %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0x30]
+          vcvtdq2ps  (%eax), %xmm6
+
+// CHECK: vcvtsd2ss  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2]
+          vcvtsd2ss  %xmm2, %xmm4, %xmm6
+
+// CHECK: vcvtsd2ss  (%eax), %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5a,0x30]
+          vcvtsd2ss  (%eax), %xmm4, %xmm6
+
+// CHECK: vcvtps2dq  %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5b,0xda]
+          vcvtps2dq  %xmm2, %xmm3
+
+// CHECK: vcvtps2dq  (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5b,0x18]
+          vcvtps2dq  (%eax), %xmm3
+
+// CHECK: vcvtss2sd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5a,0xf2]
+          vcvtss2sd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vcvtss2sd  (%eax), %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5a,0x30]
+          vcvtss2sd  (%eax), %xmm4, %xmm6
+
+// CHECK: vcvtdq2ps  %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4]
+          vcvtdq2ps  %xmm4, %xmm6
+
+// CHECK: vcvtdq2ps  (%ecx), %xmm4
+// CHECK: encoding: [0xc5,0xf8,0x5b,0x21]
+          vcvtdq2ps  (%ecx), %xmm4
+
+// CHECK: vcvttps2dq  %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x5b,0xda]
+          vcvttps2dq  %xmm2, %xmm3
+
+// CHECK: vcvttps2dq  (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x5b,0x18]
+          vcvttps2dq  (%eax), %xmm3
+
+// CHECK: vcvtps2pd  %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf8,0x5a,0xda]
+          vcvtps2pd  %xmm2, %xmm3
+
+// CHECK: vcvtps2pd  (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf8,0x5a,0x18]
+          vcvtps2pd  (%eax), %xmm3
+
+// CHECK: vcvtpd2ps  %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5a,0xda]
+          vcvtpd2ps  %xmm2, %xmm3
+
+// CHECK: vsqrtpd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x51,0xd1]
+          vsqrtpd  %xmm1, %xmm2
+
+// CHECK: vsqrtpd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x51,0x10]
+          vsqrtpd  (%eax), %xmm2
+
+// CHECK: vsqrtps  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x51,0xd1]
+          vsqrtps  %xmm1, %xmm2
+
+// CHECK: vsqrtps  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x51,0x10]
+          vsqrtps  (%eax), %xmm2
+
+// CHECK: vsqrtsd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x51,0xd9]
+          vsqrtsd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vsqrtsd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x51,0x18]
+          vsqrtsd  (%eax), %xmm2, %xmm3
+
+// CHECK: vsqrtss  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x51,0xd9]
+          vsqrtss  %xmm1, %xmm2, %xmm3
+
+// CHECK: vsqrtss  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x51,0x18]
+          vsqrtss  (%eax), %xmm2, %xmm3
+
+// CHECK: vrsqrtps  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x52,0xd1]
+          vrsqrtps  %xmm1, %xmm2
+
+// CHECK: vrsqrtps  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x52,0x10]
+          vrsqrtps  (%eax), %xmm2
+
+// CHECK: vrsqrtss  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x52,0xd9]
+          vrsqrtss  %xmm1, %xmm2, %xmm3
+
+// CHECK: vrsqrtss  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x52,0x18]
+          vrsqrtss  (%eax), %xmm2, %xmm3
+
+// CHECK: vrcpps  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x53,0xd1]
+          vrcpps  %xmm1, %xmm2
+
+// CHECK: vrcpps  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x53,0x10]
+          vrcpps  (%eax), %xmm2
+
+// CHECK: vrcpss  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x53,0xd9]
+          vrcpss  %xmm1, %xmm2, %xmm3
+
+// CHECK: vrcpss  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x53,0x18]
+          vrcpss  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovntdq  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0xe7,0x08]
+          vmovntdq  %xmm1, (%eax)
+
+// CHECK: vmovntpd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x2b,0x08]
+          vmovntpd  %xmm1, (%eax)
+
+// CHECK: vmovntps  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x2b,0x08]
+          vmovntps  %xmm1, (%eax)
+
+// CHECK: vldmxcsr  (%eax)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x10]
+          vldmxcsr  (%eax)
+
+// CHECK: vstmxcsr  (%eax)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x18]
+          vstmxcsr  (%eax)
+
+// CHECK: vldmxcsr  3735928559
+// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde]
+          vldmxcsr  0xdeadbeef
+
+// CHECK: vstmxcsr  3735928559
+// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde]
+          vstmxcsr  0xdeadbeef
+
+// CHECK: vpsubb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9]
+          vpsubb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf8,0x18]
+          vpsubb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9]
+          vpsubw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf9,0x18]
+          vpsubw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9]
+          vpsubd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfa,0x18]
+          vpsubd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9]
+          vpsubq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfb,0x18]
+          vpsubq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubsb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9]
+          vpsubsb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubsb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe8,0x18]
+          vpsubsb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9]
+          vpsubsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe9,0x18]
+          vpsubsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubusb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9]
+          vpsubusb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubusb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd8,0x18]
+          vpsubusb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubusw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9]
+          vpsubusw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubusw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd9,0x18]
+          vpsubusw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9]
+          vpaddb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfc,0x18]
+          vpaddb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9]
+          vpaddw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfd,0x18]
+          vpaddw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9]
+          vpaddd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfe,0x18]
+          vpaddd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9]
+          vpaddq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd4,0x18]
+          vpaddq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddsb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xec,0xd9]
+          vpaddsb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddsb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xec,0x18]
+          vpaddsb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xed,0xd9]
+          vpaddsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xed,0x18]
+          vpaddsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddusb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9]
+          vpaddusb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddusb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdc,0x18]
+          vpaddusb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddusw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9]
+          vpaddusw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddusw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdd,0x18]
+          vpaddusw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhuw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9]
+          vpmulhuw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhuw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe4,0x18]
+          vpmulhuw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9]
+          vpmulhw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe5,0x18]
+          vpmulhw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmullw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9]
+          vpmullw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmullw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd5,0x18]
+          vpmullw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmuludq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9]
+          vpmuludq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmuludq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf4,0x18]
+          vpmuludq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpavgb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9]
+          vpavgb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpavgb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe0,0x18]
+          vpavgb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpavgw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9]
+          vpavgw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpavgw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe3,0x18]
+          vpavgw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xea,0xd9]
+          vpminsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpminsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xea,0x18]
+          vpminsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminub  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xda,0xd9]
+          vpminub  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpminub  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xda,0x18]
+          vpminub  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xee,0xd9]
+          vpmaxsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaxsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xee,0x18]
+          vpmaxsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxub  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xde,0xd9]
+          vpmaxub  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaxub  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xde,0x18]
+          vpmaxub  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsadbw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9]
+          vpsadbw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsadbw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf6,0x18]
+          vpsadbw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsllw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9]
+          vpsllw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsllw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf1,0x18]
+          vpsllw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpslld  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9]
+          vpslld  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpslld  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf2,0x18]
+          vpslld  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsllq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9]
+          vpsllq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsllq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf3,0x18]
+          vpsllq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsraw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9]
+          vpsraw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsraw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe1,0x18]
+          vpsraw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrad  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9]
+          vpsrad  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrad  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe2,0x18]
+          vpsrad  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrlw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9]
+          vpsrlw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrlw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd1,0x18]
+          vpsrlw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrld  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9]
+          vpsrld  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrld  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd2,0x18]
+          vpsrld  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrlq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9]
+          vpsrlq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrlq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd3,0x18]
+          vpsrlq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpslld  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
+          vpslld  $10, %xmm2, %xmm3
+
+// CHECK: vpslldq  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a]
+          vpslldq  $10, %xmm2, %xmm3
+
+// CHECK: vpsllq  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a]
+          vpsllq  $10, %xmm2, %xmm3
+
+// CHECK: vpsllw  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a]
+          vpsllw  $10, %xmm2, %xmm3
+
+// CHECK: vpsrad  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a]
+          vpsrad  $10, %xmm2, %xmm3
+
+// CHECK: vpsraw  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a]
+          vpsraw  $10, %xmm2, %xmm3
+
+// CHECK: vpsrld  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a]
+          vpsrld  $10, %xmm2, %xmm3
+
+// CHECK: vpsrldq  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a]
+          vpsrldq  $10, %xmm2, %xmm3
+
+// CHECK: vpsrlq  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a]
+          vpsrlq  $10, %xmm2, %xmm3
+
+// CHECK: vpsrlw  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a]
+          vpsrlw  $10, %xmm2, %xmm3
+
+// CHECK: vpslld  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
+          vpslld  $10, %xmm2, %xmm3
+
+// CHECK: vpand  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9]
+          vpand  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpand  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdb,0x18]
+          vpand  (%eax), %xmm2, %xmm3
+
+// CHECK: vpor  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9]
+          vpor  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpor  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xeb,0x18]
+          vpor  (%eax), %xmm2, %xmm3
+
+// CHECK: vpxor  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xef,0xd9]
+          vpxor  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpxor  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xef,0x18]
+          vpxor  (%eax), %xmm2, %xmm3
+
+// CHECK: vpandn  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9]
+          vpandn  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpandn  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdf,0x18]
+          vpandn  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x74,0xd9]
+          vpcmpeqb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x74,0x18]
+          vpcmpeqb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x75,0xd9]
+          vpcmpeqw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x75,0x18]
+          vpcmpeqw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x76,0xd9]
+          vpcmpeqd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x76,0x18]
+          vpcmpeqd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x64,0xd9]
+          vpcmpgtb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x64,0x18]
+          vpcmpgtb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x65,0xd9]
+          vpcmpgtw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x65,0x18]
+          vpcmpgtw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x66,0xd9]
+          vpcmpgtd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x66,0x18]
+          vpcmpgtd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpacksswb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x63,0xd9]
+          vpacksswb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpacksswb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x63,0x18]
+          vpacksswb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpackssdw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9]
+          vpackssdw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpackssdw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6b,0x18]
+          vpackssdw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpackuswb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x67,0xd9]
+          vpackuswb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpackuswb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x67,0x18]
+          vpackuswb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpshufd  $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04]
+          vpshufd  $4, %xmm2, %xmm3
+
+// CHECK: vpshufd  $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04]
+          vpshufd  $4, (%eax), %xmm3
+
+// CHECK: vpshufhw  $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04]
+          vpshufhw  $4, %xmm2, %xmm3
+
+// CHECK: vpshufhw  $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04]
+          vpshufhw  $4, (%eax), %xmm3
+
+// CHECK: vpshuflw  $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04]
+          vpshuflw  $4, %xmm2, %xmm3
+
+// CHECK: vpshuflw  $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04]
+          vpshuflw  $4, (%eax), %xmm3
+
+// CHECK: vpunpcklbw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x60,0xd9]
+          vpunpcklbw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklbw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x60,0x18]
+          vpunpcklbw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpcklwd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x61,0xd9]
+          vpunpcklwd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklwd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x61,0x18]
+          vpunpcklwd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckldq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x62,0xd9]
+          vpunpckldq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckldq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x62,0x18]
+          vpunpckldq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpcklqdq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9]
+          vpunpcklqdq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklqdq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6c,0x18]
+          vpunpcklqdq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhbw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x68,0xd9]
+          vpunpckhbw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhbw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x68,0x18]
+          vpunpckhbw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhwd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x69,0xd9]
+          vpunpckhwd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhwd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x69,0x18]
+          vpunpckhwd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhdq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9]
+          vpunpckhdq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhdq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6a,0x18]
+          vpunpckhdq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhqdq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9]
+          vpunpckhqdq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhqdq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6d,0x18]
+          vpunpckhqdq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpinsrw  $7, %eax, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07]
+          vpinsrw  $7, %eax, %xmm2, %xmm3
+
+// CHECK: vpinsrw  $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07]
+          vpinsrw  $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vpextrw  $7, %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
+          vpextrw  $7, %xmm2, %eax
+
+// CHECK: vpmovmskb  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1]
+          vpmovmskb  %xmm1, %eax
+
+// CHECK: vmaskmovdqu  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1]
+          vmaskmovdqu  %xmm1, %xmm2
+
+// CHECK: vmovd  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
+          vmovd  %xmm1, %eax
+
+// CHECK: vmovd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x08]
+          vmovd  %xmm1, (%eax)
+
+// CHECK: vmovd  %eax, %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8]
+          vmovd  %eax, %xmm1
+
+// CHECK: vmovd  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x08]
+          vmovd  (%eax), %xmm1
+
+// CHECK: vmovq  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0xd6,0x08]
+          vmovq  %xmm1, (%eax)
+
+// CHECK: vmovq  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1]
+          vmovq  %xmm1, %xmm2
+
+// CHECK: vmovq  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfa,0x7e,0x08]
+          vmovq  (%eax), %xmm1
+
+// CHECK: vcvtpd2dq  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1]
+          vcvtpd2dq  %xmm1, %xmm2
+
+// CHECK: vcvtdq2pd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1]
+          vcvtdq2pd  %xmm1, %xmm2
+
+// CHECK: vcvtdq2pd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0xe6,0x10]
+          vcvtdq2pd  (%eax), %xmm2
+
+// CHECK: vmovshdup  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x16,0xd1]
+          vmovshdup  %xmm1, %xmm2
+
+// CHECK: vmovshdup  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x16,0x10]
+          vmovshdup  (%eax), %xmm2
+
+// CHECK: vmovsldup  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x12,0xd1]
+          vmovsldup  %xmm1, %xmm2
+
+// CHECK: vmovsldup  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x12,0x10]
+          vmovsldup  (%eax), %xmm2
+
+// CHECK: vmovddup  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfb,0x12,0xd1]
+          vmovddup  %xmm1, %xmm2
+
+// CHECK: vmovddup  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfb,0x12,0x10]
+          vmovddup  (%eax), %xmm2
+
+// CHECK: vaddsubps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9]
+          vaddsubps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vaddsubps  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0xd0,0x10]
+          vaddsubps  (%eax), %xmm1, %xmm2
+
+// CHECK: vaddsubpd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9]
+          vaddsubpd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vaddsubpd  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf1,0xd0,0x10]
+          vaddsubpd  (%eax), %xmm1, %xmm2
+
+// CHECK: vhaddps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9]
+          vhaddps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vhaddps  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7c,0x18]
+          vhaddps  (%eax), %xmm2, %xmm3
+
+// CHECK: vhaddpd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9]
+          vhaddpd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vhaddpd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7c,0x18]
+          vhaddpd  (%eax), %xmm2, %xmm3
+
+// CHECK: vhsubps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9]
+          vhsubps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vhsubps  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7d,0x18]
+          vhsubps  (%eax), %xmm2, %xmm3
+
+// CHECK: vhsubpd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9]
+          vhsubpd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vhsubpd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7d,0x18]
+          vhsubpd  (%eax), %xmm2, %xmm3
+

Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-new-encoder.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-new-encoder.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-new-encoder.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_32-new-encoder.s Fri Jul  2 04:57:13 2010
@@ -393,3 +393,25 @@
 // CHECK: wait
 // CHECK:  encoding: [0x9b]
 	fwait
+
+// rdar://7873482
+// CHECK: [0x65,0x8b,0x05,0x7c,0x00,0x00,0x00]
+// FIXME: This is correct but a poor encoding: use 65 a1 7c 00 00 00
+        movl	%gs:124, %eax
+
+// CHECK: pusha
+// CHECK:  encoding: [0x60]
+        	pusha
+
+// CHECK: popa
+// CHECK:  encoding: [0x61]
+        	popa
+
+// CHECK: pushal
+// CHECK:  encoding: [0x60]
+        	pushal
+
+// CHECK: popal
+// CHECK:  encoding: [0x61]
+        	popal
+

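Regarding the FIXME above: both byte sequences decode to the same %gs-relative
load, but 65 a1 7c 00 00 00 uses the one-byte EAX-specific moffs32 opcode (a1)
where the matcher currently emits the two-byte ModRM form (8b 05), costing an
extra byte. A minimal sketch, assuming a 32-bit GNU assembler; the .byte
directive hand-encodes the shorter form, since the assembler otherwise picks
the encoding on its own:

        movl    %gs:124, %eax                        # emitted today: 65 8b 05 7c 00 00 00 (7 bytes)
        .byte   0x65, 0xa1, 0x7c, 0x00, 0x00, 0x00   # same load via the moffs32 form (6 bytes)
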
Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-encoding.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-encoding.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-encoding.s Fri Jul  2 04:57:13 2010
@@ -264,3 +264,1401 @@
 // CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc]
           vminsd  -4(%rbx,%rcx,8), %xmm12, %xmm10
 
+// CHECK: vmaxps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2]
+          vmaxps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2]
+          vmaxpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vminps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2]
+          vminps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vminpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2]
+          vminpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc]
+          vmaxps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmaxpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc]
+          vmaxpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc]
+          vminps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc]
+          vminpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2]
+          vandps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vandpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2]
+          vandpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vandps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc]
+          vandps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc]
+          vandpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vorps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2]
+          vorps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vorpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2]
+          vorpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc]
+          vorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc]
+          vorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vxorps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2]
+          vxorps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vxorpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2]
+          vxorpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vxorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc]
+          vxorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vxorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc]
+          vxorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandnps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2]
+          vandnps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vandnpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2]
+          vandnpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vandnps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc]
+          vandnps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandnpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc]
+          vandnpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmovss  -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc]
+          vmovss  -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovss  %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe]
+          vmovss  %xmm14, %xmm10, %xmm15
+
+// CHECK: vmovsd  -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc]
+          vmovsd  -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovsd  %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe]
+          vmovsd  %xmm14, %xmm10, %xmm15
+
+// rdar://7840289
+// CHECK: pshufb	CPI1_0(%rip), %xmm1
+// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]
+// CHECK:  fixup A - offset: 5, value: CPI1_0-4
+pshufb	CPI1_0(%rip), %xmm1
+
+// CHECK: vunpckhps  %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef]
+          vunpckhps  %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpckhpd  %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef]
+          vunpckhpd  %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpcklps  %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef]
+          vunpcklps  %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpcklpd  %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef]
+          vunpcklpd  %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpckhps  -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc]
+          vunpckhps  -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpckhpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc]
+          vunpckhpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpcklps  -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc]
+          vunpcklps  -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpcklpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc]
+          vunpcklpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vcmpps  $0, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00]
+          vcmpps  $0, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmpps  $0, (%rax), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00]
+          vcmpps  $0, (%rax), %xmm12, %xmm15
+
+// CHECK: vcmpps  $7, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07]
+          vcmpps  $7, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmppd  $0, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00]
+          vcmppd  $0, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmppd  $0, (%rax), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00]
+          vcmppd  $0, (%rax), %xmm12, %xmm15
+
+// CHECK: vcmppd  $7, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07]
+          vcmppd  $7, %xmm10, %xmm12, %xmm15
+
+// CHECK: vshufps  $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08]
+          vshufps  $8, %xmm11, %xmm12, %xmm13
+
+// CHECK: vshufps  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08]
+          vshufps  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vshufpd  $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08]
+          vshufpd  $8, %xmm11, %xmm12, %xmm13
+
+// CHECK: vshufpd  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08]
+          vshufpd  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00]
+          vcmpeqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02]
+          vcmpleps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01]
+          vcmpltps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04]
+          vcmpneqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06]
+          vcmpnleps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05]
+          vcmpnltps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07]
+          vcmpordps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03]
+          vcmpunordps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmpleps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnleps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordps   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00]
+          vcmpeqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02]
+          vcmplepd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01]
+          vcmpltpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04]
+          vcmpneqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06]
+          vcmpnlepd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05]
+          vcmpnltpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07]
+          vcmpordpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03]
+          vcmpunordpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmplepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnlepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordpd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00]
+          vcmpeqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02]
+          vcmpless   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01]
+          vcmpltss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04]
+          vcmpneqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06]
+          vcmpnless   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05]
+          vcmpnltss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07]
+          vcmpordss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03]
+          vcmpunordss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmpless   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnless   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordss   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00]
+          vcmpeqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02]
+          vcmplesd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01]
+          vcmpltsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04]
+          vcmpneqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06]
+          vcmpnlesd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05]
+          vcmpnltsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07]
+          vcmpordsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03]
+          vcmpunordsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmplesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnlesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordsd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vucomiss  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3]
+          vucomiss  %xmm11, %xmm12
+
+// CHECK: vucomiss  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x2e,0x20]
+          vucomiss  (%rax), %xmm12
+
+// CHECK: vcomiss  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3]
+          vcomiss  %xmm11, %xmm12
+
+// CHECK: vcomiss  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x2f,0x20]
+          vcomiss  (%rax), %xmm12
+
+// CHECK: vucomisd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3]
+          vucomisd  %xmm11, %xmm12
+
+// CHECK: vucomisd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x2e,0x20]
+          vucomisd  (%rax), %xmm12
+
+// CHECK: vcomisd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3]
+          vcomisd  %xmm11, %xmm12
+
+// CHECK: vcomisd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x2f,0x20]
+          vcomisd  (%rax), %xmm12
+
+// CHECK: vcvttss2si  (%rcx), %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
+          vcvttss2si  (%rcx), %eax
+
+// CHECK: vcvtsi2ss  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
+          vcvtsi2ss  (%rax), %xmm11, %xmm12
+
+// CHECK: vcvtsi2ss  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
+          vcvtsi2ss  (%rax), %xmm11, %xmm12
+
+// CHECK: vcvttsd2si  (%rcx), %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
+          vcvttsd2si  (%rcx), %eax
+
+// CHECK: vcvtsi2sd  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
+          vcvtsi2sd  (%rax), %xmm11, %xmm12
+
+// CHECK: vcvtsi2sd  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
+          vcvtsi2sd  (%rax), %xmm11, %xmm12
+
+// CHECK: vmovaps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x28,0x20]
+          vmovaps  (%rax), %xmm12
+
+// CHECK: vmovaps  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3]
+          vmovaps  %xmm11, %xmm12
+
+// CHECK: vmovaps  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x29,0x18]
+          vmovaps  %xmm11, (%rax)
+
+// CHECK: vmovapd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x28,0x20]
+          vmovapd  (%rax), %xmm12
+
+// CHECK: vmovapd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3]
+          vmovapd  %xmm11, %xmm12
+
+// CHECK: vmovapd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x29,0x18]
+          vmovapd  %xmm11, (%rax)
+
+// CHECK: vmovups  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x10,0x20]
+          vmovups  (%rax), %xmm12
+
+// CHECK: vmovups  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3]
+          vmovups  %xmm11, %xmm12
+
+// CHECK: vmovups  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x11,0x18]
+          vmovups  %xmm11, (%rax)
+
+// CHECK: vmovupd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x10,0x20]
+          vmovupd  (%rax), %xmm12
+
+// CHECK: vmovupd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3]
+          vmovupd  %xmm11, %xmm12
+
+// CHECK: vmovupd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x11,0x18]
+          vmovupd  %xmm11, (%rax)
+
+// CHECK: vmovlps  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x13,0x18]
+          vmovlps  %xmm11, (%rax)
+
+// CHECK: vmovlps  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x12,0x28]
+          vmovlps  (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlpd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x13,0x18]
+          vmovlpd  %xmm11, (%rax)
+
+// CHECK: vmovlpd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x12,0x28]
+          vmovlpd  (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhps  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x17,0x18]
+          vmovhps  %xmm11, (%rax)
+
+// CHECK: vmovhps  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x16,0x28]
+          vmovhps  (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhpd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x17,0x18]
+          vmovhpd  %xmm11, (%rax)
+
+// CHECK: vmovhpd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x16,0x28]
+          vmovhpd  (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlhps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb]
+          vmovlhps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vmovhlps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb]
+          vmovhlps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vcvtss2sil  %xmm11, %eax
+// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3]
+          vcvtss2si  %xmm11, %eax
+
+// CHECK: vcvtss2sil  (%rax), %ebx
+// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
+          vcvtss2si  (%rax), %ebx
+
+// CHECK: vcvtdq2ps  %xmm10, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2]
+          vcvtdq2ps  %xmm10, %xmm12
+
+// CHECK: vcvtdq2ps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x5b,0x20]
+          vcvtdq2ps  (%rax), %xmm12
+
+// CHECK: vcvtsd2ss  %xmm12, %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4]
+          vcvtsd2ss  %xmm12, %xmm13, %xmm10
+
+// CHECK: vcvtsd2ss  (%rax), %xmm13, %xmm10
+// CHECK: encoding: [0xc5,0x13,0x5a,0x10]
+          vcvtsd2ss  (%rax), %xmm13, %xmm10
+
+// CHECK: vcvtps2dq  %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc]
+          vcvtps2dq  %xmm12, %xmm11
+
+// CHECK: vcvtps2dq  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x5b,0x18]
+          vcvtps2dq  (%rax), %xmm11
+
+// CHECK: vcvtss2sd  %xmm12, %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4]
+          vcvtss2sd  %xmm12, %xmm13, %xmm10
+
+// CHECK: vcvtss2sd  (%rax), %xmm13, %xmm10
+// CHECK: encoding: [0xc5,0x12,0x5a,0x10]
+          vcvtss2sd  (%rax), %xmm13, %xmm10
+
+// CHECK: vcvtdq2ps  %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5]
+          vcvtdq2ps  %xmm13, %xmm10
+
+// CHECK: vcvtdq2ps  (%ecx), %xmm13
+// CHECK: encoding: [0xc5,0x78,0x5b,0x29]
+          vcvtdq2ps  (%ecx), %xmm13
+
+// CHECK: vcvttps2dq  %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc]
+          vcvttps2dq  %xmm12, %xmm11
+
+// CHECK: vcvttps2dq  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7a,0x5b,0x18]
+          vcvttps2dq  (%rax), %xmm11
+
+// CHECK: vcvtps2pd  %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc]
+          vcvtps2pd  %xmm12, %xmm11
+
+// CHECK: vcvtps2pd  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x78,0x5a,0x18]
+          vcvtps2pd  (%rax), %xmm11
+
+// CHECK: vcvtpd2ps  %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc]
+          vcvtpd2ps  %xmm12, %xmm11
+
+// CHECK: vsqrtpd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3]
+          vsqrtpd  %xmm11, %xmm12
+
+// CHECK: vsqrtpd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x51,0x20]
+          vsqrtpd  (%rax), %xmm12
+
+// CHECK: vsqrtps  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3]
+          vsqrtps  %xmm11, %xmm12
+
+// CHECK: vsqrtps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x51,0x20]
+          vsqrtps  (%rax), %xmm12
+
+// CHECK: vsqrtsd  %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3]
+          vsqrtsd  %xmm11, %xmm12, %xmm10
+
+// CHECK: vsqrtsd  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1b,0x51,0x10]
+          vsqrtsd  (%rax), %xmm12, %xmm10
+
+// CHECK: vsqrtss  %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3]
+          vsqrtss  %xmm11, %xmm12, %xmm10
+
+// CHECK: vsqrtss  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x51,0x10]
+          vsqrtss  (%rax), %xmm12, %xmm10
+
+// CHECK: vrsqrtps  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3]
+          vrsqrtps  %xmm11, %xmm12
+
+// CHECK: vrsqrtps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x52,0x20]
+          vrsqrtps  (%rax), %xmm12
+
+// CHECK: vrsqrtss  %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3]
+          vrsqrtss  %xmm11, %xmm12, %xmm10
+
+// CHECK: vrsqrtss  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x52,0x10]
+          vrsqrtss  (%rax), %xmm12, %xmm10
+
+// CHECK: vrcpps  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3]
+          vrcpps  %xmm11, %xmm12
+
+// CHECK: vrcpps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x53,0x20]
+          vrcpps  (%rax), %xmm12
+
+// CHECK: vrcpss  %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3]
+          vrcpss  %xmm11, %xmm12, %xmm10
+
+// CHECK: vrcpss  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x53,0x10]
+          vrcpss  (%rax), %xmm12, %xmm10
+
+// CHECK: vmovntdq  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0xe7,0x18]
+          vmovntdq  %xmm11, (%rax)
+
+// CHECK: vmovntpd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x2b,0x18]
+          vmovntpd  %xmm11, (%rax)
+
+// CHECK: vmovntps  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x2b,0x18]
+          vmovntps  %xmm11, (%rax)
+
+// CHECK: vldmxcsr  -4(%rip)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff]
+          vldmxcsr  -4(%rip)
+
+// CHECK: vstmxcsr  -4(%rsp)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+          vstmxcsr  -4(%rsp)
+
+// CHECK: vpsubb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb]
+          vpsubb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf8,0x28]
+          vpsubb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb]
+          vpsubw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf9,0x28]
+          vpsubw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb]
+          vpsubd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfa,0x28]
+          vpsubd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb]
+          vpsubq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfb,0x28]
+          vpsubq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubsb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb]
+          vpsubsb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubsb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe8,0x28]
+          vpsubsb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb]
+          vpsubsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe9,0x28]
+          vpsubsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubusb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb]
+          vpsubusb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubusb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd8,0x28]
+          vpsubusb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubusw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb]
+          vpsubusw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubusw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd9,0x28]
+          vpsubusw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb]
+          vpaddb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfc,0x28]
+          vpaddb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb]
+          vpaddw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfd,0x28]
+          vpaddw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb]
+          vpaddd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfe,0x28]
+          vpaddd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb]
+          vpaddq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd4,0x28]
+          vpaddq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddsb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb]
+          vpaddsb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddsb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xec,0x28]
+          vpaddsb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb]
+          vpaddsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xed,0x28]
+          vpaddsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddusb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb]
+          vpaddusb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddusb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdc,0x28]
+          vpaddusb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddusw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb]
+          vpaddusw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddusw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdd,0x28]
+          vpaddusw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhuw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb]
+          vpmulhuw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhuw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe4,0x28]
+          vpmulhuw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb]
+          vpmulhw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe5,0x28]
+          vpmulhw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmullw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb]
+          vpmullw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmullw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd5,0x28]
+          vpmullw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmuludq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb]
+          vpmuludq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmuludq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf4,0x28]
+          vpmuludq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpavgb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb]
+          vpavgb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpavgb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe0,0x28]
+          vpavgb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpavgw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb]
+          vpavgw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpavgw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe3,0x28]
+          vpavgw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb]
+          vpminsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpminsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xea,0x28]
+          vpminsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminub  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb]
+          vpminub  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpminub  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xda,0x28]
+          vpminub  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb]
+          vpmaxsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaxsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xee,0x28]
+          vpmaxsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxub  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb]
+          vpmaxub  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaxub  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xde,0x28]
+          vpmaxub  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsadbw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb]
+          vpsadbw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsadbw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf6,0x28]
+          vpsadbw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsllw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb]
+          vpsllw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsllw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf1,0x28]
+          vpsllw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpslld  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb]
+          vpslld  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpslld  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf2,0x28]
+          vpslld  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsllq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb]
+          vpsllq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsllq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf3,0x28]
+          vpsllq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsraw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb]
+          vpsraw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsraw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe1,0x28]
+          vpsraw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrad  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb]
+          vpsrad  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrad  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe2,0x28]
+          vpsrad  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrlw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb]
+          vpsrlw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrlw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd1,0x28]
+          vpsrlw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrld  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb]
+          vpsrld  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrld  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd2,0x28]
+          vpsrld  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrlq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb]
+          vpsrlq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrlq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd3,0x28]
+          vpsrlq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpslld  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
+          vpslld  $10, %xmm12, %xmm13
+
+// CHECK: vpslldq  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a]
+          vpslldq  $10, %xmm12, %xmm13
+
+// CHECK: vpsllq  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a]
+          vpsllq  $10, %xmm12, %xmm13
+
+// CHECK: vpsllw  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a]
+          vpsllw  $10, %xmm12, %xmm13
+
+// CHECK: vpsrad  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a]
+          vpsrad  $10, %xmm12, %xmm13
+
+// CHECK: vpsraw  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a]
+          vpsraw  $10, %xmm12, %xmm13
+
+// CHECK: vpsrld  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a]
+          vpsrld  $10, %xmm12, %xmm13
+
+// CHECK: vpsrldq  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a]
+          vpsrldq  $10, %xmm12, %xmm13
+
+// CHECK: vpsrlq  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a]
+          vpsrlq  $10, %xmm12, %xmm13
+
+// CHECK: vpsrlw  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a]
+          vpsrlw  $10, %xmm12, %xmm13
+
+// CHECK: vpslld  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
+          vpslld  $10, %xmm12, %xmm13
+
+// CHECK: vpand  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb]
+          vpand  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpand  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdb,0x28]
+          vpand  (%rax), %xmm12, %xmm13
+
+// CHECK: vpor  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb]
+          vpor  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpor  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xeb,0x28]
+          vpor  (%rax), %xmm12, %xmm13
+
+// CHECK: vpxor  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb]
+          vpxor  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpxor  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xef,0x28]
+          vpxor  (%rax), %xmm12, %xmm13
+
+// CHECK: vpandn  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb]
+          vpandn  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpandn  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdf,0x28]
+          vpandn  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb]
+          vpcmpeqb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x74,0x28]
+          vpcmpeqb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb]
+          vpcmpeqw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x75,0x28]
+          vpcmpeqw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb]
+          vpcmpeqd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x76,0x28]
+          vpcmpeqd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb]
+          vpcmpgtb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x64,0x28]
+          vpcmpgtb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb]
+          vpcmpgtw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x65,0x28]
+          vpcmpgtw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb]
+          vpcmpgtd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x66,0x28]
+          vpcmpgtd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpacksswb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb]
+          vpacksswb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpacksswb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x63,0x28]
+          vpacksswb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpackssdw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb]
+          vpackssdw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpackssdw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6b,0x28]
+          vpackssdw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpackuswb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb]
+          vpackuswb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpackuswb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x67,0x28]
+          vpackuswb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpshufd  $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04]
+          vpshufd  $4, %xmm12, %xmm13
+
+// CHECK: vpshufd  $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04]
+          vpshufd  $4, (%rax), %xmm13
+
+// CHECK: vpshufhw  $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04]
+          vpshufhw  $4, %xmm12, %xmm13
+
+// CHECK: vpshufhw  $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04]
+          vpshufhw  $4, (%rax), %xmm13
+
+// CHECK: vpshuflw  $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04]
+          vpshuflw  $4, %xmm12, %xmm13
+
+// CHECK: vpshuflw  $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04]
+          vpshuflw  $4, (%rax), %xmm13
+
+// CHECK: vpunpcklbw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb]
+          vpunpcklbw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklbw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x60,0x28]
+          vpunpcklbw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpcklwd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb]
+          vpunpcklwd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklwd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x61,0x28]
+          vpunpcklwd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckldq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb]
+          vpunpckldq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckldq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x62,0x28]
+          vpunpckldq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpcklqdq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb]
+          vpunpcklqdq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklqdq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6c,0x28]
+          vpunpcklqdq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhbw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb]
+          vpunpckhbw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhbw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x68,0x28]
+          vpunpckhbw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhwd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb]
+          vpunpckhwd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhwd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x69,0x28]
+          vpunpckhwd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhdq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb]
+          vpunpckhdq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhdq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6a,0x28]
+          vpunpckhdq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhqdq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb]
+          vpunpckhqdq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhqdq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6d,0x28]
+          vpunpckhqdq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpinsrw  $7, %eax, %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07]
+          vpinsrw  $7, %eax, %xmm12, %xmm13
+
+// CHECK: vpinsrw  $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07]
+          vpinsrw  $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vpextrw  $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
+          vpextrw  $7, %xmm12, %eax
+
+// CHECK: vpmovmskb  %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4]
+          vpmovmskb  %xmm12, %eax
+
+// CHECK: vmaskmovdqu  %xmm14, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe]
+          vmaskmovdqu  %xmm14, %xmm15
+
+// CHECK: vmovd  %eax, %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0xf0]
+          vmovd  %eax, %xmm14
+
+// CHECK: vmovd  (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0x30]
+          vmovd  (%rax), %xmm14
+
+// CHECK: vmovd  %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x7e,0x30]
+          vmovd  %xmm14, (%rax)
+
+// CHECK: vmovd  %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+          vmovd  %rax, %xmm14
+
+// CHECK: vmovq  %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0xd6,0x30]
+          vmovq  %xmm14, (%rax)
+
+// CHECK: vmovq  %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6]
+          vmovq  %xmm14, %xmm12
+
+// CHECK: vmovq  (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x7a,0x7e,0x30]
+          vmovq  (%rax), %xmm14
+
+// CHECK: vmovq  %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+          vmovq  %rax, %xmm14
+
+// CHECK: vmovq  %xmm14, %rax
+// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0]
+          vmovq  %xmm14, %rax
+
+// CHECK: vcvtpd2dq  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3]
+          vcvtpd2dq  %xmm11, %xmm12
+
+// CHECK: vcvtdq2pd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3]
+          vcvtdq2pd  %xmm11, %xmm12
+
+// CHECK: vcvtdq2pd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0xe6,0x20]
+          vcvtdq2pd  (%rax), %xmm12
+
+// CHECK: vmovshdup  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3]
+          vmovshdup  %xmm11, %xmm12
+
+// CHECK: vmovshdup  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0x16,0x20]
+          vmovshdup  (%rax), %xmm12
+
+// CHECK: vmovsldup  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3]
+          vmovsldup  %xmm11, %xmm12
+
+// CHECK: vmovsldup  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0x12,0x20]
+          vmovsldup  (%rax), %xmm12
+
+// CHECK: vmovddup  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3]
+          vmovddup  %xmm11, %xmm12
+
+// CHECK: vmovddup  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7b,0x12,0x20]
+          vmovddup  (%rax), %xmm12
+
+// CHECK: vaddsubps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb]
+          vaddsubps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vaddsubps  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0xd0,0x20]
+          vaddsubps  (%rax), %xmm11, %xmm12
+
+// CHECK: vaddsubpd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb]
+          vaddsubpd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vaddsubpd  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x21,0xd0,0x20]
+          vaddsubpd  (%rax), %xmm11, %xmm12
+
+// CHECK: vhaddps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb]
+          vhaddps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vhaddps  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0x7c,0x28]
+          vhaddps  (%rax), %xmm12, %xmm13
+
+// CHECK: vhaddpd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb]
+          vhaddpd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vhaddpd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x7c,0x28]
+          vhaddpd  (%rax), %xmm12, %xmm13
+
+// CHECK: vhsubps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb]
+          vhsubps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vhsubps  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0x7d,0x28]
+          vhsubps  (%rax), %xmm12, %xmm13
+
+// CHECK: vhsubpd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb]
+          vhsubpd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vhsubpd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x7d,0x28]
+          vhsubpd  (%rax), %xmm12, %xmm13
+

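A note on the encoding: bytes in the CHECK lines above: they are VEX-prefixed AVX forms (0xC4 opens the three-byte VEX prefix, 0xC5 the two-byte one). As an illustration only — a hand-rolled sketch following the VEX layout, not LLVM's decoder, with field names chosen here — this unpacks the first vcmpps test, [0xc4,0x41,0x18,0xc2,0xfa,0x00], and recovers the register fields:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // vcmpps $0, %xmm10, %xmm12, %xmm15
    const uint8_t enc[] = {0xc4, 0x41, 0x18, 0xc2, 0xfa, 0x00};
    // Byte 1 holds the inverted R/X/B extension bits plus the opcode map.
    unsigned R   = !((enc[1] >> 7) & 1);  // 1: extends ModRM.reg to xmm8-15
    unsigned X   = !((enc[1] >> 6) & 1);  // 0: no SIB index extension
    unsigned B   = !((enc[1] >> 5) & 1);  // 1: extends ModRM.rm to xmm8-15
    unsigned map = enc[1] & 0x1f;         // 1 => the 0F opcode map
    // Byte 2 holds W, the inverted vvvv source register, L, and pp.
    unsigned vvvv = (~(enc[2] >> 3)) & 0xf;  // 0b1100 = 12 => %xmm12
    unsigned L    = (enc[2] >> 2) & 1;       // 0 => 128-bit operation
    unsigned pp   = enc[2] & 3;              // 0 => no SIMD prefix ("ps" forms)
    // Opcode 0xC2 is CMPPS; ModRM 0xFA = 11.111.010 gives reg=7 (+R => %xmm15)
    // and rm=2 (+B => %xmm10); the trailing 0x00 is the $0 immediate.
    printf("R=%u X=%u B=%u map=%u vvvv=%u L=%u pp=%u\n", R, X, B, map, vvvv, L, pp);
  }
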
Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-new-encoder.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-new-encoder.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-new-encoder.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-new-encoder.s Fri Jul  2 04:57:13 2010
@@ -144,3 +144,9 @@
 // CHECK: movzbq	(%rsp), %rsi
 // CHECK:  encoding: [0x48,0x0f,0xb6,0x34,0x24]
         movzx 0(%rsp), %rsi
+
+
+// rdar://7873482
+// CHECK: [0x65,0x8b,0x04,0x25,0x7c,0x00,0x00,0x00]
+        movl	%gs:124, %eax
+

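The %gs:124 test above exercises a segment-override prefix on an absolute address. A byte-by-byte reading of the expected encoding, as a sketch (the breakdown is standard x86 ModRM/SIB decoding, not anything LLVM-specific):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    // movl %gs:124, %eax
    const uint8_t enc[] = {0x65, 0x8b, 0x04, 0x25, 0x7c, 0x00, 0x00, 0x00};
    // 0x65          GS segment-override prefix
    // 0x8b          MOV r32, r/m32
    // 0x04          ModRM: mod=00, reg=000 (%eax), rm=100 (SIB follows)
    // 0x25          SIB: no index, base=101 => 32-bit absolute displacement
    // 7c 00 00 00   little-endian disp32
    uint32_t disp;
    std::memcpy(&disp, enc + 4, sizeof(disp));
    std::printf("disp = %u\n", disp);  // 124 on a little-endian host
  }
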
Modified: llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-operands.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-operands.s?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-operands.s (original)
+++ llvm/branches/wendling/eh/test/MC/AsmParser/X86/x86_64-operands.s Fri Jul  2 04:57:13 2010
@@ -1,5 +1,3 @@
-// FIXME: Actually test that we get the expected results.
-        
 // RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s
 
 # CHECK: callq a
@@ -7,3 +5,11 @@
 
 # CHECK: leaq	-40(%rbp), %r15
 	leaq	-40(%rbp), %r15
+
+
+
+// rdar://8013734 - Alias dr6=db6
+mov %dr6, %rax
+mov %db6, %rax
+# CHECK: movq	%dr6, %rax
+# CHECK: movq	%dr6, %rax

Modified: llvm/branches/wendling/eh/test/TableGen/defmclass.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/TableGen/defmclass.td?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/TableGen/defmclass.td (original)
+++ llvm/branches/wendling/eh/test/TableGen/defmclass.td Fri Jul  2 04:57:13 2010
@@ -16,6 +16,7 @@
 class I<bits<4> op> : BaseI {
   bits<4> opcode = op;
   int val = !if(!eq(Prefix, xd.Prefix), 7, 21);
+  int check = !if(hasVEX_4VPrefix, 0, 10);
 }
 
 multiclass R {
@@ -33,4 +34,5 @@
   defm SD : R, M, XS;
 }
 
+// CHECK: int check = 0;
 defm Instr : Y, VEX;

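TableGen's !if(cond, a, b) resolves like a ternary when the record is instantiated, so the new CHECK line asserts that `check` collapses to 0 — implying hasVEX_4VPrefix is set by the VEX mix-in, which is defined earlier in the .td file, outside this hunk. A rough C++ analogue of that resolution, with the flag value inferred from the CHECK rather than shown in the diff:

  #include <cstdio>

  struct Record {
    bool hasVEX_4VPrefix = false;            // default in the base classes
    int check() const { return hasVEX_4VPrefix ? 0 : 10; }  // the !if above
  };

  int main() {
    Record Instr;
    Instr.hasVEX_4VPrefix = true;            // the effect of mixing in VEX
    std::printf("check = %d\n", Instr.check());  // 0, matching the CHECK
  }
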
Modified: llvm/branches/wendling/eh/test/Transforms/IndVarSimplify/tripcount_compute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Transforms/IndVarSimplify/tripcount_compute.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Transforms/IndVarSimplify/tripcount_compute.ll (original)
+++ llvm/branches/wendling/eh/test/Transforms/IndVarSimplify/tripcount_compute.ll Fri Jul  2 04:57:13 2010
@@ -1,9 +1,12 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
 ; These tests ensure that we can compute the trip count of various forms of
 ; loops.  If the trip count of the loop is computable, then we will know what
 ; the exit value of the loop will be for some value, allowing us to substitute
 ; it directly into users outside of the loop, making the loop dead.
-;
-; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | not grep br
+
+; CHECK: @linear_setne
+; CHECK: ret i32 100
 
 define i32 @linear_setne() {
 entry:
@@ -19,6 +22,9 @@
 	ret i32 %i
 }
 
+; CHECK: @linear_setne_2
+; CHECK: ret i32 100
+
 define i32 @linear_setne_2() {
 entry:
 	br label %loop
@@ -33,6 +39,9 @@
 	ret i32 %i
 }
 
+; CHECK: @linear_setne_overflow
+; CHECK: ret i32 0
+
 define i32 @linear_setne_overflow() {
 entry:
 	br label %loop
@@ -47,6 +56,9 @@
 	ret i32 %i
 }
 
+; CHECK: @linear_setlt
+; CHECK: ret i32 100
+
 define i32 @linear_setlt() {
 entry:
 	br label %loop
@@ -61,6 +73,9 @@
 	ret i32 %i
 }
 
+; CHECK: @quadratic_setlt
+; CHECK: ret i32 34
+
 define i32 @quadratic_setlt() {
 entry:
 	br label %loop
@@ -76,6 +91,9 @@
 	ret i32 %i
 }
 
+; CHECK: @chained
+; CHECK: ret i32 200
+
 define i32 @chained() {
 entry:
 	br label %loop
@@ -98,3 +116,47 @@
 loopexit2:		; preds = %loop2
 	ret i32 %j
 }
+
+; CHECK: @chained4
+; CHECK: ret i32 400
+
+define i32 @chained4() {
+entry:
+  br label %loop
+
+loop:                                             ; preds = %loop, %entry
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]  ; <i32> [#uses=3]
+  %i.next = add i32 %i, 1                         ; <i32> [#uses=1]
+  %c = icmp ne i32 %i.next, 100                   ; <i1> [#uses=1]
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:                                         ; preds = %loop
+  br label %loop2
+
+loop2:                                            ; preds = %loop2, %loopexit
+  %j = phi i32 [ %i.next, %loopexit ], [ %j.next, %loop2 ] ; <i32> [#uses=3]
+  %j.next = add i32 %j, 1                         ; <i32> [#uses=1]
+  %c2 = icmp ne i32 %j.next, 200                  ; <i1> [#uses=1]
+  br i1 %c2, label %loop2, label %loopexit2
+
+loopexit2:                                       ; preds = %loop2
+  br label %loop8
+
+loop8:                                           ; preds = %loop8, %loopexit2
+  %k = phi i32 [ %j.next, %loopexit2 ], [ %k.next, %loop8 ] ; <i32> [#uses=3]
+  %k.next = add i32 %k, 1                         ; <i32> [#uses=1]
+  %c8 = icmp ne i32 %k.next, 300                  ; <i1> [#uses=1]
+  br i1 %c8, label %loop8, label %loopexit8
+
+loopexit8:                                       ; preds = %loop8
+  br label %loop9
+
+loop9:                                           ; preds = %loop9, %loopexit8
+  %l = phi i32 [ %k.next, %loopexit8 ], [ %l.next, %loop9 ] ; <i32> [#uses=3]
+  %l.next = add i32 %l, 1                         ; <i32> [#uses=1]
+  %c9 = icmp ne i32 %l.next, 400                  ; <i1> [#uses=1]
+  br i1 %c9, label %loop9, label %loopexit9
+
+loopexit9:                                       ; preds = %loop9
+  ret i32 %l.next
+}

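The new @chained4 test is the clearest worked example of what the comment at the top of this file describes: each loop's trip count is computable, so its exit value feeds the next loop as a constant and the whole function folds to a return of 400. The same computation as straight-line C++, for reference:

  #include <cstdio>

  static int chained4() {
    // Four chained counting loops; each exit value seeds the next phi.
    int i = 0; do { ++i; } while (i != 100);  // exits with i == 100
    int j = i; do { ++j; } while (j != 200);  // exits with j == 200
    int k = j; do { ++k; } while (k != 300);  // exits with k == 300
    int l = k; do { ++l; } while (l != 400);  // exits with l == 400
    return l;  // indvars proves this is the constant 400
  }

  int main() { std::printf("%d\n", chained4()); }  // prints 400
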
Modified: llvm/branches/wendling/eh/test/Transforms/InstCombine/icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Transforms/InstCombine/icmp.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Transforms/InstCombine/icmp.ll (original)
+++ llvm/branches/wendling/eh/test/Transforms/InstCombine/icmp.ll Fri Jul  2 04:57:13 2010
@@ -131,3 +131,26 @@
 ; CHECK: ret i1 false
 }
 
+define i1 @test14(i8 %X) nounwind readnone {
+entry:
+        %cmp = icmp slt i8 undef, -128
+        ret i1 %cmp
+; CHECK: @test14
+; CHECK: ret i1 false
+}
+
+define i1 @test15() nounwind readnone {
+entry:
+        %cmp = icmp eq i8 undef, -128
+        ret i1 %cmp
+; CHECK: @test15
+; CHECK: ret i1 undef
+}
+
+define i1 @test16() nounwind readnone {
+entry:
+        %cmp = icmp ne i8 undef, -128
+        ret i1 %cmp
+; CHECK: @test16
+; CHECK: ret i1 undef
+}

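The reasoning behind these three new tests: in @test14 the slt folds to false regardless of the undef operand, because -128 is the smallest i8 value and nothing can be strictly less than it; in @test15 and @test16, eq/ne against undef fold to undef, since undef may be chosen to make the comparison either true or false. A small C++ check of the range argument:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Exhaustively confirm that no i8 value is strictly below INT8_MIN,
    // which is why `icmp slt i8 undef, -128` is false for every choice
    // of the undef operand.
    bool any = false;
    for (int v = -128; v <= 127; ++v)
      any |= (static_cast<int8_t>(v) < INT8_MIN);
    std::printf("exists x: x < -128 ? %s\n", any ? "yes" : "no");  // no
  }
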
Modified: llvm/branches/wendling/eh/test/Transforms/LoopRotate/phi-duplicate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Transforms/LoopRotate/phi-duplicate.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Transforms/LoopRotate/phi-duplicate.ll (original)
+++ llvm/branches/wendling/eh/test/Transforms/LoopRotate/phi-duplicate.ll Fri Jul  2 04:57:13 2010
@@ -30,6 +30,6 @@
 ; Should only end up with one phi.
 ; CHECK: for.body:
 ; CHECK-NEXT: %j.02 = phi i64
-; CHECK-NOT phi
+; CHECK-NOT: phi
 ; CHECK: ret void
 

Modified: llvm/branches/wendling/eh/test/Transforms/PartialSpecialize/two-specializations.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/Transforms/PartialSpecialize/two-specializations.ll?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/Transforms/PartialSpecialize/two-specializations.ll (original)
+++ llvm/branches/wendling/eh/test/Transforms/PartialSpecialize/two-specializations.ll Fri Jul  2 04:57:13 2010
@@ -1,7 +1,8 @@
 ; If there are two specializations of a function, make sure each callsite
 ; calls the right one.
 ;
-; RUN: opt -S -partialspecialization %s | FileCheck %s
+; RN: opt -S -partialspecialization %s | FileCheck %s
+; RUN: true
 declare void @callback1()
 declare void @callback2()
 
@@ -14,13 +15,13 @@
 {
 Entry:
 ; CHECK: Entry
-; CHECK-NEXT: call void @UseCallback1()
-; CHECK-NEXT: call void @UseCallback1()
-; CHECK-NEXT: call void @UseCallback2()
-; CHECK-NEXT: call void @UseCallback(void ()* %pNonConstCallback)
-; CHECK-NEXT: call void @UseCallback1()
-; CHECK-NEXT: call void @UseCallback2()
-; CHECK-NEXT: call void @UseCallback2()
+; CHECK-NEXT: call void @callback1()
+; CHECK-NEXT: call void @callback1()
+; CHECK-NEXT: call void @callback2()
+; CHECK-NEXT: call void %pNonConstCallback()
+; CHECK-NEXT: call void @callback1()
+; CHECK-NEXT: call void @callback2()
+; CHECK-NEXT: call void @callback2()
   call void @UseCallback(void()* @callback1)
   call void @UseCallback(void()* @callback1)
   call void @UseCallback(void()* @callback2)

Modified: llvm/branches/wendling/eh/test/lit.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/test/lit.cfg?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/test/lit.cfg (original)
+++ llvm/branches/wendling/eh/test/lit.cfg Fri Jul  2 04:57:13 2010
@@ -49,7 +49,7 @@
 config.environment['HOME'] = os.environ['HOME']
 
 # Propagate LLVM_SRC_ROOT into the environment.
-config.environment['LLVM_SRC_ROOT'] = config.llvm_src_root
+config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '')
 
 # Propagate PYTHON_EXECUTABLE into the environment.
 config.environment['PYTHON_EXECUTABLE'] = getattr(config, 'python_executable',

Modified: llvm/branches/wendling/eh/tools/bugpoint/BugDriver.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/bugpoint/BugDriver.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/bugpoint/BugDriver.h (original)
+++ llvm/branches/wendling/eh/tools/bugpoint/BugDriver.h Fri Jul  2 04:57:13 2010
@@ -16,7 +16,7 @@
 #ifndef BUGDRIVER_H
 #define BUGDRIVER_H
 
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
 #include <vector>
 #include <string>
 
@@ -325,7 +325,7 @@
 /// module, split the functions OUT of the specified module, and place them in
 /// the new module.
 Module *SplitFunctionsOutOfModule(Module *M, const std::vector<Function*> &F,
-                                  DenseMap<const Value*, Value*> &ValueMap);
+                                  ValueMap<const Value*, Value*> &VMap);
 
 } // End llvm namespace
 

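The DenseMap-to-ValueMap switch here and in the bugpoint .cpp files below is not just a rename: ValueMap installs callbacks on its Value* keys, so entries follow replaceAllUsesWith and are erased when a key is deleted, rather than dangling as raw DenseMap keys would while bugpoint mutates the cloned modules. A minimal sketch of the cloning pattern these files adopt, written against this branch's headers:

  #include "llvm/ADT/ValueMap.h"
  #include "llvm/Module.h"
  #include "llvm/Transforms/Utils/Cloning.h"
  using namespace llvm;

  Function *findClone(Module *M, Function *F) {
    // CloneModule fills VMap with old-value -> cloned-value entries.
    ValueMap<const Value*, Value*> VMap;
    Module *Clone = CloneModule(M, VMap);
    (void)Clone;  // a real caller takes ownership of the clone
    // Safe even if F is later RAUW'd or erased elsewhere: the ValueMap
    // entry is updated or dropped via the key's callback.
    return cast<Function>(VMap[F]);
  }
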
Modified: llvm/branches/wendling/eh/tools/bugpoint/CrashDebugger.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/bugpoint/CrashDebugger.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/bugpoint/CrashDebugger.cpp (original)
+++ llvm/branches/wendling/eh/tools/bugpoint/CrashDebugger.cpp Fri Jul  2 04:57:13 2010
@@ -130,14 +130,14 @@
 ReduceCrashingGlobalVariables::TestGlobalVariables(
                               std::vector<GlobalVariable*> &GVs) {
   // Clone the program to try hacking it apart...
-  DenseMap<const Value*, Value*> ValueMap;
-  Module *M = CloneModule(BD.getProgram(), ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Module *M = CloneModule(BD.getProgram(), VMap);
 
   // Convert list to set for fast lookup...
   std::set<GlobalVariable*> GVSet;
 
   for (unsigned i = 0, e = GVs.size(); i != e; ++i) {
-    GlobalVariable* CMGV = cast<GlobalVariable>(ValueMap[GVs[i]]);
+    GlobalVariable* CMGV = cast<GlobalVariable>(VMap[GVs[i]]);
     assert(CMGV && "Global Variable not in module?!");
     GVSet.insert(CMGV);
   }
@@ -204,13 +204,13 @@
     return false;
 
   // Clone the program to try hacking it apart...
-  DenseMap<const Value*, Value*> ValueMap;
-  Module *M = CloneModule(BD.getProgram(), ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Module *M = CloneModule(BD.getProgram(), VMap);
 
   // Convert list to set for fast lookup...
   std::set<Function*> Functions;
   for (unsigned i = 0, e = Funcs.size(); i != e; ++i) {
-    Function *CMF = cast<Function>(ValueMap[Funcs[i]]);
+    Function *CMF = cast<Function>(VMap[Funcs[i]]);
     assert(CMF && "Function not in module?!");
     assert(CMF->getFunctionType() == Funcs[i]->getFunctionType() && "wrong ty");
     assert(CMF->getName() == Funcs[i]->getName() && "wrong name");
@@ -270,13 +270,13 @@
 
 bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
   // Clone the program to try hacking it apart...
-  DenseMap<const Value*, Value*> ValueMap;
-  Module *M = CloneModule(BD.getProgram(), ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Module *M = CloneModule(BD.getProgram(), VMap);
 
   // Convert list to set for fast lookup...
   SmallPtrSet<BasicBlock*, 8> Blocks;
   for (unsigned i = 0, e = BBs.size(); i != e; ++i)
-    Blocks.insert(cast<BasicBlock>(ValueMap[BBs[i]]));
+    Blocks.insert(cast<BasicBlock>(VMap[BBs[i]]));
 
   outs() << "Checking for crash with only these blocks:";
   unsigned NumPrint = Blocks.size();
@@ -371,14 +371,14 @@
 bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
                                            &Insts) {
   // Clone the program to try hacking it apart...
-  DenseMap<const Value*, Value*> ValueMap;
-  Module *M = CloneModule(BD.getProgram(), ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Module *M = CloneModule(BD.getProgram(), VMap);
 
   // Convert list to set for fast lookup...
   SmallPtrSet<Instruction*, 64> Instructions;
   for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
     assert(!isa<TerminatorInst>(Insts[i]));
-    Instructions.insert(cast<Instruction>(ValueMap[Insts[i]]));
+    Instructions.insert(cast<Instruction>(VMap[Insts[i]]));
   }
 
   outs() << "Checking for crash with only " << Instructions.size();

Modified: llvm/branches/wendling/eh/tools/bugpoint/ExtractFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/bugpoint/ExtractFunction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/bugpoint/ExtractFunction.cpp (original)
+++ llvm/branches/wendling/eh/tools/bugpoint/ExtractFunction.cpp Fri Jul  2 04:57:13 2010
@@ -201,7 +201,7 @@
 /// static ctors/dtors, we need to add an llvm.global_[cd]tors global to M2, and
 /// prune appropriate entries out of M1s list.
 static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
-                                DenseMap<const Value*, Value*> ValueMap) {
+                                ValueMap<const Value*, Value*> VMap) {
   GlobalVariable *GV = M1->getNamedGlobal(GlobalName);
   if (!GV || GV->isDeclaration() || GV->hasLocalLinkage() ||
       !GV->use_empty()) return;
@@ -229,7 +229,7 @@
           M1Tors.push_back(std::make_pair(F, Priority));
         else {
           // Map to M2's version of the function.
-          F = cast<Function>(ValueMap[F]);
+          F = cast<Function>(VMap[F]);
           M2Tors.push_back(std::make_pair(F, Priority));
         }
       }
@@ -264,7 +264,7 @@
 Module *
 llvm::SplitFunctionsOutOfModule(Module *M,
                                 const std::vector<Function*> &F,
-                                DenseMap<const Value*, Value*> &ValueMap) {
+                                ValueMap<const Value*, Value*> &VMap) {
   // Make sure functions & globals are all external so that linkage
   // between the two modules will work.
   for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
@@ -276,8 +276,8 @@
     I->setLinkage(GlobalValue::ExternalLinkage);
   }
 
-  DenseMap<const Value*, Value*> NewValueMap;
-  Module *New = CloneModule(M, NewValueMap);
+  ValueMap<const Value*, Value*> NewVMap;
+  Module *New = CloneModule(M, NewVMap);
 
   // Make sure global initializers exist only in the safe module (CBE->.so)
   for (Module::global_iterator I = New->global_begin(), E = New->global_end();
@@ -287,11 +287,11 @@
   // Remove the Test functions from the Safe module
   std::set<Function *> TestFunctions;
   for (unsigned i = 0, e = F.size(); i != e; ++i) {
-    Function *TNOF = cast<Function>(ValueMap[F[i]]);
+    Function *TNOF = cast<Function>(VMap[F[i]]);
     DEBUG(errs() << "Removing function ");
     DEBUG(WriteAsOperand(errs(), TNOF, false));
     DEBUG(errs() << "\n");
-    TestFunctions.insert(cast<Function>(NewValueMap[TNOF]));
+    TestFunctions.insert(cast<Function>(NewVMap[TNOF]));
     DeleteFunctionBody(TNOF);       // Function is now external in this module!
   }
 
@@ -304,8 +304,8 @@
 
   // Make sure that there is a global ctor/dtor array in both halves of the
   // module if they both have static ctor/dtor functions.
-  SplitStaticCtorDtor("llvm.global_ctors", M, New, NewValueMap);
-  SplitStaticCtorDtor("llvm.global_dtors", M, New, NewValueMap);
+  SplitStaticCtorDtor("llvm.global_ctors", M, New, NewVMap);
+  SplitStaticCtorDtor("llvm.global_dtors", M, New, NewVMap);
   
   return New;
 }

Modified: llvm/branches/wendling/eh/tools/bugpoint/Miscompilation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/bugpoint/Miscompilation.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/bugpoint/Miscompilation.cpp (original)
+++ llvm/branches/wendling/eh/tools/bugpoint/Miscompilation.cpp Fri Jul  2 04:57:13 2010
@@ -251,10 +251,10 @@
   outs() << '\n';
 
   // Split the module into the two halves of the program we want.
-  DenseMap<const Value*, Value*> ValueMap;
-  Module *ToNotOptimize = CloneModule(BD.getProgram(), ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
   Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize, Funcs,
-                                                 ValueMap);
+                                                 VMap);
 
   // Run the predicate; note that the predicate will delete both input modules.
   return TestFn(BD, ToOptimize, ToNotOptimize, Error);
@@ -285,11 +285,11 @@
   while (1) {
     if (BugpointIsInterrupted) return MadeChange;
     
-    DenseMap<const Value*, Value*> ValueMap;
-    Module *ToNotOptimize = CloneModule(BD.getProgram(), ValueMap);
+    ValueMap<const Value*, Value*> VMap;
+    Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
     Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
                                                    MiscompiledFunctions,
-                                                   ValueMap);
+                                                   VMap);
     Module *ToOptimizeLoopExtracted = BD.ExtractLoop(ToOptimize);
     if (!ToOptimizeLoopExtracted) {
       // If the loop extractor crashed or if there were no extractable loops,
@@ -448,11 +448,11 @@
   outs() << '\n';
 
   // Split the module into the two halves of the program we want.
-  DenseMap<const Value*, Value*> ValueMap;
-  Module *ToNotOptimize = CloneModule(BD.getProgram(), ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
   Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
                                                  FunctionsBeingTested,
-                                                 ValueMap);
+                                                 VMap);
 
   // Try the extraction.  If it doesn't work, then the block extractor crashed
   // or something, in which case bugpoint can't chase down this possibility.
@@ -505,11 +505,11 @@
       return false;
   }
 
-  DenseMap<const Value*, Value*> ValueMap;
-  Module *ProgClone = CloneModule(BD.getProgram(), ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Module *ProgClone = CloneModule(BD.getProgram(), VMap);
   Module *ToExtract = SplitFunctionsOutOfModule(ProgClone,
                                                 MiscompiledFunctions,
-                                                ValueMap);
+                                                VMap);
   Module *Extracted = BD.ExtractMappedBlocksFromModule(Blocks, ToExtract);
   if (Extracted == 0) {
     // Weird, extraction should have worked.
@@ -687,11 +687,11 @@
 
   // Output a bunch of bitcode files for the user...
   outs() << "Outputting reduced bitcode files which expose the problem:\n";
-  DenseMap<const Value*, Value*> ValueMap;
-  Module *ToNotOptimize = CloneModule(getProgram(), ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Module *ToNotOptimize = CloneModule(getProgram(), VMap);
   Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
                                                  MiscompiledFunctions,
-                                                 ValueMap);
+                                                 VMap);
 
   outs() << "  Non-optimized portion: ";
   ToNotOptimize = swapProgramIn(ToNotOptimize);
@@ -894,6 +894,8 @@
   }
   delete Test;
 
+  FileRemover TestModuleBCRemover(TestModuleBC, !SaveTemps);
+
   // Make the shared library
   sys::Path SafeModuleBC("bugpoint.safe.bc");
   if (SafeModuleBC.makeUnique(true, &ErrMsg)) {
@@ -907,11 +909,16 @@
            << "'\nExiting.";
     exit(1);
   }
+
+  FileRemover SafeModuleBCRemover(SafeModuleBC, !SaveTemps);
+
   std::string SharedObject = BD.compileSharedObject(SafeModuleBC.str(), Error);
   if (!Error.empty())
     return false;
   delete Safe;
 
+  FileRemover SharedObjectRemover(sys::Path(SharedObject), !SaveTemps);
+
   // Run the code generator on the `Test' code, loading the shared library.
   // The function returns whether or not the new output differs from reference.
   bool Result = BD.diffProgram(TestModuleBC.str(), SharedObject, false, &Error);
@@ -922,9 +929,6 @@
     errs() << ": still failing!\n";
   else
     errs() << ": didn't fail.\n";
-  TestModuleBC.eraseFromDisk();
-  SafeModuleBC.eraseFromDisk();
-  sys::Path(SharedObject).eraseFromDisk();
 
   return Result;
 }
@@ -956,9 +960,9 @@
     return true;
 
   // Split the module into the two halves of the program we want.
-  DenseMap<const Value*, Value*> ValueMap;
-  Module *ToNotCodeGen = CloneModule(getProgram(), ValueMap);
-  Module *ToCodeGen = SplitFunctionsOutOfModule(ToNotCodeGen, Funcs, ValueMap);
+  ValueMap<const Value*, Value*> VMap;
+  Module *ToNotCodeGen = CloneModule(getProgram(), VMap);
+  Module *ToCodeGen = SplitFunctionsOutOfModule(ToNotCodeGen, Funcs, VMap);
 
   // Condition the modules
   CleanupAndPrepareModules(*this, ToCodeGen, ToNotCodeGen);

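The FileRemover hunks above replace the manual eraseFromDisk() calls at the
bottom of the function with scoped cleanup, so the temporary files are also
removed on the early error returns. A minimal sketch of the pattern, assuming
the 2010-era FileRemover from llvm/Support/FileUtilities.h that the hunks use:

  // Sketch only, not part of the patch.
  #include "llvm/Support/FileUtilities.h"  // FileRemover (assumed location)
  #include "llvm/System/Path.h"

  bool runStep(bool SaveTemps) {
    llvm::sys::Path TmpBC("bugpoint.test.bc");
    // Removes the file when the scope unwinds -- including on the early
    // error returns that previously leaked temporaries -- unless the user
    // passed -save-temps (hence the !SaveTemps flag).
    llvm::FileRemover Remover(TmpBC, !SaveTemps);
    if (false /* some step fails */)
      return false;  // file is still cleaned up here
    return true;     // ...and here
  }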
Modified: llvm/branches/wendling/eh/tools/edis/EDDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/edis/EDDisassembler.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/edis/EDDisassembler.cpp (original)
+++ llvm/branches/wendling/eh/tools/edis/EDDisassembler.cpp Fri Jul  2 04:57:13 2010
@@ -364,7 +364,7 @@
   sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
   MCContext context(*AsmInfo);
   OwningPtr<MCStreamer> streamer(createNullStreamer(context));
-  AsmParser genericParser(sourceMgr, context, *streamer, *AsmInfo);
+  AsmParser genericParser(*Tgt, sourceMgr, context, *streamer, *AsmInfo);
   OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(genericParser));
   
   AsmToken OpcodeToken = genericParser.Lex();

Modified: llvm/branches/wendling/eh/tools/gold/gold-plugin.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/gold/gold-plugin.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/gold/gold-plugin.cpp (original)
+++ llvm/branches/wendling/eh/tools/gold/gold-plugin.cpp Fri Jul  2 04:57:13 2010
@@ -42,6 +42,7 @@
   ld_plugin_get_symbols get_symbols = NULL;
   ld_plugin_add_input_file add_input_file = NULL;
   ld_plugin_add_input_library add_input_library = NULL;
+  ld_plugin_set_extra_library_path set_extra_library_path = NULL;
   ld_plugin_message message = discard_message;
 
   int api_version = 0;
@@ -66,6 +67,7 @@
   static std::string bc_path;
   static std::string as_path;
   static std::vector<std::string> pass_through;
+  static std::string extra_library_path;
   // Additional options to pass into the code generator.
   // Note: This array will contain all plugin options which are not claimed
   // as plugin exclusive to pass to the code generator.
@@ -88,6 +90,8 @@
       } else {
         as_path = opt.substr(strlen("as="));
       }
+    } else if (opt.startswith("extra-library-path=")) {
+      extra_library_path = opt.substr(strlen("extra-library-path="));
     } else if (opt.startswith("pass-through=")) {
       llvm::StringRef item = opt.substr(strlen("pass-through="));
       pass_through.push_back(item.str());
@@ -125,8 +129,6 @@
   // for services.
 
   bool registeredClaimFile = false;
-  bool registeredAllSymbolsRead = false;
-  bool registeredCleanup = false;
 
   for (; tv->tv_tag != LDPT_NULL; ++tv) {
     switch (tv->tv_tag) {
@@ -174,8 +176,6 @@
 
         if ((*callback)(all_symbols_read_hook) != LDPS_OK)
           return LDPS_ERR;
-
-        registeredAllSymbolsRead = true;
       } break;
       case LDPT_REGISTER_CLEANUP_HOOK: {
         ld_plugin_register_cleanup callback;
@@ -183,8 +183,6 @@
 
         if ((*callback)(cleanup_hook) != LDPS_OK)
           return LDPS_ERR;
-
-        registeredCleanup = true;
       } break;
       case LDPT_ADD_SYMBOLS:
         add_symbols = tv->tv_u.tv_add_symbols;
@@ -198,6 +196,9 @@
       case LDPT_ADD_INPUT_LIBRARY:
         add_input_library = tv->tv_u.tv_add_input_file;
         break;
+      case LDPT_SET_EXTRA_LIBRARY_PATH:
+        set_extra_library_path = tv->tv_u.tv_set_extra_library_path;
+        break;
       case LDPT_MESSAGE:
         message = tv->tv_u.tv_message;
         break;
@@ -439,17 +440,23 @@
 
   lto_codegen_dispose(cg);
 
-  if ((*add_input_file)(const_cast<char*>(uniqueObjPath.c_str())) != LDPS_OK) {
+  if ((*add_input_file)(uniqueObjPath.c_str()) != LDPS_OK) {
     (*message)(LDPL_ERROR, "Unable to add .o file to the link.");
     (*message)(LDPL_ERROR, "File left behind in: %s", uniqueObjPath.c_str());
     return LDPS_ERR;
   }
 
+  if (!options::extra_library_path.empty() &&
+      set_extra_library_path(options::extra_library_path.c_str()) != LDPS_OK) {
+    (*message)(LDPL_ERROR, "Unable to set the extra library path.");
+    return LDPS_ERR;
+  }
+
   for (std::vector<std::string>::iterator i = options::pass_through.begin(),
                                           e = options::pass_through.end();
        i != e; ++i) {
     std::string &item = *i;
-    char *item_p = const_cast<char*>(item.c_str());
+    const char *item_p = item.c_str();
     if (llvm::StringRef(item).startswith("-l")) {
       if (add_input_library(item_p + 2) != LDPS_OK) {
         (*message)(LDPL_ERROR, "Unable to add library to the link.");

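One hedged caveat on the gold-plugin hunks above: set_extra_library_path stays
NULL when the linker never reports LDPT_SET_EXTRA_LIBRARY_PATH, so calling it
unconditionally on a non-empty extra-library-path option assumes a new enough
gold. A defensive caller would check the hook first; the names below come from
the hunks, while the NULL check is an addition the patch itself does not make:

  // Sketch only, not part of the patch.
  if (!options::extra_library_path.empty()) {
    if (set_extra_library_path == NULL)
      return LDPS_ERR;  // linker predates LDPT_SET_EXTRA_LIBRARY_PATH
    if (set_extra_library_path(options::extra_library_path.c_str()) != LDPS_OK) {
      (*message)(LDPL_ERROR, "Unable to set the extra library path.");
      return LDPS_ERR;
    }
  }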
Modified: llvm/branches/wendling/eh/tools/llvm-extract/llvm-extract.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/llvm-extract/llvm-extract.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/llvm-extract/llvm-extract.cpp (original)
+++ llvm/branches/wendling/eh/tools/llvm-extract/llvm-extract.cpp Fri Jul  2 04:57:13 2010
@@ -112,6 +112,7 @@
   Passes.add(createGVExtractionPass(GVs, DeleteFn, Relink));
   if (!DeleteFn)
     Passes.add(createGlobalDCEPass());           // Delete unreachable globals
+  Passes.add(createStripDeadDebugInfoPass());    // Remove dead debug info
   Passes.add(createDeadTypeEliminationPass());   // Remove dead types...
   Passes.add(createStripDeadPrototypesPass());   // Remove dead func decls
 

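With the new pass in place, llvm-extract's cleanup pipeline reads roughly as
below. This is a sketch, not the file's exact contents: the PassManager setup
and the final run call are assumed from the surrounding 2010-era tool, while
the pass order is the one visible in the hunk.

  // Sketch only, not part of the patch.
  PassManager Passes;
  Passes.add(createGVExtractionPass(GVs, DeleteFn, Relink));
  if (!DeleteFn)
    Passes.add(createGlobalDCEPass());           // delete unreachable globals
  Passes.add(createStripDeadDebugInfoPass());    // new: drop now-dead debug info
  Passes.add(createDeadTypeEliminationPass());   // remove dead types
  Passes.add(createStripDeadPrototypesPass());   // remove dead declarations
  Passes.run(*M);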
Modified: llvm/branches/wendling/eh/tools/llvm-mc/llvm-mc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/llvm-mc/llvm-mc.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/llvm-mc/llvm-mc.cpp (original)
+++ llvm/branches/wendling/eh/tools/llvm-mc/llvm-mc.cpp Fri Jul  2 04:57:13 2010
@@ -312,7 +312,7 @@
     Str.reset(createLoggingStreamer(Str.take(), errs()));
   }
 
-  AsmParser Parser(SrcMgr, Ctx, *Str.get(), *MAI);
+  AsmParser Parser(*TheTarget, SrcMgr, Ctx, *Str.get(), *MAI);
   OwningPtr<TargetAsmParser> TAP(TheTarget->createAsmParser(Parser));
   if (!TAP) {
     errs() << ProgName 

Modified: llvm/branches/wendling/eh/tools/llvm-nm/llvm-nm.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/llvm-nm/llvm-nm.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/llvm-nm/llvm-nm.cpp (original)
+++ llvm/branches/wendling/eh/tools/llvm-nm/llvm-nm.cpp Fri Jul  2 04:57:13 2010
@@ -89,7 +89,8 @@
 static void DumpSymbolNameForGlobalValue(GlobalValue &GV) {
   // Private linkage and available_externally linkage don't exist in symtab.
   if (GV.hasPrivateLinkage() || GV.hasLinkerPrivateLinkage() ||
-      GV.hasAvailableExternallyLinkage()) return;
+      GV.hasLinkerPrivateWeakLinkage() || GV.hasAvailableExternallyLinkage())
+    return;
   
   const std::string SymbolAddrStr = "        "; // Not used yet...
   char TypeChar = TypeCharForSymbol(GV);

Modified: llvm/branches/wendling/eh/tools/llvmc/plugins/Base/Base.td.in
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/tools/llvmc/plugins/Base/Base.td.in?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/tools/llvmc/plugins/Base/Base.td.in (original)
+++ llvm/branches/wendling/eh/tools/llvmc/plugins/Base/Base.td.in Fri Jul  2 04:57:13 2010
@@ -262,12 +262,12 @@
 ]>;
 
 // Base class for linkers
-class llvm_gcc_based_linker <string cmd_prefix> : Tool<
+class llvm_gcc_based_linker <string cmd_prefix, dag on_empty> : Tool<
 [(in_language ["object-code", "static-library"]),
  (out_language "executable"),
  (output_suffix "out"),
  (command cmd_prefix),
- (works_on_empty (case (not_empty "filelist"), true,
+ (works_on_empty (case (and (not_empty "filelist"), on_empty), true,
                        (default), false)),
  (join),
  (actions (case
@@ -295,9 +295,13 @@
 ]>;
 
 // Default linker
-def llvm_gcc_linker : llvm_gcc_based_linker<"@LLVMGCCCOMMAND@">;
+def llvm_gcc_linker : llvm_gcc_based_linker<"@LLVMGCCCOMMAND@",
+    (not (or (parameter_equals "linker", "g++"),
+         (parameter_equals "linker", "c++")))>;
 // Alternative linker for C++
-def llvm_gcc_cpp_linker : llvm_gcc_based_linker<"@LLVMGXXCOMMAND@">;
+def llvm_gcc_cpp_linker : llvm_gcc_based_linker<"@LLVMGXXCOMMAND@",
+    (or (parameter_equals "linker", "g++"),
+        (parameter_equals "linker", "c++"))>;
 
 // Language map
 

Modified: llvm/branches/wendling/eh/utils/TableGen/ARMDecoderEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/ARMDecoderEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/ARMDecoderEmitter.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/ARMDecoderEmitter.cpp Fri Jul  2 04:57:13 2010
@@ -1579,6 +1579,7 @@
     if (Name == "TCRETURNdi" || Name == "TCRETURNdiND" ||
         Name == "TCRETURNri" || Name == "TCRETURNriND" ||
         Name == "TAILJMPd"  || Name == "TAILJMPdND" ||
+        Name == "TAILJMPdNDt" ||
         Name == "TAILJMPr"  || Name == "TAILJMPrND" ||
         Name == "MOVr_TC")
       return false;

Modified: llvm/branches/wendling/eh/utils/TableGen/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/CMakeLists.txt?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/utils/TableGen/CMakeLists.txt Fri Jul  2 04:57:13 2010
@@ -41,6 +41,6 @@
 if( MINGW )
   target_link_libraries(tblgen imagehlp psapi)
 endif( MINGW )
-if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
+if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD AND NOT BEOS )
   target_link_libraries(tblgen pthread)
 endif()

Modified: llvm/branches/wendling/eh/utils/TableGen/ClangAttrEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/ClangAttrEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/ClangAttrEmitter.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/ClangAttrEmitter.cpp Fri Jul  2 04:57:13 2010
@@ -34,7 +34,6 @@
     OS << "class " << R.getName() << "Attr : public Attr {\n";
 
     std::vector<Record*> Args = R.getValueAsListOfDefs("Args");
-    std::vector<Record*>::iterator ai, ae = Args.end();
 
     // FIXME: Handle arguments
     assert(Args.empty() && "Can't yet handle arguments");

Modified: llvm/branches/wendling/eh/utils/TableGen/CodeGenInstruction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/CodeGenInstruction.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/CodeGenInstruction.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/CodeGenInstruction.cpp Fri Jul  2 04:57:13 2010
@@ -107,7 +107,6 @@
   canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
   mayLoad      = R->getValueAsBit("mayLoad");
   mayStore     = R->getValueAsBit("mayStore");
-  bool isTwoAddress = R->getValueAsBit("isTwoAddress");
   isPredicable = R->getValueAsBit("isPredicable");
   isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress");
   isCommutable = R->getValueAsBit("isCommutable");
@@ -212,16 +211,6 @@
   // Parse Constraints.
   ParseConstraints(R->getValueAsString("Constraints"), this);
 
-  // For backward compatibility: isTwoAddress means operand 1 is tied to
-  // operand 0.
-  if (isTwoAddress) {
-    if (!OperandList[1].Constraints[0].isNone())
-      throw R->getName() + ": cannot use isTwoAddress property: instruction "
-            "already has constraint set!";
-    OperandList[1].Constraints[0] =
-      CodeGenInstruction::ConstraintInfo::getTied(0);
-  }
-
   // Parse the DisableEncoding field.
   std::string DisableEncoding = R->getValueAsString("DisableEncoding");
   while (1) {

Modified: llvm/branches/wendling/eh/utils/TableGen/EDEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/EDEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/EDEmitter.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/EDEmitter.cpp Fri Jul  2 04:57:13 2010
@@ -500,6 +500,8 @@
       // TODO add support for fixed operands
     } else if (name.find("F") != name.npos) {
       // ignore (this pushes onto the FP stack)
+    } else if (name.find("A") != name.npos) {
+      // ignore (pushes all GP registers onto the stack)
     } else if (name[name.length() - 1] == 'm') {
       PUSH("src");
     } else if (name.find("i") != name.npos) {
@@ -518,6 +520,8 @@
       // TODO add support for fixed operands
     } else if (name.find("F") != name.npos) {
       // ignore (this pops from the FP stack)
+    } else if (name.find("A") != name.npos) {
+      // ignore (pops all GP registers off the stack)
     } else if (name[name.length() - 1] == 'm') {
       POP("dst");
     } else {

Modified: llvm/branches/wendling/eh/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/NeonEmitter.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/NeonEmitter.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/NeonEmitter.cpp Fri Jul  2 04:57:13 2010
@@ -167,8 +167,6 @@
     case 'c':
       cnst = true;
     case 'p':
-      usgn = false;
-      poly = false;
       pntr = true;
       scal = true;
       break;
@@ -189,7 +187,7 @@
 
 /// TypeString - for a modifier and type, generate the name of the typedef for
 /// that type.  If generic is true, emit the generic vector type rather than
-/// the public NEON type. QUc -> uint8x8t_t / __neon_uint8x8_t.
+/// the public NEON type. QUc -> uint8x8_t / __neon_uint8x8_t.
 static std::string TypeString(const char mod, StringRef typestr,
                               bool generic = false) {
   bool quad = false;
@@ -279,9 +277,9 @@
   return s.str();
 }
 
-/// TypeString - for a modifier and type, generate the clang BuiltinsARM.def 
-/// prototype code for the function.  See the top of clang's Builtins.def for
-/// a description of the type strings.
+/// BuiltinTypeString - for a modifier and type, generate the clang
+/// BuiltinsARM.def prototype code for the function.  See the top of clang's
+/// Builtins.def for a description of the type strings.
 static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                      ClassKind ck, bool ret) {
   bool quad = false;
@@ -302,9 +300,11 @@
   // Based on the modifying character, change the type and width if necessary.
   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
 
-  if (pntr)
+  if (pntr) {
+    usgn = false;
+    poly = false;
     type = 'v';
-  
+  }
   if (type == 'h') {
     type = 's';
     usgn = true;
@@ -330,14 +330,12 @@
   }
 
   // Since the return value must be one type, return a vector type of the
-  // appropriate width which we will bitcast.
+  // appropriate width which we will bitcast.  An exception is made for
+  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
+  // fashion, storing them to a pointer arg.
   if (ret) {
-    if (mod == '2')
-      return quad ? "V32c" : "V16c";
-    if (mod == '3')
-      return quad ? "V48c" : "V24c";
-    if (mod == '4')
-      return quad ? "V64c" : "V32c";
+    if (mod == '2' || mod == '3' || mod == '4')
+      return "vv*";
     if (mod == 'f' || (ck != ClassB && type == 'f'))
       return quad ? "V4f" : "V2f";
     if (ck != ClassB && type == 's')
@@ -370,6 +368,52 @@
   return quad ? "V16c" : "V8c";
 }
 
+/// StructTag - generate the name of the struct tag for a type.
+/// These names are mandated by ARM's ABI.
+static std::string StructTag(StringRef typestr) {
+  bool quad = false;
+  bool poly = false;
+  bool usgn = false;
+  
+  // base type to get the type string for.
+  char type = ClassifyType(typestr, quad, poly, usgn);
+  
+  SmallString<128> s;
+  s += "__simd";
+  s += quad ? "128_" : "64_";
+  if (usgn)
+    s.push_back('u');
+  
+  switch (type) {
+    case 'c':
+      s += poly ? "poly8" : "int8";
+      break;
+    case 's':
+      s += poly ? "poly16" : "int16";
+      break;
+    case 'i':
+      s += "int32";
+      break;
+    case 'l':
+      s += "int64";
+      break;
+    case 'h':
+      s += "float16";
+      break;
+    case 'f':
+      s += "float32";
+      break;
+    default:
+      throw "unhandled type!";
+      break;
+  }
+
+  // Append _t, finishing the struct tag name.
+  s += "_t";
+  
+  return s.str();
+}
+
 /// MangleName - Append a type or width suffix to a base neon function name, 
 /// and insert a 'q' in the appropriate location if the operation works on
 /// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
@@ -701,7 +745,13 @@
   char arg = 'a';
   std::string s;
 
-  bool unioning = (proto[0] == '2' || proto[0] == '3' || proto[0] == '4');
+  // If this builtin returns a struct of 2, 3, or 4 vectors, pass it as an implicit
+  // sret-like argument.
+  bool sret = (proto[0] == '2' || proto[0] == '3' || proto[0] == '4');
+
+  // If this builtin takes an immediate argument, we need to #define it rather
+  // than use a standard declaration, so that SemaChecking can range check
+  // the immediate passed by the user.
   bool define = proto.find('i') != std::string::npos;
 
   // If all types are the same size, bitcasting the args will take care 
@@ -714,19 +764,14 @@
     std::string ts = TypeString(proto[0], typestr);
     
     if (define) {
-      if (proto[0] != 's')
+      if (sret)
+        s += "({ " + ts + " r; ";
+      else if (proto[0] != 's')
         s += "(" + ts + "){(__neon_" + ts + ")";
+    } else if (sret) {
+      s += ts + " r; ";
     } else {
-      if (unioning) {
-        s += "union { ";
-        s += TypeString(proto[0], typestr, true) + " val; ";
-        s += TypeString(proto[0], typestr, false) + " s; ";
-        s += "} r;";
-      } else {
-        s += ts;
-      }
-      
-      s += " r; r";
+      s += ts + " r; r";
       if (structTypes && proto[0] != 's' && proto[0] != 'i' && proto[0] != 'l')
         s += ".val";
       
@@ -744,6 +789,11 @@
     s += MangleName(name, typestr, ck);
   }
   s += "(";
+
+  // Pass the address of the return variable as the first argument to sret-like
+  // builtins.
+  if (sret)
+    s += "&r, ";
   
   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
     std::string args = std::string(&arg, 1);
@@ -754,7 +804,7 @@
     // argument to the __builtin.
     if (structTypes && (proto[i] == '2' || proto[i] == '3' || proto[i] == '4')){
       for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
-        s += args + ".val[" + utostr(vi) + "]";
+        s += args + ".val[" + utostr(vi) + "].val";
         if ((vi + 1) < ve)
           s += ", ";
       }
@@ -788,13 +838,12 @@
 
   if (proto[0] != 'v') {
     if (define) {
-      if (proto[0] != 's')
+      if (sret)
+        s += "; r; })";
+      else if (proto[0] != 's')
         s += "}";
     } else {
-      if (unioning)
-        s += " return r.s;";
-      else
-        s += " return r;";
+      s += " return r;";
     }
   }
   return s;
@@ -875,10 +924,11 @@
   // Emit struct typedefs.
   for (unsigned vi = 1; vi != 5; ++vi) {
     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
-      std::string ts = TypeString('d', TDTypeVec[i]);
-      std::string vs = (vi > 1) ? TypeString('0' + vi, TDTypeVec[i]) : ts;
-      OS << "typedef struct __" << vs << " {\n";
-      OS << "  __neon_" << ts << " val";
+      std::string ts = TypeString('d', TDTypeVec[i], vi == 1);
+      std::string vs = TypeString((vi > 1) ? '0' + vi : 'd', TDTypeVec[i]);
+      std::string tag = (vi > 1) ? vs : StructTag(TDTypeVec[i]);
+      OS << "typedef struct " << tag << " {\n";
+      OS << "  " << ts << " val";
       if (vi > 1)
         OS << "[" << utostr(vi) << "]";
       OS << ";\n} " << vs << ";\n\n";
@@ -1119,13 +1169,22 @@
       } else {
         rangestr = "u = " + utostr(RangeFromType(TypeVec[ti]));
       }
-      // Make sure cases appear only once.
+      // Make sure cases appear only once by uniquing them in a string map.
       namestr = MangleName(name, TypeVec[ti], ck);
       if (EmittedMap.count(namestr))
         continue;
       EmittedMap[namestr] = OpNone;
-      
+
+      // Calculate the index of the immediate that should be range checked.
       unsigned immidx = 0;
+      
+      // Builtins that return a struct of multiple vectors have an extra
+      // leading arg for the struct return.
+      if (Proto[0] == '2' || Proto[0] == '3' || Proto[0] == '4')
+        ++immidx;
+      
+      // Add one to the index for each argument until we reach the immediate 
+      // to be checked.  Structs of vectors are passed as multiple arguments.
       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
         switch (Proto[ii]) {
           default:  immidx += 1; break;

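The NeonEmitter changes above retire the old union-based return path for
builtins that yield a struct of 2, 3, or 4 vectors: the builtin's return
prototype becomes "vv*" and the generated wrapper passes the address of a
local temporary as a hidden first argument. Illustratively, the emitted
arm_neon.h code now has roughly this shape (the intrinsic and builtin names
are hypothetical; the temporary, the &r first argument, and the trailing
return come from the hunks):

  // Hypothetical emitted wrapper, for illustration only.
  int8x8x2_t vfoo_s8(int8x8_t a) {
    int8x8x2_t r;                     // sret-like temporary
    __builtin_neon_vfoo_v(&r, a, 0);  // address of r passed first
    return r;
  }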
Modified: llvm/branches/wendling/eh/utils/TableGen/Record.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/Record.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/Record.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/Record.cpp Fri Jul  2 04:57:13 2010
@@ -1262,7 +1262,7 @@
   Init *Op = Val->resolveReferences(R, RV);
 
   if (Args != NewArgs || Op != Val)
-    return new DagInit(Op, "", NewArgs, ArgNames);
+    return new DagInit(Op, ValName, NewArgs, ArgNames);
 
   return this;
 }

Modified: llvm/branches/wendling/eh/utils/TableGen/TGParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/TableGen/TGParser.cpp?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/TableGen/TGParser.cpp (original)
+++ llvm/branches/wendling/eh/utils/TableGen/TGParser.cpp Fri Jul  2 04:57:13 2010
@@ -2020,7 +2020,6 @@
         }
       } else {
         Records.addDef(CurRec);
-        CurRec->resolveReferences();
       }
 
       NewRecDefs.push_back(CurRec);
@@ -2064,9 +2063,6 @@
             if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
                          LetStack[i][j].Bits, LetStack[i][j].Value))
               return true;
-
-        if (!CurMultiClass)
-          CurRec->resolveReferences();
       }
 
       if (Lex.getCode() != tgtok::comma) break;
@@ -2075,6 +2071,10 @@
     }
   }
 
+  if (!CurMultiClass)
+    for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i)
+      NewRecDefs[i]->resolveReferences();
+
   if (Lex.getCode() != tgtok::semi)
     return TokError("expected ';' at end of defm");
   Lex.Lex();

Modified: llvm/branches/wendling/eh/utils/buildit/build_llvm
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/buildit/build_llvm?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/buildit/build_llvm (original)
+++ llvm/branches/wendling/eh/utils/buildit/build_llvm Fri Jul  2 04:57:13 2010
@@ -197,6 +197,7 @@
     UNIVERSAL_SDK_PATH=$SDKROOT \
     NO_RUNTIME_LIBS=1 \
     DISABLE_EDIS=1 \
+    DEBUG_SYMBOLS=1 \
     LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
     LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
     CXXFLAGS="-DLLVM_VERSION_INFO='\" Apple Build #$LLVM_VERSION\"'" \
@@ -222,6 +223,7 @@
 make $LOCAL_MAKEFLAGS $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$HOSTS" \
     NO_RUNTIME_LIBS=1 \
     DISABLE_EDIS=1 \
+    DEBUG_SYMBOLS=1 \
     LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
     LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
     OPTIMIZE_OPTION='-O3' VERBOSE=1 install
@@ -242,9 +244,12 @@
 
 if [ "x$LLVM_DEBUG" != "x1" ]; then
     # Strip local symbols from llvm libraries.
-    strip -S $DEST_DIR$DEST_ROOT/lib/*.[oa]
+    #
+    # Use '-l' to strip i386 modules. N.B. that flag doesn't work with kext or
+    # PPC objects!
+    strip -Sl $DEST_DIR$DEST_ROOT/lib/*.[oa]
     for f in `ls $DEST_DIR$DEST_ROOT/lib/*.so`; do
-        strip -Sx $f
+        strip -Sxl $f
     done
 fi
 
@@ -268,16 +273,8 @@
         -exec lipo -extract ppc7400 -extract i386 -extract x86_64 {} -output {} \;
 fi
 
-cd $DEST_DIR$DEST_ROOT
-if [ "$INSTALL_LIBLTO" == yes ]; then
-  mkdir -p $DT_HOME/lib
-  mv lib/libLTO.dylib $DT_HOME/lib/libLTO.dylib
-  strip -S $DT_HOME/lib/libLTO.dylib
-fi
-rm -f lib/libLTO.a lib/libLTO.la
-
 # The Hello dylib is an example of how to build a pass. No need to install it.
-rm lib/libLLVMHello.dylib
+rm $DEST_DIR$DEST_ROOT/lib/libLLVMHello.dylib
 
 # Compress manpages
 MDIR=$DEST_DIR$DEST_ROOT/share/man/man1
@@ -323,10 +320,35 @@
     | cpio -pdml $SYM_DIR/src || exit 1
 
 ################################################################################
+# Install and strip libLTO.dylib
+
+cd $DEST_DIR$DEST_ROOT
+if [ "$INSTALL_LIBLTO" = "yes" ]; then
+  mkdir -p $DT_HOME/lib
+  mv lib/libLTO.dylib $DT_HOME/lib/libLTO.dylib
+
+  # Use '-l' to strip i386 modules. N.B. that flag doesn't work with kext or
+  # PPC objects!
+  strip -arch all -Sl $DT_HOME/lib/libLTO.dylib
+fi
+rm -f lib/libLTO.a lib/libLTO.la
+
+################################################################################
 # Remove debugging information from DEST_DIR.
 
+cd $DIR || exit 1
+
 find $DEST_DIR -name \*.a -print | xargs ranlib || exit 1
 find $DEST_DIR -name \*.dSYM -print | xargs rm -r || exit 1
+
+# Strip debugging information from files
+#
+# Use '-l' to strip i386 modules. N.B. that flag doesn't work with kext or
+# PPC objects!
+find $DEST_DIR -perm -0111 -type f \
+    ! \( -name '*.la' -o -name gccas -o -name gccld -o -name llvm-config \) \
+    -print | xargs -n 1 -P ${SYSCTL} strip -arch all -Sl
+
 chgrp -h -R wheel $DEST_DIR
 chgrp -R wheel $DEST_DIR
 
@@ -338,7 +360,7 @@
 ################################################################################
 # symlinks so that B&I can find things
 
-if [ "$INSTALL_LIBLTO" == yes ]; then
+if [ "$INSTALL_LIBLTO" = "yes" ]; then
   mkdir -p $DEST_DIR/usr/lib/
   cd $DEST_DIR/usr/lib && \
     ln -s ../../$DEVELOPER_DIR/usr/lib/libLTO.dylib ./libLTO.dylib

Modified: llvm/branches/wendling/eh/utils/unittest/googletest/README.LLVM
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/unittest/googletest/README.LLVM?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/unittest/googletest/README.LLVM (original)
+++ llvm/branches/wendling/eh/utils/unittest/googletest/README.LLVM Fri Jul  2 04:57:13 2010
@@ -27,3 +27,5 @@
 Modified as follows:
 * To GTestStreamToHelper in include/gtest/internal/gtest-internal.h,
   added the ability to stream with raw_os_ostream.
+* To refresh Haiku support in include/gtest/internal/gtest-port.h,
+  see http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20100621/102898.html

Modified: llvm/branches/wendling/eh/utils/unittest/googletest/include/gtest/internal/gtest-port.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/utils/unittest/googletest/include/gtest/internal/gtest-port.h?rev=107465&r1=107464&r2=107465&view=diff
==============================================================================
--- llvm/branches/wendling/eh/utils/unittest/googletest/include/gtest/internal/gtest-port.h (original)
+++ llvm/branches/wendling/eh/utils/unittest/googletest/include/gtest/internal/gtest-port.h Fri Jul  2 04:57:13 2010
@@ -80,6 +80,7 @@
 // the given platform; otherwise undefined):
 //   GTEST_OS_AIX      - IBM AIX
 //   GTEST_OS_CYGWIN   - Cygwin
+//   GTEST_OS_HAIKU    - Haiku
 //   GTEST_OS_LINUX    - Linux
 //   GTEST_OS_MAC      - Mac OS X
 //   GTEST_OS_SOLARIS  - Sun Solaris
@@ -220,11 +221,11 @@
 #elif defined(_AIX)
 #define GTEST_OS_AIX 1
 #elif defined(__HAIKU__)
-#define GTEST_OS_HAIKU
+#define GTEST_OS_HAIKU 1
 #endif  // __CYGWIN__
 
-#if GTEST_OS_CYGWIN || GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_SYMBIAN || \
-    GTEST_OS_SOLARIS || GTEST_OS_AIX
+#if GTEST_OS_CYGWIN || GTEST_OS_HAIKU || GTEST_OS_LINUX || GTEST_OS_MAC || \
+    GTEST_OS_SYMBIAN || GTEST_OS_SOLARIS || GTEST_OS_AIX
 
 // On some platforms, <regex.h> needs someone to define size_t, and
 // won't compile otherwise.  We can #include it here as we already

More information about the llvm-branch-commits mailing list