[llvm-branch-commits] [llvm-branch] r117425 [7/9] - in /llvm/branches/wendling/eh: ./ autoconf/ autoconf/m4/ bindings/ada/ bindings/ocaml/llvm/ bindings/ocaml/transforms/scalar/ cmake/ cmake/modules/ docs/ docs/CommandGuide/ docs/tutorial/ examples/ examples/BrainF/ examples/ExceptionDemo/ examples/Fibonacci/ examples/Kaleidoscope/Chapter7/ examples/ModuleMaker/ include/llvm-c/ include/llvm-c/Transforms/ include/llvm/ include/llvm/ADT/ include/llvm/Analysis/ include/llvm/Assembly/ include/llvm/Bitcode/ include/llvm/CodeGen/ i...

Bill Wendling isanbard at gmail.com
Tue Oct 26 17:48:11 PDT 2010


Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td Tue Oct 26 19:48:03 2010
@@ -1,10 +1,10 @@
-//===----------------------------------------------------------------------===//
-// 
+//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This file describes the X86 instruction set, defining the instructions, and
@@ -46,7 +46,7 @@
                                   [SDTCisInt<0>,
                                    SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
 
-def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, 
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
                                      SDTCisVT<2, i8>]>;
 def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
@@ -64,6 +64,12 @@
                                                          SDTCisVT<1, iPTR>,
                                                          SDTCisVT<2, iPTR>]>;
 
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+                                            SDTCisPtrTy<1>,
+                                            SDTCisVT<2, i32>,
+                                            SDTCisVT<3, i8>,
+                                            SDTCisVT<4, i32>]>;
+
 def SDTX86RepStr  : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
 
 def SDTX86Void    : SDTypeProfile<0, 0, []>;
@@ -74,8 +80,6 @@
 
 def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
-def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
-
 def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -111,30 +115,30 @@
 
 def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
-                         SDNPMayLoad]>;
+                         SDNPMayLoad, SDNPMemOperand]>;
 def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
-                         SDNPMayLoad]>;
+                         SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
                         [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
@@ -143,13 +147,16 @@
                  SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
                         SDT_X86VASTART_SAVE_XMM_REGS,
                         [SDNPHasChain, SDNPVariadic]>;
-
+def X86vaarg64 :
+                 SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+                         SDNPMemOperand]>;
 def X86callseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
                         [SDNPHasChain, SDNPOutFlag]>;
 def X86callseq_end :
                  SDNode<"ISD::CALLSEQ_END",   SDT_X86CallSeqEnd,
-                        [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;       
+                        [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 
 def X86call    : SDNode<"X86ISD::CALL",     SDT_X86Call,
                         [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
@@ -169,13 +176,11 @@
 
 def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
                         [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress",
-                                 SDT_X86SegmentBaseAddress, []>;
 
 def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
                         [SDNPHasChain]>;
 
-def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, 
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
                         [SDNPHasChain,  SDNPOptInFlag, SDNPVariadic]>;
 
 def X86add_flag  : SDNode<"X86ISD::ADD",  SDTBinaryArithWithFlags,
@@ -185,7 +190,7 @@
                           [SDNPCommutative]>;
 def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
                           [SDNPCommutative]>;
-                          
+
 def X86inc_flag  : SDNode<"X86ISD::INC",  SDTUnaryArithWithFlags>;
 def X86dec_flag  : SDNode<"X86ISD::DEC",  SDTUnaryArithWithFlags>;
 def X86or_flag   : SDNode<"X86ISD::OR",   SDTBinaryArithWithFlags,
@@ -197,9 +202,9 @@
 
 def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
 
-def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void,
-                            [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-                            
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
+                          [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
 def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
                         []>;
 
@@ -261,6 +266,14 @@
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i64mem_TC : Operand<i64> {
+  let PrintMethod = "printi64mem";
+  let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
 
 let ParserMatchClass = X86AbsMemAsmOperand,
     PrintMethod = "print_pcrel_imm" in {
@@ -332,18 +345,50 @@
   let ParserMatchClass = ImmSExti32i8AsmOperand;
 }
 
+// 64-bits but only 32 bits are significant.
+def i64i32imm  : Operand<i64> {
+  let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+  let PrintMethod = "print_pcrel_imm";
+  let ParserMatchClass = X86AbsMemAsmOperand;
+}
+
+// 64-bits but only 8 bits are significant.
+def i64i8imm   : Operand<i64> {
+  let ParserMatchClass = ImmSExti64i8AsmOperand;
+}
+
+def lea64_32mem : Operand<i32> {
+  let PrintMethod = "printi32mem";
+  let AsmOperandLowerMethod = "lower_lea64_32mem";
+  let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
+
+
 //===----------------------------------------------------------------------===//
 // X86 Complex Pattern Definitions.
 //
 
 // Define X86 specific addressing mode.
-def addr      : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
+def addr      : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
 def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
                                [add, sub, mul, X86mul_imm, shl, or, frameindex],
                                []>;
 def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
                                [tglobaltlsaddr], []>;
 
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+                        [add, sub, mul, X86mul_imm, shl, or, frameindex,
+                         X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+                               [tglobaltlsaddr], []>;
+
 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
 def HasCMov      : Predicate<"Subtarget->hasCMov()">;
@@ -353,6 +398,8 @@
 // no AVX version of the desired instructions is present, this is better for
 // incremental dev (without fallbacks it's easier to spot what's missing)
 def HasMMX       : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
+def Has3DNow     : Predicate<"Subtarget->has3DNow()">;
+def Has3DNowA    : Predicate<"Subtarget->has3DNowA()">;
 def HasSSE1      : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
 def HasSSE2      : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
 def HasSSE3      : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
@@ -414,46 +461,28 @@
 def X86_COND_P   : PatLeaf<(i8 14)>; // alt. COND_PE
 def X86_COND_S   : PatLeaf<(i8 15)>;
 
-def immSext8 : PatLeaf<(imm), [{
-  return N->getSExtValue() == (int8_t)N->getSExtValue();
-}]>;
+def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
 
 def i16immSExt8  : PatLeaf<(i16 immSext8)>;
 def i32immSExt8  : PatLeaf<(i32 immSext8)>;
-
-/// Load patterns: these constraint the match to the right address space.
-def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
+def i64immSExt8  : PatLeaf<(i64 immSext8)>;
+def i64immSExt32  : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
+def i64immZExt32  : PatLeaf<(i64 imm), [{
+  // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+  // unsigned (zero extended) field.
+  return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
 }]>;
 
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 256;
-  return false;
-}]>;
-
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 257;
-  return false;
+def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
+    uint64_t v = N->getZExtValue();
+    return v == (uint32_t)v && (int32_t)v == (int8_t)v;
 }]>;
 
-
 // Helper fragments for loads.
 // It's always safe to treat a anyext i16 load as a i32 load if the i16 is
 // known to be 32-bit aligned or better. Ditto for i8 to i16.
 def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD)
     return true;
@@ -464,10 +493,6 @@
 
 def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::EXTLOAD)
     return LD->getAlignment() >= 2 && !LD->isVolatile();
@@ -476,10 +501,6 @@
 
 def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD)
     return true;
@@ -488,15 +509,18 @@
   return false;
 }]>;
 
-def loadi8  : PatFrag<(ops node:$ptr), (i8  (dsload node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;
+def loadi8  : PatFrag<(ops node:$ptr), (i8  (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
 
 def sextloadi16i8  : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
 def sextloadi32i8  : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
 def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
 
 def zextloadi8i1   : PatFrag<(ops node:$ptr), (i8  (zextloadi1 node:$ptr))>;
 def zextloadi16i1  : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
@@ -504,6 +528,10 @@
 def zextloadi16i8  : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
 def zextloadi32i8  : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
 def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1  : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
 
 def extloadi8i1    : PatFrag<(ops node:$ptr), (i8  (extloadi1 node:$ptr))>;
 def extloadi16i1   : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
@@ -511,6 +539,10 @@
 def extloadi16i8   : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
 def extloadi32i8   : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
 def extloadi32i16  : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1   : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8   : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16  : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32  : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
 
 
 // An 'and' node with a single use.
@@ -526,65 +558,10 @@
   return N->hasOneUse();
 }]>;
 
-// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
-def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
-  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
-    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
-  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
-  APInt KnownZero0, KnownOne0;
-  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
-  APInt KnownZero1, KnownOne1;
-  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
-  return (~KnownZero0 & ~KnownZero1) == 0;
-}]>;
-
 //===----------------------------------------------------------------------===//
-// Instruction list...
+// Instruction list.
 //
 
-// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
-// a stack adjustment and the codegen must know that they may modify the stack
-// pointer before prolog-epilog rewriting occurs.
-// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber EFLAGS.
-let Defs = [ESP, EFLAGS], Uses = [ESP] in {
-def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
-                           "#ADJCALLSTACKDOWN",
-                           [(X86callseq_start timm:$amt)]>,
-                          Requires<[In32BitMode]>;
-def ADJCALLSTACKUP32   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
-                           "#ADJCALLSTACKUP",
-                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
-                          Requires<[In32BitMode]>;
-}
-
-// x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in {
-def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
-                              (outs),
-                              (ins GR8:$al,
-                                   i64imm:$regsavefi, i64imm:$offset,
-                                   variable_ops),
-                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
-                              [(X86vastart_save_xmm_regs GR8:$al,
-                                                         imm:$regsavefi,
-                                                         imm:$offset)]>;
-
-// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets.  Calls
-// to _alloca is needed to probe the stack when allocating more than 4k bytes in
-// one go. Touching the stack at 4K increments is necessary to ensure that the
-// guard pages used by the OS virtual memory manager are allocated in correct
-// sequence.
-// The main point of having separate instruction are extra unmodelled effects
-// (compared to ordinary calls) like stack pointer change.
-
-def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
-                     "# dynamic stack allocation",
-                     [(X86MingwAlloca)]>;
-}
-
 // Nop
 let neverHasSideEffects = 1 in {
   def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
@@ -594,202 +571,22 @@
                 "nop{l}\t$zero", []>, TB;
 }
 
-// Trap
-def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>;
-// FIXME: need to make sure that "int $3" matches int3
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
-def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
-
-// PIC base construction.  This expands to code that looks like this:
-//     call  $next_inst
-//     popl %destreg"
-let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
-  def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
-                      "", []>;
-
-//===----------------------------------------------------------------------===//
-//  Control Flow Instructions.
-//
-
-// Return instructions.
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
-    hasCtrlDep = 1, FPForm = SpecialFP in {
-  def RET    : I   <0xC3, RawFrm, (outs), (ins variable_ops),
-                    "ret",
-                    [(X86retflag 0)]>;
-  def RETI   : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
-                    "ret\t$amt",
-                    [(X86retflag timm:$amt)]>;
-  def LRET   : I   <0xCB, RawFrm, (outs), (ins),
-                    "lret", []>;
-  def LRETI  : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
-                    "lret\t$amt", []>;
-}
-
-// Unconditional branches.
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
-  def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
-                        "jmp\t$dst", [(br bb:$dst)]>;
-  def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
-                       "jmp\t$dst", []>;
-}
-
-// Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
-  multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
-    def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
-    def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
-                       [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
-  }
-}
-
-defm JO  : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
-defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
-defm JB  : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
-defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
-defm JE  : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
-defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
-defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
-defm JA  : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
-defm JS  : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
-defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
-defm JP  : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
-defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
-defm JL  : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
-defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
-defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
-defm JG  : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
-
-// FIXME: What about the CX/RCX versions of this instruction?
-let Uses = [ECX], isBranch = 1, isTerminator = 1 in
-  def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
-                       "jcxz\t$dst", []>;
-
-
-// Indirect branches
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
-  def JMP32r     : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
-                     [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
-  def JMP32m     : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
-                     [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
-                     
-  def FARJMP16i  : Iseg16<0xEA, RawFrm, (outs), 
-                          (ins i16imm:$seg, i16imm:$off),
-                          "ljmp{w}\t$seg, $off", []>, OpSize;
-  def FARJMP32i  : Iseg32<0xEA, RawFrm, (outs),
-                          (ins i16imm:$seg, i32imm:$off),
-                          "ljmp{l}\t$seg, $off", []>;                     
-
-  def FARJMP16m  : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), 
-                     "ljmp{w}\t{*}$dst", []>, OpSize;
-  def FARJMP32m  : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
-                     "ljmp{l}\t{*}$dst", []>;
-}
-
-
-// Loop instructions
-
-def LOOP   : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE  : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
-
-//===----------------------------------------------------------------------===//
-//  Call Instructions...
-//
-let isCall = 1 in
-  // All calls clobber the non-callee saved registers. ESP is marked as
-  // a use to prevent stack-pointer assignments that appear immediately
-  // before calls from potentially appearing dead. Uses for argument
-  // registers are added manually.
-  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [ESP] in {
-    def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
-                           (outs), (ins i32imm_pcrel:$dst,variable_ops),
-                           "call\t$dst", []>;
-    def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
-                        "call\t{*}$dst", [(X86call GR32:$dst)]>;
-    def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
-                        "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
-  
-    def FARCALL16i  : Iseg16<0x9A, RawFrm, (outs), 
-                             (ins i16imm:$seg, i16imm:$off),
-                             "lcall{w}\t$seg, $off", []>, OpSize;
-    def FARCALL32i  : Iseg32<0x9A, RawFrm, (outs),
-                             (ins i16imm:$seg, i32imm:$off),
-                             "lcall{l}\t$seg, $off", []>;
-                             
-    def FARCALL16m  : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
-                        "lcall{w}\t{*}$dst", []>, OpSize;
-    def FARCALL32m  : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
-                        "lcall{l}\t{*}$dst", []>;
-
-    // callw for 16 bit code for the assembler.
-    let isAsmParserOnly = 1 in
-      def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
-                       (outs), (ins i16imm_pcrel:$dst, variable_ops),
-                       "callw\t$dst", []>, OpSize;
-  }
 
 // Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+                 "enter\t$len, $lvl", []>;
 
-def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
-              "enter\t$len, $lvl", []>;
-
-// Tail call stuff.
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
-    isCodeGenOnly = 1 in
-  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [ESP] in {
-  def TCRETURNdi : I<0, Pseudo, (outs), 
-                     (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
-                   "#TC_RETURN $dst $offset", []>;
-  def TCRETURNri : I<0, Pseudo, (outs), 
-                     (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
-                     "#TC_RETURN $dst $offset", []>;
-  let mayLoad = 1 in
-  def TCRETURNmi : I<0, Pseudo, (outs), 
-                     (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
-                     "#TC_RETURN $dst $offset", []>;
-
-  // FIXME: The should be pseudo instructions that are lowered when going to
-  // mcinst.
-  def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
-                           (ins i32imm_pcrel:$dst, variable_ops),
-                 "jmp\t$dst  # TAILCALL",
-                 []>;
-  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), 
-                   "", []>;  // FIXME: Remove encoding when JIT is dead.
-  let mayLoad = 1 in
-  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
-                   "jmp{l}\t{*}$dst  # TAILCALL", []>;
-}
-
-//===----------------------------------------------------------------------===//
-//  Miscellaneous Instructions...
-//
 let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
 def LEAVE    : I<0xC9, RawFrm,
                  (outs), (ins), "leave", []>, Requires<[In32BitMode]>;
 
-def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
-                   "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-let mayLoad = 1 in
-def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
-                   "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-                   "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
-def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                   "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64  : I<0xC9, RawFrm,
+                 (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+//  Miscellaneous Instructions.
+//
 
 let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
 let mayLoad = 1 in {
@@ -802,6 +599,10 @@
   OpSize;
 def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
 def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+
+def POPF16   : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32   : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+               Requires<[In32BitMode]>;
 }
 
 let mayStore = 1 in {
@@ -814,28 +615,53 @@
   OpSize;
 def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
 def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
-}
-}
 
-let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSHi8   : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), 
+def PUSHi8   : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
                       "push{l}\t$imm", []>;
-def PUSHi16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), 
+def PUSHi16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
                       "push{w}\t$imm", []>, OpSize;
-def PUSHi32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), 
+def PUSHi32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
                       "push{l}\t$imm", []>;
-}
 
-let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in {
-def POPF16   : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPF32   : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
-               Requires<[In32BitMode]>;
-}
-let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in {
 def PUSHF16  : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
 def PUSHF32  : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
                Requires<[In32BitMode]>;
+
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP64r   : I<0x58, AddRegFrm,
+                 (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
+def PUSH64r  : I<0x50, AddRegFrm,
+                 (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
 }
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8   : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+                     "push{q}\t$imm", []>;
+def PUSH64i16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+                      "push{q}\t$imm", []>;
+def PUSH64i32  : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+                      "push{q}\t$imm", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64   : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+               Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64    : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+                 Requires<[In64BitMode]>;
+
+
 
 let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
     mayLoad=1, neverHasSideEffects=1 in {
@@ -848,12 +674,16 @@
                Requires<[In32BitMode]>;
 }
 
-let Uses = [EFLAGS], Constraints = "$src = $dst" in     // GR32 = bswap GR32
-  def BSWAP32r : I<0xC8, AddRegFrm,
-                   (outs GR32:$dst), (ins GR32:$src),
-                   "bswap{l}\t$dst", 
-                   [(set GR32:$dst, (bswap GR32:$src))]>, TB;
-
+let Constraints = "$src = $dst" in {    // GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm,
+                 (outs GR32:$dst), (ins GR32:$src),
+                 "bswap{l}\t$dst",
+                 [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+                  "bswap{q}\t$dst",
+                  [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+} // Constraints = "$src = $dst"
 
 // Bit scan instructions.
 let Defs = [EFLAGS] in {
@@ -870,6 +700,12 @@
 def BSF32rm  : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsf{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
+def BSF64rr  : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                  "bsf{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
+def BSF64rm  : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                  "bsf{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
 
 def BSR16rr  : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                  "bsr{w}\t{$src, $dst|$dst, $src}",
@@ -884,44 +720,23 @@
 def BSR32rm  : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsr{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
+def BSR64rr  : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                  "bsr{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
+def BSR64rm  : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                  "bsr{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
 } // Defs = [EFLAGS]
 
-let neverHasSideEffects = 1 in
-def LEA16r   : I<0x8D, MRMSrcMem,
-                 (outs GR16:$dst), (ins i32mem:$src),
-                 "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
-let isReMaterializable = 1 in
-def LEA32r   : I<0x8D, MRMSrcMem,
-                 (outs GR32:$dst), (ins i32mem:$src),
-                 "lea{l}\t{$src|$dst}, {$dst|$src}",
-                 [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
-
-let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
-                  [(X86rep_movs i8)]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
-                  [(X86rep_movs i16)]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
-                  [(X86rep_movs i32)]>, REP;
-}
 
 // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
 let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
 def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
 def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
 def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
 }
 
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
-                  [(X86rep_stos i8)]>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
-                  [(X86rep_stos i16)]>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
-                  [(X86rep_stos i32)]>, REP;
-
 // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
 let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
 def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
@@ -929,91 +744,24 @@
 def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
 let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
 def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
 
 def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
 def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
 def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
 
 def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
 def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
 def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
-
-let Defs = [RAX, RDX] in
-def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
-            TB;
-
-let Defs = [RAX, RCX, RDX] in
-def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
-
-let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
-def TRAP    : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
-}
-
-def SYSCALL  : I<0x05, RawFrm,
-                 (outs), (ins), "syscall", []>, TB;
-def SYSRET   : I<0x07, RawFrm,
-                 (outs), (ins), "sysret", []>, TB;
-def SYSENTER : I<0x34, RawFrm,
-                 (outs), (ins), "sysenter", []>, TB;
-def SYSEXIT  : I<0x35, RawFrm,
-                 (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>;
-
-def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
 
 
 //===----------------------------------------------------------------------===//
-//  Input/Output Instructions...
+//  Move Instructions.
 //
-let Defs = [AL], Uses = [DX] in
-def IN8rr  : I<0xEC, RawFrm, (outs), (ins),
-               "in{b}\t{%dx, %al|%AL, %DX}", []>;
-let Defs = [AX], Uses = [DX] in
-def IN16rr : I<0xED, RawFrm, (outs), (ins),
-               "in{w}\t{%dx, %ax|%AX, %DX}", []>,  OpSize;
-let Defs = [EAX], Uses = [DX] in
-def IN32rr : I<0xED, RawFrm, (outs), (ins),
-               "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
-
-let Defs = [AL] in
-def IN8ri  : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port),
-                  "in{b}\t{$port, %al|%AL, $port}", []>;
-let Defs = [AX] in
-def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
-                  "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
-let Defs = [EAX] in
-def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
-                  "in{l}\t{$port, %eax|%EAX, $port}", []>;
-
-let Uses = [DX, AL] in
-def OUT8rr  : I<0xEE, RawFrm, (outs), (ins),
-                "out{b}\t{%al, %dx|%DX, %AL}", []>;
-let Uses = [DX, AX] in
-def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
-                "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
-let Uses = [DX, EAX] in
-def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
-                "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
-
-let Uses = [AL] in
-def OUT8ir  : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port),
-                   "out{b}\t{%al, $port|$port, %AL}", []>;
-let Uses = [AX] in
-def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
-                   "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
-let Uses = [EAX] in
-def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
-                   "out{l}\t{%eax, $port|$port, %EAX}", []>;
-
-def IN8  : I<0x6C, RawFrm, (outs), (ins),
-             "ins{b}", []>;
-def IN16 : I<0x6D, RawFrm, (outs), (ins),
-             "ins{w}", []>,  OpSize;
-def IN32 : I<0x6D, RawFrm, (outs), (ins),
-             "ins{l}", []>;
 
-//===----------------------------------------------------------------------===//
-//  Move Instructions...
-//
 let neverHasSideEffects = 1 in {
 def MOV8rr  : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
                 "mov{b}\t{$src, $dst|$dst, $src}", []>;
@@ -1021,6 +769,8 @@
                 "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
 }
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def MOV8ri  : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
@@ -1032,6 +782,12 @@
 def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
                    "mov{l}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, imm:$src)]>;
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+                    "movabs{q}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+                      "mov{q}\t{$src, $dst|$dst, $src}",
+                      [(set GR64:$dst, i64immSExt32:$src)]>;
 }
 
 def MOV8mi  : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
@@ -1043,6 +799,9 @@
 def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
                    "mov{l}\t{$src, $dst|$dst, $src}",
                    [(store (i32 imm:$src), addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+                      "mov{q}\t{$src, $dst|$dst, $src}",
+                      [(store i64immSExt32:$src, addr:$dst)]>;
 
 /// moffs8, moffs16 and moffs32 versions of moves.  The immediate is a
 /// 32-bit offset from the PC.  These are only valid in x86-32 mode.
@@ -1064,24 +823,22 @@
 def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
                       "mov{l}\t{%eax, $dst|$dst, %eax}", []>,
                      Requires<[In32BitMode]>;
-                      
-// Moves to and from segment registers
-def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
+                      "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
+                       "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
+                       "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
+                       "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
+
 
 let isCodeGenOnly = 1 in {
 def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
@@ -1090,6 +847,8 @@
                     "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                     "mov{q}\t{$src, $dst|$dst, $src}", []>;
 }
 
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -1102,6 +861,9 @@
 def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}",
                 [(set GR32:$dst, (loadi32 addr:$src))]>;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}",
+                 [(set GR64:$dst, (load addr:$src))]>;
 }
 
 def MOV8mr  : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
@@ -1113,24 +875,9 @@
 def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}",
                 [(store GR32:$src, addr:$dst)]>;
-
-/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
-let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
-def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-
-let mayLoad = 1,
-    canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}",
-                []>;
-
-let mayStore = 1 in
-def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}",
-                []>;
-}
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}",
+                 [(store GR64:$src, addr:$dst)]>;
 
 // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
 // that they can be used for copying and storing h registers, which can't be
@@ -1151,2219 +898,6 @@
                      "mov{b}\t{$src, $dst|$dst, $src}  # NOREX", []>;
 }
 
-// Moves to and from debug registers
-def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-                
-// Moves to and from control registers
-def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-//===----------------------------------------------------------------------===//
-//  Fixed-Register Multiplication and Division Instructions...
-//
-
-// Extra precision multiplication
-
-// AL is really implied by AX, by the registers in Defs must match the
-// SDNode results (i8, i32).
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8r  : I<0xF6, MRM4r, (outs),  (ins GR8:$src), "mul{b}\t$src",
-               // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
-               // This probably ought to be moved to a def : Pat<> if the
-               // syntax can be accepted.
-               [(set AL, (mul AL, GR8:$src)),
-                (implicit EFLAGS)]>;     // AL,AH = AL*GR8
-
-let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
-def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src),
-               "mul{w}\t$src", 
-               []>, OpSize;    // AX,DX = AX*GR16
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
-def MUL32r : I<0xF7, MRM4r, (outs),  (ins GR32:$src),
-               "mul{l}\t$src",
-               []>; // EAX,EDX = EAX*GR32
-
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8m  : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
-               "mul{b}\t$src",
-               // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
-               // This probably ought to be moved to a def : Pat<> if the
-               // syntax can be accepted.
-               [(set AL, (mul AL, (loadi8 addr:$src))),
-                (implicit EFLAGS)]>;   // AL,AH = AL*[mem8]
-
-let mayLoad = 1, neverHasSideEffects = 1 in {
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
-               "mul{w}\t$src",
-               []>, OpSize; // AX,DX = AX*[mem16]
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
-              "mul{l}\t$src",
-              []>;          // EAX,EDX = EAX*[mem32]
-}
-
-let neverHasSideEffects = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r  : I<0xF6, MRM5r, (outs),  (ins GR8:$src), "imul{b}\t$src", []>;
-              // AL,AH = AL*GR8
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs),  (ins GR16:$src), "imul{w}\t$src", []>,
-              OpSize;    // AX,DX = AX*GR16
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs),  (ins GR32:$src), "imul{l}\t$src", []>;
-              // EAX,EDX = EAX*GR32
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8m  : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
-                "imul{b}\t$src", []>;    // AL,AH = AL*[mem8]
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
-                "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
-                "imul{l}\t$src", []>;  // EAX,EDX = EAX*[mem32]
-}
-} // neverHasSideEffects
-
-// unsigned division/remainder
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8r  : I<0xF6, MRM6r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
-               "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16r : I<0xF7, MRM6r, (outs),  (ins GR16:$src),   // DX:AX/r16 = AX,DX
-               "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def DIV32r : I<0xF7, MRM6r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
-               "div{l}\t$src", []>;
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8m  : I<0xF6, MRM6m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
-               "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src),  // DX:AX/[mem16] = AX,DX
-               "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-                                                    // EDX:EAX/[mem32] = EAX,EDX
-def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
-               "div{l}\t$src", []>;
-}
-
-// Signed division/remainder.
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8r : I<0xF6, MRM7r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
-               "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16r: I<0xF7, MRM7r, (outs),  (ins GR16:$src),   // DX:AX/r16 = AX,DX
-               "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32r: I<0xF7, MRM7r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
-               "idiv{l}\t$src", []>;
-let mayLoad = 1, mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
-               "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src),  // DX:AX/[mem16] = AX,DX
-               "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), 
-                                                    // EDX:EAX/[mem32] = EAX,EDX
-               "idiv{l}\t$src", []>;
-}
-
-//===----------------------------------------------------------------------===//
-//  Two address Instructions.
-//
-let Constraints = "$src1 = $dst" in {
-
-// Conditional moves
-let Uses = [EFLAGS] in {
-
-let Predicates = [HasCMov] in {
-let isCommutable = 1 in {
-def CMOVB16rr : I<0x42, MRMSrcReg,       // if <u, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovb{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_B, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVB32rr : I<0x42, MRMSrcReg,       // if <u, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovb{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_B, EFLAGS))]>,
-                   TB;
-def CMOVAE16rr: I<0x43, MRMSrcReg,       // if >=u, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovae{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_AE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVAE32rr: I<0x43, MRMSrcReg,       // if >=u, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovae{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_AE, EFLAGS))]>,
-                   TB;
-def CMOVE16rr : I<0x44, MRMSrcReg,       // if ==, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmove{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_E, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVE32rr : I<0x44, MRMSrcReg,       // if ==, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmove{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_E, EFLAGS))]>,
-                   TB;
-def CMOVNE16rr: I<0x45, MRMSrcReg,       // if !=, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovne{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_NE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVNE32rr: I<0x45, MRMSrcReg,       // if !=, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovne{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_NE, EFLAGS))]>,
-                   TB;
-def CMOVBE16rr: I<0x46, MRMSrcReg,       // if <=u, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_BE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVBE32rr: I<0x46, MRMSrcReg,       // if <=u, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_BE, EFLAGS))]>,
-                   TB;
-def CMOVA16rr : I<0x47, MRMSrcReg,       // if >u, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmova{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_A, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVA32rr : I<0x47, MRMSrcReg,       // if >u, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmova{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_A, EFLAGS))]>,
-                   TB;
-def CMOVL16rr : I<0x4C, MRMSrcReg,       // if <s, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovl{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_L, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVL32rr : I<0x4C, MRMSrcReg,       // if <s, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovl{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_L, EFLAGS))]>,
-                   TB;
-def CMOVGE16rr: I<0x4D, MRMSrcReg,       // if >=s, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovge{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_GE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVGE32rr: I<0x4D, MRMSrcReg,       // if >=s, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovge{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_GE, EFLAGS))]>,
-                   TB;
-def CMOVLE16rr: I<0x4E, MRMSrcReg,       // if <=s, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovle{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_LE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVLE32rr: I<0x4E, MRMSrcReg,       // if <=s, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovle{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_LE, EFLAGS))]>,
-                   TB;
-def CMOVG16rr : I<0x4F, MRMSrcReg,       // if >s, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovg{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_G, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVG32rr : I<0x4F, MRMSrcReg,       // if >s, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovg{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_G, EFLAGS))]>,
-                   TB;
-def CMOVS16rr : I<0x48, MRMSrcReg,       // if signed, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovs{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_S, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVS32rr : I<0x48, MRMSrcReg,       // if signed, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovs{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_S, EFLAGS))]>,
-                  TB;
-def CMOVNS16rr: I<0x49, MRMSrcReg,       // if !signed, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovns{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_NS, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNS32rr: I<0x49, MRMSrcReg,       // if !signed, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovns{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_NS, EFLAGS))]>,
-                  TB;
-def CMOVP16rr : I<0x4A, MRMSrcReg,       // if parity, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovp{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_P, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVP32rr : I<0x4A, MRMSrcReg,       // if parity, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovp{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_P, EFLAGS))]>,
-                  TB;
-def CMOVNP16rr : I<0x4B, MRMSrcReg,       // if !parity, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                    X86_COND_NP, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNP32rr : I<0x4B, MRMSrcReg,       // if !parity, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                    X86_COND_NP, EFLAGS))]>,
-                  TB;
-def CMOVO16rr : I<0x40, MRMSrcReg,       // if overflow, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovo{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_O, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVO32rr : I<0x40, MRMSrcReg,       // if overflow, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovo{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_O, EFLAGS))]>,
-                  TB;
-def CMOVNO16rr : I<0x41, MRMSrcReg,       // if !overflow, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovno{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                    X86_COND_NO, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNO32rr : I<0x41, MRMSrcReg,       // if !overflow, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovno{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                    X86_COND_NO, EFLAGS))]>,
-                  TB;
-} // isCommutable = 1
-
-def CMOVB16rm : I<0x42, MRMSrcMem,       // if <u, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovb{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_B, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVB32rm : I<0x42, MRMSrcMem,       // if <u, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovb{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_B, EFLAGS))]>,
-                   TB;
-def CMOVAE16rm: I<0x43, MRMSrcMem,       // if >=u, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovae{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_AE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVAE32rm: I<0x43, MRMSrcMem,       // if >=u, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovae{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_AE, EFLAGS))]>,
-                   TB;
-def CMOVE16rm : I<0x44, MRMSrcMem,       // if ==, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmove{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_E, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVE32rm : I<0x44, MRMSrcMem,       // if ==, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmove{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_E, EFLAGS))]>,
-                   TB;
-def CMOVNE16rm: I<0x45, MRMSrcMem,       // if !=, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovne{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_NE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVNE32rm: I<0x45, MRMSrcMem,       // if !=, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovne{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_NE, EFLAGS))]>,
-                   TB;
-def CMOVBE16rm: I<0x46, MRMSrcMem,       // if <=u, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_BE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVBE32rm: I<0x46, MRMSrcMem,       // if <=u, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_BE, EFLAGS))]>,
-                   TB;
-def CMOVA16rm : I<0x47, MRMSrcMem,       // if >u, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmova{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_A, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVA32rm : I<0x47, MRMSrcMem,       // if >u, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmova{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_A, EFLAGS))]>,
-                   TB;
-def CMOVL16rm : I<0x4C, MRMSrcMem,       // if <s, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovl{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_L, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVL32rm : I<0x4C, MRMSrcMem,       // if <s, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovl{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_L, EFLAGS))]>,
-                   TB;
-def CMOVGE16rm: I<0x4D, MRMSrcMem,       // if >=s, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovge{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_GE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVGE32rm: I<0x4D, MRMSrcMem,       // if >=s, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovge{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_GE, EFLAGS))]>,
-                   TB;
-def CMOVLE16rm: I<0x4E, MRMSrcMem,       // if <=s, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovle{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_LE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVLE32rm: I<0x4E, MRMSrcMem,       // if <=s, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovle{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_LE, EFLAGS))]>,
-                   TB;
-def CMOVG16rm : I<0x4F, MRMSrcMem,       // if >s, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovg{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_G, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVG32rm : I<0x4F, MRMSrcMem,       // if >s, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovg{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_G, EFLAGS))]>,
-                   TB;
-def CMOVS16rm : I<0x48, MRMSrcMem,       // if signed, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovs{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_S, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVS32rm : I<0x48, MRMSrcMem,       // if signed, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovs{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_S, EFLAGS))]>,
-                  TB;
-def CMOVNS16rm: I<0x49, MRMSrcMem,       // if !signed, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovns{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_NS, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNS32rm: I<0x49, MRMSrcMem,       // if !signed, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovns{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_NS, EFLAGS))]>,
-                  TB;
-def CMOVP16rm : I<0x4A, MRMSrcMem,       // if parity, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovp{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_P, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVP32rm : I<0x4A, MRMSrcMem,       // if parity, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovp{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_P, EFLAGS))]>,
-                  TB;
-def CMOVNP16rm : I<0x4B, MRMSrcMem,       // if !parity, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                    X86_COND_NP, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNP32rm : I<0x4B, MRMSrcMem,       // if !parity, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                    X86_COND_NP, EFLAGS))]>,
-                  TB;
-def CMOVO16rm : I<0x40, MRMSrcMem,       // if overflow, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovo{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_O, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVO32rm : I<0x40, MRMSrcMem,       // if overflow, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovo{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_O, EFLAGS))]>,
-                  TB;
-def CMOVNO16rm : I<0x41, MRMSrcMem,       // if !overflow, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovno{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                    X86_COND_NO, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNO32rm : I<0x41, MRMSrcMem,       // if !overflow, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovno{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                    X86_COND_NO, EFLAGS))]>,
-                  TB;
-} // Predicates = [HasCMov]
-
-// X86 doesn't have 8-bit conditional moves. Use a customInserter to
-// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
-// however that requires promoting the operands, and can induce additional
-// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
-// clobber EFLAGS, because if one of the operands is zero, the expansion
-// could involve an xor.
-let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
-def CMOV_GR8 : I<0, Pseudo,
-                 (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
-                 "#CMOV_GR8 PSEUDO!",
-                 [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
-                                          imm:$cond, EFLAGS))]>;
-
-let Predicates = [NoCMov] in {
-def CMOV_GR32 : I<0, Pseudo,
-                    (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
-                    "#CMOV_GR32* PSEUDO!",
-                    [(set GR32:$dst,
-                      (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_GR16 : I<0, Pseudo,
-                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
-                    "#CMOV_GR16* PSEUDO!",
-                    [(set GR16:$dst,
-                      (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_RFP32 : I<0, Pseudo,
-                    (outs RFP32:$dst),
-                    (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
-                    "#CMOV_RFP32 PSEUDO!",
-                    [(set RFP32:$dst,
-                      (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
-                                                  EFLAGS))]>;
-def CMOV_RFP64 : I<0, Pseudo,
-                    (outs RFP64:$dst),
-                    (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
-                    "#CMOV_RFP64 PSEUDO!",
-                    [(set RFP64:$dst,
-                      (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
-                                                  EFLAGS))]>;
-def CMOV_RFP80 : I<0, Pseudo,
-                    (outs RFP80:$dst),
-                    (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
-                    "#CMOV_RFP80 PSEUDO!",
-                    [(set RFP80:$dst,
-                      (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
-                                                  EFLAGS))]>;
-} // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] 
-} // Uses = [EFLAGS]
-
-
-// unary instructions
-let CodeSize = 2 in {
-let Defs = [EFLAGS] in {
-def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
-               "neg{b}\t$dst",
-               [(set GR8:$dst, (ineg GR8:$src1)),
-                (implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
-               "neg{w}\t$dst",
-               [(set GR16:$dst, (ineg GR16:$src1)),
-                (implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
-               "neg{l}\t$dst",
-               [(set GR32:$dst, (ineg GR32:$src1)),
-                (implicit EFLAGS)]>;
-                
-let Constraints = "" in {
-  def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
-                 "neg{b}\t$dst",
-                 [(store (ineg (loadi8 addr:$dst)), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
-                 "neg{w}\t$dst",
-                 [(store (ineg (loadi16 addr:$dst)), addr:$dst),
-                  (implicit EFLAGS)]>, OpSize;
-  def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
-                 "neg{l}\t$dst",
-                 [(store (ineg (loadi32 addr:$dst)), addr:$dst),
-                  (implicit EFLAGS)]>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Match xor -1 to not. Favors these over a move imm + xor to save code size.
-let AddedComplexity = 15 in {
-def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
-               "not{b}\t$dst",
-               [(set GR8:$dst, (not GR8:$src1))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
-               "not{w}\t$dst",
-               [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
-               "not{l}\t$dst",
-               [(set GR32:$dst, (not GR32:$src1))]>;
-}
-let Constraints = "" in {
-  def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
-                 "not{b}\t$dst",
-                 [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
-  def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
-                 "not{w}\t$dst",
-                 [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
-  def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
-                 "not{l}\t$dst",
-                 [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-} // Constraints = ""
-} // CodeSize
-
-// TODO: inc/dec is slow for P4, but fast for Pentium-M.
-let Defs = [EFLAGS] in {
-let CodeSize = 2 in
-def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
-               "inc{b}\t$dst",
-               [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
-
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in {  // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), 
-               "inc{w}\t$dst",
-               [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
-             OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), 
-               "inc{l}\t$dst",
-               [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
-             Requires<[In32BitMode]>;
-}
-let Constraints = "", CodeSize = 2 in {
-  def INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
-               [(store (add (loadi8 addr:$dst), 1), addr:$dst),
-                (implicit EFLAGS)]>;
-  def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
-               [(store (add (loadi16 addr:$dst), 1), addr:$dst),
-                (implicit EFLAGS)]>,
-               OpSize, Requires<[In32BitMode]>;
-  def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
-               [(store (add (loadi32 addr:$dst), 1), addr:$dst),
-                (implicit EFLAGS)]>,
-               Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-
-let CodeSize = 2 in
-def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
-               "dec{b}\t$dst",
-               [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in {   // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), 
-               "dec{w}\t$dst",
-               [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
-             OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), 
-               "dec{l}\t$dst",
-               [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
-             Requires<[In32BitMode]>;
-} // CodeSize = 2
-
-let Constraints = "", CodeSize = 2 in {
-  def DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
-               [(store (add (loadi8 addr:$dst), -1), addr:$dst),
-                (implicit EFLAGS)]>;
-  def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
-               [(store (add (loadi16 addr:$dst), -1), addr:$dst),
-                (implicit EFLAGS)]>,
-               OpSize, Requires<[In32BitMode]>;
-  def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
-               [(store (add (loadi32 addr:$dst), -1), addr:$dst),
-                (implicit EFLAGS)]>,
-               Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-} // Defs = [EFLAGS]
-
-// Logical operators...
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in {   // X = AND Y, Z   --> X = AND Z, Y
-def AND8rr  : I<0x20, MRMDestReg,
-               (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
-               "and{b}\t{$src2, $dst|$dst, $src2}",
-               [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>;
-def AND16rr : I<0x21, MRMDestReg,
-                (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                "and{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
-                                                      GR16:$src2))]>, OpSize;
-def AND32rr : I<0x21, MRMDestReg, 
-                (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                "and{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
-                                                      GR32:$src2))]>;
-}
-
-// AND instructions with the destination register in REG and the source register
-//   in R/M.  Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                  "and{b}\t{$src2, $dst|$dst, $src2}", []>;
-def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                   "and{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                   "and{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def AND8rm   : I<0x22, MRMSrcMem, 
-                 (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
-                 "and{b}\t{$src2, $dst|$dst, $src2}",
-                [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
-                                                     (loadi8 addr:$src2)))]>;
-def AND16rm  : I<0x23, MRMSrcMem, 
-                 (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                 "and{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
-                                                      (loadi16 addr:$src2)))]>,
-               OpSize;
-def AND32rm  : I<0x23, MRMSrcMem,
-                 (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                 "and{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
-                                                      (loadi32 addr:$src2)))]>;
-
-def AND8ri   : Ii8<0x80, MRM4r, 
-                   (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
-                   "and{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
-                                                        imm:$src2))]>;
-def AND16ri  : Ii16<0x81, MRM4r, 
-                    (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                    "and{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
-                                                          imm:$src2))]>, OpSize;
-def AND32ri  : Ii32<0x81, MRM4r, 
-                    (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                    "and{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
-                                                          imm:$src2))]>;
-def AND16ri8 : Ii8<0x83, MRM4r, 
-                   (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                   "and{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
-                                                         i16immSExt8:$src2))]>,
-                   OpSize;
-def AND32ri8 : Ii8<0x83, MRM4r, 
-                   (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "and{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
-                                                         i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  def AND8mr   : I<0x20, MRMDestMem,
-                   (outs), (ins i8mem :$dst, GR8 :$src),
-                   "and{b}\t{$src, $dst|$dst, $src}",
-                   [(store (and (load addr:$dst), GR8:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def AND16mr  : I<0x21, MRMDestMem,
-                   (outs), (ins i16mem:$dst, GR16:$src),
-                   "and{w}\t{$src, $dst|$dst, $src}",
-                   [(store (and (load addr:$dst), GR16:$src), addr:$dst),
-                    (implicit EFLAGS)]>,
-                   OpSize;
-  def AND32mr  : I<0x21, MRMDestMem,
-                   (outs), (ins i32mem:$dst, GR32:$src),
-                   "and{l}\t{$src, $dst|$dst, $src}",
-                   [(store (and (load addr:$dst), GR32:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def AND8mi   : Ii8<0x80, MRM4m,
-                     (outs), (ins i8mem :$dst, i8imm :$src),
-                     "and{b}\t{$src, $dst|$dst, $src}",
-                      [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst),
-                       (implicit EFLAGS)]>;
-  def AND16mi  : Ii16<0x81, MRM4m,
-                      (outs), (ins i16mem:$dst, i16imm:$src),
-                      "and{w}\t{$src, $dst|$dst, $src}",
-                      [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst),
-                       (implicit EFLAGS)]>,
-                      OpSize;
-  def AND32mi  : Ii32<0x81, MRM4m,
-                      (outs), (ins i32mem:$dst, i32imm:$src),
-                      "and{l}\t{$src, $dst|$dst, $src}",
-                      [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst),
-                       (implicit EFLAGS)]>;
-  def AND16mi8 : Ii8<0x83, MRM4m,
-                     (outs), (ins i16mem:$dst, i16i8imm :$src),
-                     "and{w}\t{$src, $dst|$dst, $src}",
-                [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst),
-                 (implicit EFLAGS)]>,
-                     OpSize;
-  def AND32mi8 : Ii8<0x83, MRM4m,
-                     (outs), (ins i32mem:$dst, i32i8imm :$src),
-                     "and{l}\t{$src, $dst|$dst, $src}",
-                [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst),
-                 (implicit EFLAGS)]>;
-
-  def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src),
-                   "and{b}\t{$src, %al|%al, $src}", []>;
-  def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src),
-                      "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
-                      "and{l}\t{$src, %eax|%eax, $src}", []>;
-
-} // Constraints = ""
-
-
-let isCommutable = 1 in {   // X = OR Y, Z   --> X = OR Z, Y
-def OR8rr    : I<0x08, MRMDestReg, (outs GR8 :$dst), 
-                 (ins GR8 :$src1, GR8 :$src2),
-                 "or{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>;
-def OR16rr   : I<0x09, MRMDestReg, (outs GR16:$dst), 
-                 (ins GR16:$src1, GR16:$src2),
-                 "or{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>,
-               OpSize;
-def OR32rr   : I<0x09, MRMDestReg, (outs GR32:$dst), 
-                 (ins GR32:$src1, GR32:$src2),
-                 "or{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>;
-}
-
-// OR instructions with the destination register in REG and the source register
-//   in R/M.  Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                  "or{b}\t{$src2, $dst|$dst, $src2}", []>;
-def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
-                   (ins GR16:$src1, GR16:$src2),
-                   "or{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), 
-                   (ins GR32:$src1, GR32:$src2),
-                   "or{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-                  
-def OR8rm    : I<0x0A, MRMSrcMem, (outs GR8 :$dst), 
-                 (ins GR8 :$src1, i8mem :$src2),
-                 "or{b}\t{$src2, $dst|$dst, $src2}",
-                [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1,
-                                                    (load addr:$src2)))]>;
-def OR16rm   : I<0x0B, MRMSrcMem, (outs GR16:$dst), 
-                 (ins GR16:$src1, i16mem:$src2),
-                 "or{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
-                                                     (load addr:$src2)))]>,
-               OpSize;
-def OR32rm   : I<0x0B, MRMSrcMem, (outs GR32:$dst), 
-                 (ins GR32:$src1, i32mem:$src2),
-                 "or{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
-                                                     (load addr:$src2)))]>;
-
-def OR8ri    : Ii8 <0x80, MRM1r, (outs GR8 :$dst), 
-                    (ins GR8 :$src1, i8imm:$src2),
-                    "or{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>;
-def OR16ri   : Ii16<0x81, MRM1r, (outs GR16:$dst), 
-                    (ins GR16:$src1, i16imm:$src2),
-                    "or{w}\t{$src2, $dst|$dst, $src2}", 
-                    [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
-                                                        imm:$src2))]>, OpSize;
-def OR32ri   : Ii32<0x81, MRM1r, (outs GR32:$dst), 
-                    (ins GR32:$src1, i32imm:$src2),
-                    "or{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
-                                                         imm:$src2))]>;
-
-def OR16ri8  : Ii8<0x83, MRM1r, (outs GR16:$dst), 
-                   (ins GR16:$src1, i16i8imm:$src2),
-                   "or{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
-                                                i16immSExt8:$src2))]>, OpSize;
-def OR32ri8  : Ii8<0x83, MRM1r, (outs GR32:$dst), 
-                   (ins GR32:$src1, i32i8imm:$src2),
-                   "or{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
-                                                        i32immSExt8:$src2))]>;
-let Constraints = "" in {
-  def OR8mr  : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
-                 "or{b}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), GR8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
-                 "or{w}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), GR16:$src), addr:$dst),
-                  (implicit EFLAGS)]>, OpSize;
-  def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                 "or{l}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), GR32:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def OR8mi    : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
-                 "or{b}\t{$src, $dst|$dst, $src}",
-                 [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def OR16mi   : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src),
-                 "or{w}\t{$src, $dst|$dst, $src}",
-                 [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst),
-                  (implicit EFLAGS)]>,
-                 OpSize;
-  def OR32mi   : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src),
-                 "or{l}\t{$src, $dst|$dst, $src}",
-                 [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def OR16mi8  : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src),
-                 "or{w}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>,
-                     OpSize;
-  def OR32mi8  : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
-                 "or{l}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-                  
-  def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src),
-                   "or{b}\t{$src, %al|%al, $src}", []>;
-  def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src),
-                      "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
-                      "or{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-
-let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
-  def XOR8rr   : I<0x30, MRMDestReg,
-                   (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
-                   "xor{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
-                                                        GR8:$src2))]>;
-  def XOR16rr  : I<0x31, MRMDestReg, 
-                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), 
-                   "xor{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
-                                                         GR16:$src2))]>, OpSize;
-  def XOR32rr  : I<0x31, MRMDestReg, 
-                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), 
-                   "xor{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
-                                                         GR32:$src2))]>;
-} // isCommutable = 1
-
-// XOR instructions with the destination register in REG and the source register
-//   in R/M.  Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                  "xor{b}\t{$src2, $dst|$dst, $src2}", []>;
-def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                   "xor{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                   "xor{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def XOR8rm   : I<0x32, MRMSrcMem, 
-                 (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), 
-                 "xor{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
-                                                      (load addr:$src2)))]>;
-def XOR16rm  : I<0x33, MRMSrcMem, 
-                 (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), 
-                 "xor{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
-                                                       (load addr:$src2)))]>,
-                 OpSize;
-def XOR32rm  : I<0x33, MRMSrcMem, 
-                 (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), 
-                 "xor{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
-                                                       (load addr:$src2)))]>;
-
-def XOR8ri  : Ii8<0x80, MRM6r, 
-                  (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), 
-                  "xor{b}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>;
-def XOR16ri : Ii16<0x81, MRM6r, 
-                   (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), 
-                   "xor{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
-                                                         imm:$src2))]>, OpSize;
-def XOR32ri  : Ii32<0x81, MRM6r, 
-                    (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), 
-                    "xor{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
-                                                          imm:$src2))]>;
-def XOR16ri8 : Ii8<0x83, MRM6r, 
-                   (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                   "xor{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
-                                                         i16immSExt8:$src2))]>,
-                   OpSize;
-def XOR32ri8 : Ii8<0x83, MRM6r, 
-                   (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "xor{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
-                                                         i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  def XOR8mr   : I<0x30, MRMDestMem,
-                   (outs), (ins i8mem :$dst, GR8 :$src),
-                   "xor{b}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def XOR16mr  : I<0x31, MRMDestMem,
-                   (outs), (ins i16mem:$dst, GR16:$src),
-                   "xor{w}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
-                    (implicit EFLAGS)]>,
-                   OpSize;
-  def XOR32mr  : I<0x31, MRMDestMem,
-                   (outs), (ins i32mem:$dst, GR32:$src),
-                   "xor{l}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (load addr:$dst), GR32:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def XOR8mi   : Ii8<0x80, MRM6m,
-                     (outs), (ins i8mem :$dst, i8imm :$src),
-                     "xor{b}\t{$src, $dst|$dst, $src}",
-                    [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst),
-                     (implicit EFLAGS)]>;
-  def XOR16mi  : Ii16<0x81, MRM6m,
-                      (outs), (ins i16mem:$dst, i16imm:$src),
-                      "xor{w}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst),
-                    (implicit EFLAGS)]>,
-                      OpSize;
-  def XOR32mi  : Ii32<0x81, MRM6m,
-                      (outs), (ins i32mem:$dst, i32imm:$src),
-                      "xor{l}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def XOR16mi8 : Ii8<0x83, MRM6m,
-                     (outs), (ins i16mem:$dst, i16i8imm :$src),
-                     "xor{w}\t{$src, $dst|$dst, $src}",
-                 [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>,
-                     OpSize;
-  def XOR32mi8 : Ii8<0x83, MRM6m,
-                     (outs), (ins i32mem:$dst, i32i8imm :$src),
-                     "xor{l}\t{$src, $dst|$dst, $src}",
-                 [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-                  
-  def XOR8i8   : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
-                      "xor{b}\t{$src, %al|%al, $src}", []>;
-  def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src),
-                      "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
-                      "xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Shift instructions
-let Defs = [EFLAGS] in {
-let Uses = [CL] in {
-def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "shl{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (shl GR8:$src1, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shl{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shl{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (shl GR32:$src1, CL))]>;
-} // Uses = [CL]
-
-def SHL8ri   : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "shl{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
-                   
-let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
-def SHL16ri  : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "shl{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHL32ri  : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "shl{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
-
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper.
-
-def SHL8r1   : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
-                 "shl{b}\t$dst", []>;
-def SHL16r1  : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shl{w}\t$dst", []>, OpSize;
-def SHL32r1  : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shl{l}\t$dst", []>;
-
-} // isConvertibleToThreeAddress = 1
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SHL8mCL  : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
-                   "shl{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
-                   "shl{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
-                   "shl{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def SHL8mi   : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "shl{b}\t{$src, $dst|$dst, $src}",
-                  [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def SHL16mi  : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "shl{w}\t{$src, $dst|$dst, $src}",
-                 [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def SHL32mi  : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "shl{l}\t{$src, $dst|$dst, $src}",
-                 [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Shift by 1
-  def SHL8m1   : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
-                   "shl{b}\t$dst",
-                  [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def SHL16m1  : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
-                   "shl{w}\t$dst",
-                 [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-                     OpSize;
-  def SHL32m1  : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
-                   "shl{l}\t$dst",
-                 [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "shr{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (srl GR8:$src1, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shr{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shr{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (srl GR32:$src1, CL))]>;
-}
-
-def SHR8ri   : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                   "shr{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
-def SHR16ri  : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "shr{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHR32ri  : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "shr{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SHR8r1   : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
-                 "shr{b}\t$dst",
-                 [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
-def SHR16r1  : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shr{w}\t$dst",
-                 [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
-def SHR32r1  : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shr{l}\t$dst",
-                 [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SHR8mCL  : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
-                   "shr{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
-                   "shr{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
-                   OpSize;
-  def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
-                   "shr{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def SHR8mi   : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "shr{b}\t{$src, $dst|$dst, $src}",
-                  [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def SHR16mi  : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "shr{w}\t{$src, $dst|$dst, $src}",
-                 [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def SHR32mi  : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "shr{l}\t{$src, $dst|$dst, $src}",
-                 [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Shift by 1
-  def SHR8m1   : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
-                   "shr{b}\t$dst",
-                  [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def SHR16m1  : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
-                   "shr{w}\t$dst",
-                 [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
-  def SHR32m1  : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
-                   "shr{l}\t$dst",
-                 [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "sar{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (sra GR8:$src1, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
-                 "sar{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
-                 "sar{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (sra GR32:$src1, CL))]>;
-}
-
-def SAR8ri   : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "sar{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
-def SAR16ri  : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "sar{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
-                   OpSize;
-def SAR32ri  : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "sar{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SAR8r1   : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "sar{b}\t$dst",
-                 [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
-def SAR16r1  : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
-                 "sar{w}\t$dst",
-                 [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
-def SAR32r1  : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
-                 "sar{l}\t$dst",
-                 [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SAR8mCL  : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
-                   "sar{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
-                   "sar{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), 
-                   "sar{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def SAR8mi   : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "sar{b}\t{$src, $dst|$dst, $src}",
-                  [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def SAR16mi  : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "sar{w}\t{$src, $dst|$dst, $src}",
-                 [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def SAR32mi  : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "sar{l}\t{$src, $dst|$dst, $src}",
-                 [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Shift by 1
-  def SAR8m1   : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
-                   "sar{b}\t$dst",
-                  [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def SAR16m1  : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
-                   "sar{w}\t$dst",
-                 [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-                     OpSize;
-  def SAR32m1  : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
-                   "sar{l}\t$dst",
-                 [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-// Rotate instructions
-
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
-               "rcl{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
-                "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
-                 "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-  
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
-                "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
-                 "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
-                  "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
-                "rcl{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
-                 "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
-                  "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-                  
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
-               "rcr{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
-                "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
-                 "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-  
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
-                "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
-                 "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
-                  "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
-                "rcr{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
-                 "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
-                  "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Constraints = "" in {
-def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
-               "rcl{b}\t{1, $dst|$dst, 1}", []>;
-def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
-                 "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
-                "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
-                  "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
-                "rcl{l}\t{1, $dst|$dst, 1}", []>;
-def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
-                  "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
-               "rcr{b}\t{1, $dst|$dst, 1}", []>;
-def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
-                 "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
-                "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
-                  "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
-                "rcr{l}\t{1, $dst|$dst, 1}", []>;
-def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
-                  "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
-                "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
-                 "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
-                 "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
-                "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
-                 "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
-                 "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = ""
-
-// FIXME: provide shorter instructions when imm8 == 1
-let Uses = [CL] in {
-def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "rol{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
-                 "rol{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
-                 "rol{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
-}
-
-def ROL8ri   : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "rol{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
-def ROL16ri  : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "rol{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, 
-                   OpSize;
-def ROL32ri  : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "rol{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROL8r1   : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "rol{b}\t$dst",
-                 [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
-def ROL16r1  : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
-                 "rol{w}\t$dst",
-                 [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
-def ROL32r1  : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
-                 "rol{l}\t$dst",
-                 [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def ROL8mCL  : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
-                   "rol{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
-                   "rol{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
-                   "rol{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def ROL8mi   : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "rol{b}\t{$src, $dst|$dst, $src}",
-                 [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def ROL16mi  : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "rol{w}\t{$src, $dst|$dst, $src}",
-                [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def ROL32mi  : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "rol{l}\t{$src, $dst|$dst, $src}",
-                [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Rotate by 1
-  def ROL8m1   : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
-                   "rol{b}\t$dst",
-                 [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def ROL16m1  : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
-                   "rol{w}\t$dst",
-                [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-                     OpSize;
-  def ROL32m1  : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
-                   "rol{l}\t$dst",
-                [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "ror{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
-                 "ror{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
-                 "ror{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
-}
-
-def ROR8ri   : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "ror{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
-def ROR16ri  : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "ror{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, 
-                   OpSize;
-def ROR32ri  : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "ror{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROR8r1   : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "ror{b}\t$dst",
-                 [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
-def ROR16r1  : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
-                 "ror{w}\t$dst",
-                 [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
-def ROR32r1  : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
-                 "ror{l}\t$dst",
-                 [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def ROR8mCL  : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
-                   "ror{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
-                   "ror{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), 
-                   "ror{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def ROR8mi   : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "ror{b}\t{$src, $dst|$dst, $src}",
-                 [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def ROR16mi  : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "ror{w}\t{$src, $dst|$dst, $src}",
-                [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def ROR32mi  : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "ror{l}\t{$src, $dst|$dst, $src}",
-                [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Rotate by 1
-  def ROR8m1   : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
-                   "ror{b}\t$dst",
-                 [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def ROR16m1  : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
-                   "ror{w}\t$dst",
-                [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-                     OpSize;
-  def ROR32m1  : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
-                   "ror{l}\t$dst",
-                [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-
-// Double shift instructions (generalizations of rotate)
-let Uses = [CL] in {
-def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), 
-                   (ins GR32:$src1, GR32:$src2),
-                   "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                   [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
-                   (ins GR32:$src1, GR32:$src2),
-                   "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                   [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), 
-                   (ins GR16:$src1, GR16:$src2),
-                   "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                   [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
-                   TB, OpSize;
-def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), 
-                   (ins GR16:$src1, GR16:$src2),
-                   "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                   [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
-                   TB, OpSize;
-}
-
-let isCommutable = 1 in {  // These instructions commute to each other.
-def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
-                     (outs GR32:$dst), 
-                     (ins GR32:$src1, GR32:$src2, i8imm:$src3),
-                     "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
-                                      (i8 imm:$src3)))]>,
-                 TB;
-def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
-                     (outs GR32:$dst), 
-                     (ins GR32:$src1, GR32:$src2, i8imm:$src3),
-                     "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
-                                      (i8 imm:$src3)))]>,
-                 TB;
-def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
-                     (outs GR16:$dst), 
-                     (ins GR16:$src1, GR16:$src2, i8imm:$src3),
-                     "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
-                                      (i8 imm:$src3)))]>,
-                     TB, OpSize;
-def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
-                     (outs GR16:$dst), 
-                     (ins GR16:$src1, GR16:$src2, i8imm:$src3),
-                     "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
-                                      (i8 imm:$src3)))]>,
-                     TB, OpSize;
-}
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                     "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                     [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
-                       addr:$dst)]>, TB;
-  def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                    "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                    [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
-                      addr:$dst)]>, TB;
-  }
-  def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
-                      (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
-                      "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
-                                        (i8 imm:$src3)), addr:$dst)]>,
-                      TB;
-  def SHRD32mri8 : Ii8<0xAC, MRMDestMem, 
-                       (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
-                       "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
-                                         (i8 imm:$src3)), addr:$dst)]>,
-                       TB;
-
-  let Uses = [CL] in {
-  def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                     "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                     [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
-                       addr:$dst)]>, TB, OpSize;
-  def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                    "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                    [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
-                      addr:$dst)]>, TB, OpSize;
-  }
-  def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
-                      (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
-                      "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
-                                        (i8 imm:$src3)), addr:$dst)]>,
-                      TB, OpSize;
-  def SHRD16mri8 : Ii8<0xAC, MRMDestMem, 
-                       (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
-                       "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
-                                        (i8 imm:$src3)), addr:$dst)]>,
-                       TB, OpSize;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-
-// Arithmetic.
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in {   // X = ADD Y, Z   --> X = ADD Z, Y
-// Register-Register Addition
-def ADD8rr    : I<0x00, MRMDestReg, (outs GR8 :$dst),
-                                    (ins GR8 :$src1, GR8 :$src2),
-                  "add{b}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
-// Register-Register Addition
-def ADD16rr  : I<0x01, MRMDestReg, (outs GR16:$dst),
-                                   (ins GR16:$src1, GR16:$src2),
-                 "add{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
-                                                       GR16:$src2))]>, OpSize;
-def ADD32rr  : I<0x01, MRMDestReg, (outs GR32:$dst),
-                                   (ins GR32:$src1, GR32:$src2),
-                 "add{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
-                                                       GR32:$src2))]>;
-} // end isConvertibleToThreeAddress
-} // end isCommutable
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def ADD8rr_alt: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                   "add{b}\t{$src2, $dst|$dst, $src2}", []>;
-  def ADD16rr_alt: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2),
-                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-  def ADD32rr_alt: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2),
-                    "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Addition
-def ADD8rm   : I<0x02, MRMSrcMem, (outs GR8 :$dst),
-                                  (ins GR8 :$src1, i8mem :$src2),
-                 "add{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1,
-                                                      (load addr:$src2)))]>;
-def ADD16rm  : I<0x03, MRMSrcMem, (outs GR16:$dst),
-                                  (ins GR16:$src1, i16mem:$src2),
-                 "add{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
-                                                  (load addr:$src2)))]>, OpSize;
-def ADD32rm  : I<0x03, MRMSrcMem, (outs GR32:$dst),
-                                  (ins GR32:$src1, i32mem:$src2),
-                 "add{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
-                                                       (load addr:$src2)))]>;
-                  
-// Register-Integer Addition
-def ADD8ri    : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                    "add{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, EFLAGS,
-                          (X86add_flag GR8:$src1, imm:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
-// Register-Integer Addition
-def ADD16ri  : Ii16<0x81, MRM0r, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16imm:$src2),
-                    "add{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, EFLAGS,
-                          (X86add_flag GR16:$src1, imm:$src2))]>, OpSize;
-def ADD32ri  : Ii32<0x81, MRM0r, (outs GR32:$dst),
-                                 (ins GR32:$src1, i32imm:$src2),
-                    "add{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS, 
-                          (X86add_flag GR32:$src1, imm:$src2))]>;
-def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
-                                (ins GR16:$src1, i16i8imm:$src2),
-                   "add{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS,
-                         (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
-                                (ins GR32:$src1, i32i8imm:$src2),
-                   "add{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS,
-                         (X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
-}
-
-let Constraints = "" in {
-  // Memory-Register Addition
-  def ADD8mr   : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
-                   "add{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (add (load addr:$dst), GR8:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def ADD16mr  : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "add{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (add (load addr:$dst), GR16:$src2), addr:$dst),
-                    (implicit EFLAGS)]>, OpSize;
-  def ADD32mr  : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                   "add{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (add (load addr:$dst), GR32:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def ADD8mi   : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
-                     "add{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def ADD16mi  : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
-                      "add{w}\t{$src2, $dst|$dst, $src2}",
-                  [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst),
-                   (implicit EFLAGS)]>, OpSize;
-  def ADD32mi  : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
-                      "add{l}\t{$src2, $dst|$dst, $src2}",
-                      [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst),
-                       (implicit EFLAGS)]>;
-  def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-                     "add{w}\t{$src2, $dst|$dst, $src2}",
-                     [(store (add (load addr:$dst), i16immSExt8:$src2),
-                                  addr:$dst),
-                      (implicit EFLAGS)]>, OpSize;
-  def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                     "add{l}\t{$src2, $dst|$dst, $src2}",
-                  [(store (add (load addr:$dst), i32immSExt8:$src2),
-                               addr:$dst),
-                   (implicit EFLAGS)]>;
-
-  // addition to rAX
-  def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src),
-                   "add{b}\t{$src, %al|%al, $src}", []>;
-  def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src),
-                      "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
-                      "add{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-let isCommutable = 1 in {  // X = ADC Y, Z --> X = ADC Z, Y
-def ADC8rr   : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                 "adc{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>;
-def ADC16rr  : I<0x11, MRMDestReg, (outs GR16:$dst),
-                                   (ins GR16:$src1, GR16:$src2),
-                 "adc{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize;
-def ADC32rr  : I<0x11, MRMDestReg, (outs GR32:$dst),
-                                   (ins GR32:$src1, GR32:$src2),
-                 "adc{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
-}
-
-let isCodeGenOnly = 1 in {
-def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                 "adc{b}\t{$src2, $dst|$dst, $src2}", []>;
-def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                    "adc{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                    "adc{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC8rm   : I<0x12, MRMSrcMem , (outs GR8:$dst), 
-                                   (ins GR8:$src1, i8mem:$src2),
-                 "adc{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>;
-def ADC16rm  : I<0x13, MRMSrcMem , (outs GR16:$dst),
-                                   (ins GR16:$src1, i16mem:$src2),
-                 "adc{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>,
-                 OpSize;
-def ADC32rm  : I<0x13, MRMSrcMem , (outs GR32:$dst),
-                                   (ins GR32:$src1, i32mem:$src2),
-                 "adc{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
-def ADC8ri   : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                    "adc{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>;
-def ADC16ri  : Ii16<0x81, MRM2r, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16imm:$src2),
-                    "adc{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize;
-def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst),
-                                (ins GR16:$src1, i16i8imm:$src2),
-                   "adc{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>,
-                 OpSize;
-def ADC32ri  : Ii32<0x81, MRM2r, (outs GR32:$dst),
-                                 (ins GR32:$src1, i32imm:$src2),
-                    "adc{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
-def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
-                                (ins GR32:$src1, i32i8imm:$src2),
-                   "adc{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  def ADC8mr   : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
-                   "adc{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
-  def ADC16mr  : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "adc{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>,
-                   OpSize;
-  def ADC32mr  : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                   "adc{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
-  def ADC8mi   : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2),
-                      "adc{b}\t{$src2, $dst|$dst, $src2}",
-                  [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
-  def ADC16mi  : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2),
-                      "adc{w}\t{$src2, $dst|$dst, $src2}",
-                  [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
-                  OpSize;
-  def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-                     "adc{w}\t{$src2, $dst|$dst, $src2}",
-               [(store (adde (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
-               OpSize;
-  def ADC32mi  : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2),
-                      "adc{l}\t{$src2, $dst|$dst, $src2}",
-                  [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
-  def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                     "adc{l}\t{$src2, $dst|$dst, $src2}",
-               [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
-  def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src),
-                   "adc{b}\t{$src, %al|%al, $src}", []>;
-  def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src),
-                      "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
-                      "adc{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Uses = [EFLAGS]
-
-// Register-Register Subtraction
-def SUB8rr  : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                "sub{b}\t{$src2, $dst|$dst, $src2}",
-                [(set GR8:$dst, EFLAGS,
-                      (X86sub_flag GR8:$src1, GR8:$src2))]>;
-def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
-                "sub{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS,
-                      (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize;
-def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
-                "sub{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS,
-                      (X86sub_flag GR32:$src1, GR32:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                   "sub{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                    "sub{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB8rm  : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
-                                 (ins GR8 :$src1, i8mem :$src2),
-                "sub{b}\t{$src2, $dst|$dst, $src2}",
-                [(set GR8:$dst, EFLAGS,
-                      (X86sub_flag GR8:$src1, (load addr:$src2)))]>;
-def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16mem:$src2),
-                "sub{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS,
-                      (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize;
-def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
-                                 (ins GR32:$src1, i32mem:$src2),
-                "sub{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS,
-                      (X86sub_flag GR32:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB8ri   : Ii8 <0x80, MRM5r, (outs GR8:$dst),
-                                 (ins GR8:$src1, i8imm:$src2),
-                    "sub{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, EFLAGS,
-                          (X86sub_flag GR8:$src1, imm:$src2))]>;
-def SUB16ri  : Ii16<0x81, MRM5r, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16imm:$src2),
-                    "sub{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, EFLAGS,
-                          (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize;
-def SUB32ri  : Ii32<0x81, MRM5r, (outs GR32:$dst),
-                                 (ins GR32:$src1, i32imm:$src2),
-                    "sub{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS,
-                          (X86sub_flag GR32:$src1, imm:$src2))]>;
-def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
-                                (ins GR16:$src1, i16i8imm:$src2),
-                   "sub{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS,
-                         (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
-                                (ins GR32:$src1, i32i8imm:$src2),
-                   "sub{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS,
-                         (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  // Memory-Register Subtraction
-  def SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
-                   "sub{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def SUB16mr  : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "sub{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
-                    (implicit EFLAGS)]>, OpSize;
-  def SUB32mr  : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
-                   "sub{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-
-  // Memory-Integer Subtraction
-  def SUB8mi   : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), 
-                     "sub{b}\t{$src2, $dst|$dst, $src2}",
-                     [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
-                      (implicit EFLAGS)]>;
-  def SUB16mi  : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), 
-                      "sub{w}\t{$src2, $dst|$dst, $src2}",
-                      [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst),
-                       (implicit EFLAGS)]>, OpSize;
-  def SUB32mi  : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), 
-                      "sub{l}\t{$src2, $dst|$dst, $src2}",
-                      [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst),
-                       (implicit EFLAGS)]>;
-  def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), 
-                     "sub{w}\t{$src2, $dst|$dst, $src2}",
-                     [(store (sub (load addr:$dst), i16immSExt8:$src2),
-                             addr:$dst),
-                      (implicit EFLAGS)]>, OpSize;
-  def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                     "sub{l}\t{$src2, $dst|$dst, $src2}",
-                     [(store (sub (load addr:$dst), i32immSExt8:$src2),
-                             addr:$dst),
-                      (implicit EFLAGS)]>;
-                      
-  def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src),
-                   "sub{b}\t{$src, %al|%al, $src}", []>;
-  def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src),
-                      "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
-                      "sub{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-def SBB8rr     : I<0x18, MRMDestReg, (outs GR8:$dst),
-                                     (ins GR8:$src1, GR8:$src2),
-                  "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>;
-def SBB16rr    : I<0x19, MRMDestReg, (outs GR16:$dst),
-                                     (ins GR16:$src1, GR16:$src2),
-                  "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize;
-def SBB32rr    : I<0x19, MRMDestReg, (outs GR32:$dst),
-                                      (ins GR32:$src1, GR32:$src2),
-                  "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
-
-let Constraints = "" in {
-  def SBB8mr   : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), 
-                   "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
-  def SBB16mr  : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), 
-                   "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>,
-                   OpSize;
-  def SBB32mr  : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
-                   "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
-  def SBB8mi  : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), 
-                    "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
-  def SBB16mi  : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2), 
-                      "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
-                  OpSize;
-  def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2), 
-                     "sbb{w}\t{$src2, $dst|$dst, $src2}",
-               [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
-               OpSize;
-  def SBB32mi  : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2), 
-                      "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
-  def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2), 
-                     "sbb{l}\t{$src2, $dst|$dst, $src2}",
-               [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-               
-  def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src),
-                   "sbb{b}\t{$src, %al|%al, $src}", []>;
-  def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src),
-                      "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
-                      "sbb{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let isCodeGenOnly = 1 in {
-def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                   "sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                    "sbb{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                    "sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def SBB8rm   : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
-                    "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>;
-def SBB16rm  : I<0x1B, MRMSrcMem, (outs GR16:$dst),
-                                  (ins GR16:$src1, i16mem:$src2),
-                    "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>,
-                    OpSize;
-def SBB32rm  : I<0x1B, MRMSrcMem, (outs GR32:$dst),
-                                  (ins GR32:$src1, i32mem:$src2),
-                    "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
-def SBB8ri   : Ii8<0x80, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                    "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>;
-def SBB16ri  : Ii16<0x81, MRM3r, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16imm:$src2),
-                    "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize;
-def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst),
-                                (ins GR16:$src1, i16i8imm:$src2),
-                   "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>,
-                   OpSize;
-def SBB32ri  : Ii32<0x81, MRM3r, (outs GR32:$dst), 
-                                 (ins GR32:$src1, i32imm:$src2),
-                    "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
-def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst),
-                                (ins GR32:$src1, i32i8imm:$src2),
-                   "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in {  // X = IMUL Y, Z --> X = IMUL Z, Y
-// Register-Register Signed Integer Multiply
-def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
-                 "imul{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS,
-                       (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
-def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
-                 "imul{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS,
-                       (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
-}
-
-// Register-Memory Signed Integer Multiply
-def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
-                                  (ins GR16:$src1, i16mem:$src2),
-                 "imul{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS,
-                       (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
-               TB, OpSize;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), 
-                 (ins GR32:$src1, i32mem:$src2),
-                 "imul{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS,
-                       (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
-} // Defs = [EFLAGS]
-} // end Two Address instructions
-
-// Suprisingly enough, these are not two address instructions!
-let Defs = [EFLAGS] in {
-// Register-Integer Signed Integer Multiply
-def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
-                      (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, EFLAGS, 
-                            (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
-def IMUL32rri  : Ii32<0x69, MRMSrcReg,                      // GR32 = GR32*I32
-                      (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR32:$dst, EFLAGS,
-                            (X86smul_flag GR32:$src1, imm:$src2))]>;
-def IMUL16rri8 : Ii8<0x6B, MRMSrcReg,                       // GR16 = GR16*I8
-                     (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR16:$dst, EFLAGS,
-                           (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
-                 OpSize;
-def IMUL32rri8 : Ii8<0x6B, MRMSrcReg,                       // GR32 = GR32*I8
-                     (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR32:$dst, EFLAGS,
-                           (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-// Memory-Integer Signed Integer Multiply
-def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                     // GR16 = [mem16]*I16
-                      (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
-                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, EFLAGS,
-                            (X86smul_flag (load addr:$src1), imm:$src2))]>,
-                 OpSize;
-def IMUL32rmi  : Ii32<0x69, MRMSrcMem,                     // GR32 = [mem32]*I32
-                      (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
-                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR32:$dst, EFLAGS,
-                            (X86smul_flag (load addr:$src1), imm:$src2))]>;
-def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR16 = [mem16]*I8
-                     (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
-                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR16:$dst, EFLAGS,
-                           (X86smul_flag (load addr:$src1),
-                                         i16immSExt8:$src2))]>, OpSize;
-def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR32 = [mem32]*I8
-                     (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
-                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR32:$dst, EFLAGS,
-                           (X86smul_flag (load addr:$src1),
-                                         i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-// Test instructions are just like AND, except they don't generate a result.
-//
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in {   // TEST X, Y   --> TEST Y, X
-def TEST8rr  : I<0x84, MRMSrcReg, (outs),  (ins GR8:$src1, GR8:$src2),
-                     "test{b}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>;
-def TEST16rr : I<0x85, MRMSrcReg, (outs),  (ins GR16:$src1, GR16:$src2),
-                     "test{w}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2),
-                      0))]>,
-                 OpSize;
-def TEST32rr : I<0x85, MRMSrcReg, (outs),  (ins GR32:$src1, GR32:$src2),
-                     "test{l}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2),
-                      0))]>;
-}
-
-def TEST8i8  : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src),
-                   "test{b}\t{$src, %al|%al, $src}", []>;
-def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src),
-                     "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src),
-                     "test{l}\t{$src, %eax|%eax, $src}", []>;
-
-def TEST8rm  : I<0x84, MRMSrcMem, (outs),  (ins GR8 :$src1, i8mem :$src2),
-                     "test{b}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)),
-                       0))]>;
-def TEST16rm : I<0x85, MRMSrcMem, (outs),  (ins GR16:$src1, i16mem:$src2),
-                     "test{w}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and GR16:$src1,
-                                         (loadi16 addr:$src2)), 0))]>, OpSize;
-def TEST32rm : I<0x85, MRMSrcMem, (outs),  (ins GR32:$src1, i32mem:$src2),
-                     "test{l}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and GR32:$src1,
-                                                (loadi32 addr:$src2)), 0))]>;
-
-def TEST8ri  : Ii8 <0xF6, MRM0r,                     // flags = GR8  & imm8
-                    (outs),  (ins GR8:$src1, i8imm:$src2),
-                    "test{b}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>;
-def TEST16ri : Ii16<0xF7, MRM0r,                     // flags = GR16 & imm16
-                    (outs),  (ins GR16:$src1, i16imm:$src2),
-                    "test{w}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>,
-                    OpSize;
-def TEST32ri : Ii32<0xF7, MRM0r,                     // flags = GR32 & imm32
-                    (outs),  (ins GR32:$src1, i32imm:$src2),
-                    "test{l}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>;
-
-def TEST8mi  : Ii8 <0xF6, MRM0m,                   // flags = [mem8]  & imm8
-                    (outs), (ins i8mem:$src1, i8imm:$src2),
-                    "test{b}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2),
-                     0))]>;
-def TEST16mi : Ii16<0xF7, MRM0m,                   // flags = [mem16] & imm16
-                    (outs), (ins i16mem:$src1, i16imm:$src2),
-                    "test{w}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2),
-                     0))]>, OpSize;
-def TEST32mi : Ii32<0xF7, MRM0m,                   // flags = [mem32] & imm32
-                    (outs), (ins i32mem:$src1, i32imm:$src2),
-                    "test{l}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2),
-                     0))]>;
-} // Defs = [EFLAGS]
-
 
 // Condition code ops, incl. set if equal/not equal/...
 let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
@@ -3371,305 +905,10 @@
 let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
 def LAHF     : I<0x9F, RawFrm, (outs),  (ins), "lahf", []>;  // AH = flags
 
-let Uses = [EFLAGS] in {
-// Use sbb to materialize carry bit.
-let Defs = [EFLAGS], isCodeGenOnly = 1 in {
-// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
-def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
-                 [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
-                 [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
-                OpSize;
-def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
-                 [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-} // isCodeGenOnly
-
-def SETEr    : I<0x94, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "sete\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>,
-               TB;                        // GR8 = ==
-def SETEm    : I<0x94, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "sete\t$dst",
-                 [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = ==
-
-def SETNEr   : I<0x95, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setne\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>,
-               TB;                        // GR8 = !=
-def SETNEm   : I<0x95, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setne\t$dst",
-                 [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = !=
-
-def SETLr    : I<0x9C, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setl\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>,
-               TB;                        // GR8 = <  signed
-def SETLm    : I<0x9C, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setl\t$dst",
-                 [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <  signed
-
-def SETGEr   : I<0x9D, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setge\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>,
-               TB;                        // GR8 = >= signed
-def SETGEm   : I<0x9D, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setge\t$dst",
-                 [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = >= signed
-
-def SETLEr   : I<0x9E, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setle\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>,
-               TB;                        // GR8 = <= signed
-def SETLEm   : I<0x9E, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setle\t$dst",
-                 [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <= signed
-
-def SETGr    : I<0x9F, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setg\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>,
-               TB;                        // GR8 = >  signed
-def SETGm    : I<0x9F, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setg\t$dst",
-                 [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = >  signed
-
-def SETBr    : I<0x92, MRM0r,
-                 (outs GR8   :$dst), (ins),
-                 "setb\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>,
-               TB;                        // GR8 = <  unsign
-def SETBm    : I<0x92, MRM0m,
-                 (outs), (ins i8mem:$dst),
-                 "setb\t$dst",
-                 [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <  unsign
-
-def SETAEr   : I<0x93, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setae\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>,
-               TB;                        // GR8 = >= unsign
-def SETAEm   : I<0x93, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setae\t$dst",
-                 [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = >= unsign
-
-def SETBEr   : I<0x96, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setbe\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>,
-               TB;                        // GR8 = <= unsign
-def SETBEm   : I<0x96, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setbe\t$dst",
-                 [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <= unsign
-
-def SETAr    : I<0x97, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "seta\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>,
-               TB;                        // GR8 = >  signed
-def SETAm    : I<0x97, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "seta\t$dst",
-                 [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = >  signed
-
-def SETSr    : I<0x98, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "sets\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_S, EFLAGS))]>,
-               TB;                        // GR8 = <sign bit>
-def SETSm    : I<0x98, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "sets\t$dst",
-                 [(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <sign bit>
-def SETNSr   : I<0x99, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setns\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>,
-               TB;                        // GR8 = !<sign bit>
-def SETNSm   : I<0x99, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setns\t$dst",
-                 [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = !<sign bit>
-
-def SETPr    : I<0x9A, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setp\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>,
-               TB;                        // GR8 = parity
-def SETPm    : I<0x9A, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setp\t$dst",
-                 [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = parity
-def SETNPr   : I<0x9B, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setnp\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>,
-               TB;                        // GR8 = not parity
-def SETNPm   : I<0x9B, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setnp\t$dst",
-                 [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = not parity
-
-def SETOr    : I<0x90, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "seto\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>,
-               TB;                        // GR8 = overflow
-def SETOm    : I<0x90, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "seto\t$dst",
-                 [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = overflow
-def SETNOr   : I<0x91, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setno\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>,
-               TB;                        // GR8 = not overflow
-def SETNOm   : I<0x91, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setno\t$dst",
-                 [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = not overflow
-} // Uses = [EFLAGS]
-
-
-// Integer comparisons
-let Defs = [EFLAGS] in {
-def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src),
-                 "cmp{b}\t{$src, %al|%al, $src}", []>;
-def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src),
-                    "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src),
-                    "cmp{l}\t{$src, %eax|%eax, $src}", []>;
-
-def CMP8rr  : I<0x38, MRMDestReg,
-                (outs), (ins GR8 :$src1, GR8 :$src2),
-                "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>;
-def CMP16rr : I<0x39, MRMDestReg,
-                (outs), (ins GR16:$src1, GR16:$src2),
-                "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize;
-def CMP32rr : I<0x39, MRMDestReg,
-                (outs), (ins GR32:$src1, GR32:$src2),
-                "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>;
-def CMP8mr  : I<0x38, MRMDestMem,
-                (outs), (ins i8mem :$src1, GR8 :$src2),
-                "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>;
-def CMP16mr : I<0x39, MRMDestMem,
-                (outs), (ins i16mem:$src1, GR16:$src2),
-                "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>,
-                 OpSize;
-def CMP32mr : I<0x39, MRMDestMem,
-                (outs), (ins i32mem:$src1, GR32:$src2),
-                "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>;
-def CMP8rm  : I<0x3A, MRMSrcMem,
-                (outs), (ins GR8 :$src1, i8mem :$src2),
-                "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>;
-def CMP16rm : I<0x3B, MRMSrcMem,
-                (outs), (ins GR16:$src1, i16mem:$src2),
-                "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>,
-                 OpSize;
-def CMP32rm : I<0x3B, MRMSrcMem,
-                (outs), (ins GR32:$src1, i32mem:$src2),
-                "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def CMP8rr_alt : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
-                    "cmp{b}\t{$src2, $src1|$src1, $src2}", []>;
-  def CMP16rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
-                     "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize;
-  def CMP32rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
-                     "cmp{l}\t{$src2, $src1|$src1, $src2}", []>;
-}
 
-def CMP8ri  : Ii8<0x80, MRM7r,
-                  (outs), (ins GR8:$src1, i8imm:$src2),
-                  "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                  [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>;
-def CMP16ri : Ii16<0x81, MRM7r,
-                   (outs), (ins GR16:$src1, i16imm:$src2),
-                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize;
-def CMP32ri : Ii32<0x81, MRM7r,
-                   (outs), (ins GR32:$src1, i32imm:$src2),
-                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>;
-def CMP8mi  : Ii8 <0x80, MRM7m,
-                   (outs), (ins i8mem :$src1, i8imm :$src2),
-                   "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>;
-def CMP16mi : Ii16<0x81, MRM7m,
-                   (outs), (ins i16mem:$src1, i16imm:$src2),
-                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>,
-                   OpSize;
-def CMP32mi : Ii32<0x81, MRM7m,
-                   (outs), (ins i32mem:$src1, i32imm:$src2),
-                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>;
-def CMP16ri8 : Ii8<0x83, MRM7r,
-                   (outs), (ins GR16:$src1, i16i8imm:$src2),
-                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>,
-                    OpSize;
-def CMP16mi8 : Ii8<0x83, MRM7m,
-                   (outs), (ins i16mem:$src1, i16i8imm:$src2),
-                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi16 addr:$src1),
-                                         i16immSExt8:$src2))]>, OpSize;
-def CMP32mi8 : Ii8<0x83, MRM7m,
-                   (outs), (ins i32mem:$src1, i32i8imm:$src2),
-                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi32 addr:$src1),
-                                         i32immSExt8:$src2))]>;
-def CMP32ri8 : Ii8<0x83, MRM7r,
-                   (outs), (ins GR32:$src1, i32i8imm:$src2),
-                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
+//===----------------------------------------------------------------------===//
+// Bit tests instructions: BT, BTS, BTR, BTC.
 
-// Bit tests.
-// TODO: BTC, BTR, and BTS
 let Defs = [EFLAGS] in {
 def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                "bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3677,6 +916,9 @@
 def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                "bt{l}\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+               "bt{q}\t{$src2, $src1|$src1, $src2}",
+               [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
 
 // Unlike with the register+register form, the memory+register form of the
 // bt instruction does not ignore the high bits of the index. From ISel's
@@ -3684,17 +926,23 @@
 // only for now.
 
 def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
-               "bt{w}\t{$src2, $src1|$src1, $src2}", 
+               "bt{w}\t{$src2, $src1|$src1, $src2}",
 //               [(X86bt (loadi16 addr:$src1), GR16:$src2),
 //                (implicit EFLAGS)]
                []
                >, OpSize, TB, Requires<[FastBTMem]>;
 def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
-               "bt{l}\t{$src2, $src1|$src1, $src2}", 
+               "bt{l}\t{$src2, $src1|$src1, $src2}",
 //               [(X86bt (loadi32 addr:$src1), GR32:$src2),
 //                (implicit EFLAGS)]
                []
                >, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+               "bt{q}\t{$src2, $src1|$src1, $src2}",
+//               [(X86bt (loadi64 addr:$src1), GR64:$src2),
+//                (implicit EFLAGS)]
+                []
+                >, TB;
 
 def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3703,6 +951,10 @@
 def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                "bt{q}\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
 // Note that these instructions don't need FastBTMem because that
 // only applies when the other operand is in a register. When it's
 // an immediate, bt is still fast.
@@ -3714,307 +966,132 @@
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
                  ]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                "bt{q}\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+                                     i64immSExt8:$src2))]>, TB;
+
 
 def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                     "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                     "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                     "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                     "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 
 def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                     "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                     "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                     "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                     "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 
 def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                     "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                     "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                     "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                     "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 } // Defs = [EFLAGS]
 
-// Sign/Zero extenders
-// Use movsbl intead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update.  Actual movsbw included for the disassembler.
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
-                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
-                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
-def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
-def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
-                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sext GR8:$src))]>, TB;
-def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
-                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
-def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
-                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sext GR16:$src))]>, TB;
-def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
-                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
-
-// Use movzbl intead of movzbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update.  Actual movzbw included for the disassembler.
-def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
-                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
-                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;  
-def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
-def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
-def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
-                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zext GR8:$src))]>, TB;
-def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
-                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
-def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
-                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zext GR16:$src))]>, TB;
-def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
-                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
-
-// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
-// except that they use GR32_NOREX for the output operand register class
-// instead of GR32. This allows them to operate on h registers on x86-64.
-def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
-                         (outs GR32_NOREX:$dst), (ins GR8:$src),
-                         "movz{bl|x}\t{$src, $dst|$dst, $src}  # NOREX",
-                         []>, TB;
-let mayLoad = 1 in
-def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
-                         (outs GR32_NOREX:$dst), (ins i8mem:$src),
-                         "movz{bl|x}\t{$src, $dst|$dst, $src}  # NOREX",
-                         []>, TB;
-
-let neverHasSideEffects = 1 in {
-  let Defs = [AX], Uses = [AL] in
-  def CBW : I<0x98, RawFrm, (outs), (ins),
-              "{cbtw|cbw}", []>, OpSize;   // AX = signext(AL)
-  let Defs = [EAX], Uses = [AX] in
-  def CWDE : I<0x98, RawFrm, (outs), (ins),
-              "{cwtl|cwde}", []>;   // EAX = signext(AX)
-
-  let Defs = [AX,DX], Uses = [AX] in
-  def CWD : I<0x99, RawFrm, (outs), (ins),
-              "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
-  let Defs = [EAX,EDX], Uses = [EAX] in
-  def CDQ : I<0x99, RawFrm, (outs), (ins),
-              "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
-}
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map movr0 to xor.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-// FIXME: Set encoding to pseudo.
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
-    isCodeGenOnly = 1 in {
-def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
-                 [(set GR8:$dst, 0)]>;
-
-// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
-// encoding and avoids a partial-register update sometimes, but doing so
-// at isel time interferes with rematerialization in the current register
-// allocator. For now, this is rewritten when the instruction is lowered
-// to an MCInst.
-def MOV16r0   : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
-                 "",
-                 [(set GR16:$dst, 0)]>, OpSize;
-                 
-// FIXME: Set encoding to pseudo.
-def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
-                 [(set GR32:$dst, 0)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//
-
-// ELF TLS Support
-// All calls clobber the non-callee saved registers. ESP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
-            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-    Uses = [ESP] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
-                  "leal\t$sym, %eax; "
-                  "call\t___tls_get_addr at PLT",
-                  [(X86tlsaddr tls32addr:$sym)]>,
-                  Requires<[In32BitMode]>;
-
-// Darwin TLS Support
-// For i386, the address of the thunk is passed on the stack, on return the 
-// address of the variable is in %eax.  %ecx is trashed during the function 
-// call.  All other registers are preserved.
-let Defs = [EAX, ECX],
-    Uses = [ESP],
-    usesCustomInserter = 1 in
-def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
-                "# TLSCall_32",
-                [(X86TLSCall addr:$sym)]>,
-                Requires<[In32BitMode]>;
-                
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                   "movl\t%gs:$src, $dst",
-                   [(set GR32:$dst, (gsload addr:$src))]>, SegGS;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                   "movl\t%fs:$src, $dst",
-                   [(set GR32:$dst, (fsload addr:$src))]>, SegFS;
-
-//===----------------------------------------------------------------------===//
-// EH Pseudo Instructions
-//
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
-    hasCtrlDep = 1, isCodeGenOnly = 1 in {
-def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
-                    "ret\t#eh_return, addr: $addr",
-                    [(X86ehret GR32:$addr)]>;
-
-}
 
 //===----------------------------------------------------------------------===//
 // Atomic support
 //
 
-// Memory barriers
-let hasSideEffects = 1 in {
-def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
-                     "#MEMBARRIER",
-                     [(X86MemBarrier)]>, Requires<[HasSSE2]>;
-
-// TODO: Get this to fold the constant into the instruction.           
-let Uses = [ESP] in
-def Int_MemBarrierNoSSE  : I<0x0B, Pseudo, (outs), (ins GR32:$zero),
-                           "lock\n\t"
-                           "or{l}\t{$zero, (%esp)|(%esp), $zero}",
-                           [(X86MemBarrierNoSSE GR32:$zero)]>, LOCK;
-}
 
 // Atomic swap. These are just normal xchg instructions. But since a memory
 // operand is referenced, the atomicity is ensured.
 let Constraints = "$val = $dst" in {
-def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), 
-                 (ins GR32:$val, i32mem:$ptr),
-               "xchg{l}\t{$val, $ptr|$ptr, $val}", 
-               [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
-def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst), 
-                 (ins GR16:$val, i16mem:$ptr),
-               "xchg{w}\t{$val, $ptr|$ptr, $val}", 
-               [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>, 
-                OpSize;
 def XCHG8rm  : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
-               "xchg{b}\t{$val, $ptr|$ptr, $val}", 
+               "xchg{b}\t{$val, $ptr|$ptr, $val}",
                [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),
+                 (ins GR16:$val, i16mem:$ptr),
+               "xchg{w}\t{$val, $ptr|$ptr, $val}",
+               [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+                OpSize;
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),
+                 (ins GR32:$val, i32mem:$ptr),
+               "xchg{l}\t{$val, $ptr|$ptr, $val}",
+               [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),
+                  (ins GR64:$val,i64mem:$ptr),
+                  "xchg{q}\t{$val, $ptr|$ptr, $val}",
+                  [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
 
-def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
-                 "xchg{l}\t{$val, $src|$src, $val}", []>;
-def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
-                 "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
 def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
                 "xchg{b}\t{$val, $src|$src, $val}", []>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
+                 "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
+                 "xchg{l}\t{$val, $src|$src, $val}", []>;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
+                  "xchg{q}\t{$val, $src|$src, $val}", []>;
 }
 
 def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
                   "xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
 def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
                   "xchg{l}\t{$src, %eax|%eax, $src}", []>;
+def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
+                  "xchg{q}\t{$src, %rax|%rax, $src}", []>;
 
-// Atomic compare and swap.
-let Defs = [EAX, EFLAGS], Uses = [EAX] in {
-def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
-               "lock\n\t"
-               "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
-}
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
-               "lock\n\t"
-               "cmpxchg8b\t$ptr",
-               [(X86cas8 addr:$ptr)]>, TB, LOCK;
-}
 
-let Defs = [AX, EFLAGS], Uses = [AX] in {
-def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
-               "lock\n\t"
-               "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
-}
-let Defs = [AL, EFLAGS], Uses = [AL] in {
-def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
-               "lock\n\t"
-               "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
-}
-
-// Atomic exchange and add
-let Constraints = "$val = $dst", Defs = [EFLAGS] in {
-def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
-               "lock\n\t"
-               "xadd{l}\t{$val, $ptr|$ptr, $val}",
-               [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
-                TB, LOCK;
-def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
-               "lock\n\t"
-               "xadd{w}\t{$val, $ptr|$ptr, $val}",
-               [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
-                TB, OpSize, LOCK;
-def LXADD8  : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
-               "lock\n\t"
-               "xadd{b}\t{$val, $ptr|$ptr, $val}",
-               [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
-                TB, LOCK;
-}
 
 def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
                 "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -4022,6 +1099,8 @@
                  "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def XADD32rr  : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rr  : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+                   "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
 
 let mayLoad = 1, mayStore = 1 in {
 def XADD8rm   : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4030,6 +1109,9 @@
                  "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def XADD32rm  : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rm  : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                   "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
 }
 
 def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
@@ -4038,6 +1120,8 @@
                     "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def CMPXCHG32rr  : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                      "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rr  : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
 
 let mayLoad = 1, mayStore = 1 in {
 def CMPXCHG8rm   : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4046,281 +1130,20 @@
                      "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def CMPXCHG32rm  : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                      "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rm  : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
 }
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
 def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
                   "cmpxchg8b\t$dst", []>, TB;
 
-// Optimized codegen when the non-memory output is not used.
-// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
-def LOCK_ADD8mr  : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
-                    "lock\n\t"
-                    "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mr  : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                    "lock\n\t"
-                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mr  : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                    "lock\n\t"
-                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD8mi   : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
-                    "lock\n\t"
-                    "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi  : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
-                    "lock\n\t"
-                     "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD32mi  : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
-                    "lock\n\t"
-                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-                    "lock\n\t"
-                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                    "lock\n\t"
-                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
-                    "lock\n\t"
-                    "inc{b}\t$dst", []>, LOCK;
-def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
-                    "lock\n\t"
-                    "inc{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
-                    "lock\n\t"
-                    "inc{l}\t$dst", []>, LOCK;
-
-def LOCK_SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
-                    "lock\n\t"
-                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mr  : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                    "lock\n\t"
-                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mr  : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
-                    "lock\n\t"
-                    "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB8mi   : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), 
-                    "lock\n\t"
-                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi  : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), 
-                    "lock\n\t"
-                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi  : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), 
-                    "lock\n\t"
-                     "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-                    "lock\n\t"
-                     "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                    "lock\n\t"
-                     "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
-                    "lock\n\t"
-                    "dec{b}\t$dst", []>, LOCK;
-def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
-                    "lock\n\t"
-                    "dec{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
-                    "lock\n\t"
-                    "dec{l}\t$dst", []>, LOCK;
-}
-
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
-                  usesCustomInserter = 1 in {
-def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMAND32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
-def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMOR32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
-def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMXOR32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
-def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMNAND32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
-def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
-               "#ATOMMIN32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
-def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMMAX32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMUMIN32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMUMAX32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
-
-def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMAND16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
-def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMOR16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
-def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMXOR16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
-def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMNAND16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
-def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
-               "#ATOMMIN16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
-def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMMAX16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMUMIN16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMUMAX16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
-
-def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMAND8 PSEUDO!", 
-               [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
-def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMOR8 PSEUDO!", 
-               [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
-def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMXOR8 PSEUDO!", 
-               [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
-def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMNAND8 PSEUDO!", 
-               [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
-}
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+                    "cmpxchg16b\t$dst", []>, TB;
 
-let Constraints = "$val1 = $dst1, $val2 = $dst2", 
-                  Defs = [EFLAGS, EAX, EBX, ECX, EDX],
-                  Uses = [EAX, EBX, ECX, EDX],
-                  mayLoad = 1, mayStore = 1,
-                  usesCustomInserter = 1 in {
-def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMAND6432 PSEUDO!", []>;
-def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMOR6432 PSEUDO!", []>;
-def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMXOR6432 PSEUDO!", []>;
-def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMNAND6432 PSEUDO!", []>;
-def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMADD6432 PSEUDO!", []>;
-def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMSUB6432 PSEUDO!", []>;
-def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMSWAP6432 PSEUDO!", []>;
-}
 
-// Segmentation support instructions.
 
-def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), 
-                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
-                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-
-// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
-def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), 
-                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
-                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; 
-def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
-                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; 
-def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
-                
-def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
-
-def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
-             "str{w}\t{$dst}", []>, TB;
-def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
-             "str{w}\t{$dst}", []>, TB;
-def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
-             "ltr{w}\t{$src}", []>, TB;
-def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
-             "ltr{w}\t{$src}", []>, TB;
-             
-def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
-                 "push{w}\t%fs", []>, OpSize, TB;
-def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
-                 "push{l}\t%fs", []>, TB;
-def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
-                 "push{w}\t%gs", []>, OpSize, TB;
-def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
-                 "push{l}\t%gs", []>, TB;
-
-def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
-                "pop{w}\t%fs", []>, OpSize, TB;
-def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
-                "pop{l}\t%fs", []>, TB;
-def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
-                "pop{w}\t%gs", []>, OpSize, TB;
-def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
-                "pop{l}\t%gs", []>, TB;
-
-def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "lds{l}\t{$src, $dst|$dst, $src}", []>;
-def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "les{l}\t{$src, $dst|$dst, $src}", []>;
-def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
-              "verr\t$seg", []>, TB;
-def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
-              "verr\t$seg", []>, TB;
-def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
-              "verw\t$seg", []>, TB;
-def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
-              "verw\t$seg", []>, TB;
-
-// Descriptor-table support instructions
-
-def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
-              "sgdt\t$dst", []>, TB;
-def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
-              "sidt\t$dst", []>, TB;
-def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
-                "sldt{w}\t$dst", []>, TB;
-def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
-                "sldt{w}\t$dst", []>, TB;
-def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
-              "lgdt\t$src", []>, TB;
-def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
-              "lidt\t$src", []>, TB;
-def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
-                "lldt{w}\t$src", []>, TB;
-def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
-                "lldt{w}\t$src", []>, TB;
-                
 // Lock instruction prefix
 def LOCK_PREFIX : I<0xF0, RawFrm, (outs),  (ins), "lock", []>;
 
@@ -4333,35 +1156,19 @@
 def REPNE_PREFIX : I<0xF2, RawFrm, (outs),  (ins), "repne", []>;
 }
 
-// Segment override instruction prefixes
-def CS_PREFIX : I<0x2E, RawFrm, (outs),  (ins), "cs", []>;
-def SS_PREFIX : I<0x36, RawFrm, (outs),  (ins), "ss", []>;
-def DS_PREFIX : I<0x3E, RawFrm, (outs),  (ins), "ds", []>;
-def ES_PREFIX : I<0x26, RawFrm, (outs),  (ins), "es", []>;
-def FS_PREFIX : I<0x64, RawFrm, (outs),  (ins), "fs", []>;
-def GS_PREFIX : I<0x65, RawFrm, (outs),  (ins), "gs", []>;
 
 // String manipulation instructions
-
 def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
 def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
 def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
 
 def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>;
 def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize;
 def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>;
 
-// CPU flow control instructions
-
-def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
-def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
-
-// FPU control instructions
-
-def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
 
 // Flag instructions
-
 def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
 def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
 def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
@@ -4373,620 +1180,77 @@
 def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
 
 // Table lookup instructions
-
 def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>;
 
-// Specialized register support
-
-def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
-def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
-def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
-
-def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), 
-                "smsw{w}\t$dst", []>, OpSize, TB;
-def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), 
-                "smsw{l}\t$dst", []>, TB;
-// For memory operands, there is only a 16-bit form
-def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
-                "smsw{w}\t$dst", []>, TB;
-
-def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
-                "lmsw{w}\t$src", []>, TB;
-def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
-                "lmsw{w}\t$src", []>, TB;
-                
-def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
-
-// Cache instructions
-
-def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
-def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
-
-// VMX instructions
-
-// 66 0F 38 80
-def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
-// 66 0F 38 81
-def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
-// 0F 01 C1
-def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
-def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
-  "vmclear\t$vmcs", []>, OpSize, TB;
-// 0F 01 C2
-def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
-// 0F 01 C3
-def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
-def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
-  "vmptrld\t$vmcs", []>, TB;
-def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
-  "vmptrst\t$vmcs", []>, TB;
-def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
-  "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-  "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
-  "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
-  "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-  "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-  "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-  "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-  "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-// 0F 01 C4
-def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
-def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
-  "vmxon\t{$vmxon}", []>, XS;
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
-def : Pat<(i32 (X86Wrapper tconstpool  :$dst)), (MOV32ri tconstpool  :$dst)>;
-def : Pat<(i32 (X86Wrapper tjumptable  :$dst)), (MOV32ri tjumptable  :$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
-def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
-def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
-
-def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
-          (ADD32ri GR32:$src1, tconstpool:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
-          (ADD32ri GR32:$src1, tjumptable:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
-          (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
-          (ADD32ri GR32:$src1, texternalsym:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
-          (ADD32ri GR32:$src1, tblockaddress:$src2)>;
-
-def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
-          (MOV32mi addr:$dst, tglobaladdr:$src)>;
-def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
-          (MOV32mi addr:$dst, texternalsym:$src)>;
-def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
-          (MOV32mi addr:$dst, tblockaddress:$src)>;
-
-// Calls
-// tailcall stuff
-def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
-          (TCRETURNri GR32_TC:$dst, imm:$off)>,
-	  Requires<[In32BitMode]>;
-
-// FIXME: This is disabled for 32-bit PIC mode because the global base
-// register which is part of the address mode may be assigned a 
-// callee-saved register.
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
-          (TCRETURNmi addr:$dst, imm:$off)>,
-	  Requires<[In32BitMode, IsNotPIC]>;
-
-def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>,
-	  Requires<[In32BitMode]>;
-
-def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>,
-	  Requires<[In32BitMode]>;
-
-// Normal calls, with various flavors of addresses.
-def : Pat<(X86call (i32 tglobaladdr:$dst)),
-          (CALLpcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86call (i32 texternalsym:$dst)),
-          (CALLpcrel32 texternalsym:$dst)>;
-def : Pat<(X86call (i32 imm:$dst)),
-          (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
-
-// X86 specific add which produces a flag.
-def : Pat<(addc GR32:$src1, GR32:$src2),
-          (ADD32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(addc GR32:$src1, (load addr:$src2)),
-          (ADD32rm GR32:$src1, addr:$src2)>;
-def : Pat<(addc GR32:$src1, imm:$src2),
-          (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
-          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-def : Pat<(subc GR32:$src1, GR32:$src2),
-          (SUB32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(subc GR32:$src1, (load addr:$src2)),
-          (SUB32rm GR32:$src1, addr:$src2)>;
-def : Pat<(subc GR32:$src1, imm:$src2),
-          (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
-          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// Comparisons.
-
-// TEST R,R is smaller than CMP R,0
-def : Pat<(X86cmp GR8:$src1, 0),
-          (TEST8rr GR8:$src1, GR8:$src1)>;
-def : Pat<(X86cmp GR16:$src1, 0),
-          (TEST16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86cmp GR32:$src1, 0),
-          (TEST32rr GR32:$src1, GR32:$src1)>;
-
-// Conditional moves with folded loads with operands swapped and conditions
-// inverted.
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
-          (CMOVAE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
-          (CMOVAE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
-          (CMOVB16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
-          (CMOVB32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
-          (CMOVNE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
-          (CMOVNE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
-          (CMOVE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
-          (CMOVE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
-          (CMOVA16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
-          (CMOVA32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
-          (CMOVBE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
-          (CMOVBE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
-          (CMOVGE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
-          (CMOVGE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
-          (CMOVL16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
-          (CMOVL32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
-          (CMOVG16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
-          (CMOVG32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
-          (CMOVLE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
-          (CMOVLE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
-          (CMOVNP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
-          (CMOVNP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
-          (CMOVP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
-          (CMOVP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
-          (CMOVNS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
-          (CMOVNS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
-          (CMOVS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
-          (CMOVS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
-          (CMOVNO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
-          (CMOVNO32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
-          (CMOVO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
-          (CMOVO32rm GR32:$src2, addr:$src1)>;
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi8i1  addr:$src), (MOV8rm     addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-
-// extload bool -> extload byte
-def : Pat<(extloadi8i1 addr:$src),   (MOV8rm      addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src),  (MOVZX16rm8  addr:$src)>;
-def : Pat<(extloadi32i1 addr:$src),  (MOVZX32rm8  addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src),  (MOVZX16rm8  addr:$src)>;
-def : Pat<(extloadi32i8 addr:$src),  (MOVZX32rm8  addr:$src)>;
-def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8  GR8 :$src)>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8  GR8 :$src)>;
-
-// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
-def : Pat<(i32 (anyext GR16:$src)),
-          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
-
-
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
+// ASCII Adjust After Addition
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX Before Division
+// sets AL, AH and EFLAGS and uses AL and AH
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+                 "aad\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+                 "aam\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AL After Subtraction
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Addition
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Subtraction
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>;
+
+// Check Array Index Against Bounds
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                   "bound\t{$src, $dst|$dst, $src}", []>, OpSize,
+                   Requires<[In32BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                   "bound\t{$src, $dst|$dst, $src}", []>,
+                   Requires<[In32BitMode]>;
 
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
-def : Pat<(add GR16:$src1, 128),
-          (SUB16ri8 GR16:$src1, -128)>;
-def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
-          (SUB16mi8 addr:$dst, -128)>;
-def : Pat<(add GR32:$src1, 128),
-          (SUB32ri8 GR32:$src1, -128)>;
-def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
-          (SUB32mi8 addr:$dst, -128)>;
-
-// r & (2^16-1) ==> movz
-def : Pat<(and GR32:$src1, 0xffff),
-          (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, 
-                                                             GR32_ABCD)),
-                                      sub_8bit))>,
-      Requires<[In32BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
-          (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, 
-                                                             GR16_ABCD)),
-                                      sub_8bit))>,
-      Requires<[In32BitMode]>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR32:$src, i16),
-          (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
-          (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
-                                                             GR32_ABCD)),
-                                      sub_8bit))>,
-      Requires<[In32BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
-          (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, 
-                                                             GR16_ABCD)),
-                                      sub_8bit))>,
-      Requires<[In32BitMode]>;
-
-// trunc patterns
-def : Pat<(i16 (trunc GR32:$src)),
-          (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
-          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                          sub_8bit)>,
-      Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
-          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                          sub_8bit)>,
-      Requires<[In32BitMode]>;
-
-// h-register tricks
-def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
-          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                          sub_8bit_hi)>,
-      Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
-          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                          sub_8bit_hi)>,
-      Requires<[In32BitMode]>;
-def : Pat<(srl GR16:$src, (i8 8)),
-          (EXTRACT_SUBREG
-            (MOVZX32rr8
-              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              sub_8bit_hi)),
-            sub_16bit)>,
-      Requires<[In32BitMode]>;
-def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, 
-                                                             GR16_ABCD)),
-                                      sub_8bit_hi))>,
-      Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, 
-                                                             GR16_ABCD)),
-                                      sub_8bit_hi))>,
-      Requires<[In32BitMode]>;
-def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
-                                                             GR32_ABCD)),
-                                      sub_8bit_hi))>,
-      Requires<[In32BitMode]>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
-                                                             GR32_ABCD)),
-                                      sub_8bit_hi))>,
-      Requires<[In32BitMode]>;
-
-// (shl x, 1) ==> (add x, x)
-def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
-def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
-
-// (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL, 31)),
-          (SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL, 31)),
-          (SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL, 31)),
-          (SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHL32mCL addr:$dst)>;
-
-def : Pat<(srl GR8:$src1, (and CL, 31)),
-          (SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL, 31)),
-          (SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL, 31)),
-          (SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHR32mCL addr:$dst)>;
-
-def : Pat<(sra GR8:$src1, (and CL, 31)),
-          (SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL, 31)),
-          (SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL, 31)),
-          (SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
-          (SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
-          (SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
-          (SAR32mCL addr:$dst)>;
-
-// (anyext (setcc_carry)) -> (setcc_carry)
-def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C16r)>;
-def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C32r)>;
-def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C32r)>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
-def : Pat<(or_is_add GR16:$src1, imm:$src2),
-          (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR32:$src1, imm:$src2),
-          (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
-          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
-          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(or_is_add GR16:$src1, GR16:$src2),
-          (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or_is_add GR32:$src1, GR32:$src2),
-          (ADD32rr GR32:$src1, GR32:$src2)>;
-} // AddedComplexity
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+                 "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+                 "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
 
 //===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
+// Subsystems.
 //===----------------------------------------------------------------------===//
 
-// add reg, reg
-def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
-
-// add reg, mem
-def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
-          (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
-          (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
-          (ADD32rm GR32:$src1, addr:$src2)>;
-
-// add reg, imm
-def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri  GR8:$src1 , imm:$src2)>;
-def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(add GR16:$src1, i16immSExt8:$src2),
-          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(add GR32:$src1, i32immSExt8:$src2),
-          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// sub reg, reg
-def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
-
-// sub reg, mem
-def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
-          (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
-          (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
-          (SUB32rm GR32:$src1, addr:$src2)>;
-
-// sub reg, imm
-def : Pat<(sub GR8:$src1, imm:$src2),
-          (SUB8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, imm:$src2),
-          (SUB16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub GR32:$src1, imm:$src2),
-          (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
-          (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
-          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// mul reg, reg
-def : Pat<(mul GR16:$src1, GR16:$src2),
-          (IMUL16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(mul GR32:$src1, GR32:$src2),
-          (IMUL32rr GR32:$src1, GR32:$src2)>;
-
-// mul reg, mem
-def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
-          (IMUL16rm GR16:$src1, addr:$src2)>;
-def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
-          (IMUL32rm GR32:$src1, addr:$src2)>;
-
-// mul reg, imm
-def : Pat<(mul GR16:$src1, imm:$src2),
-          (IMUL16rri GR16:$src1, imm:$src2)>;
-def : Pat<(mul GR32:$src1, imm:$src2),
-          (IMUL32rri GR32:$src1, imm:$src2)>;
-def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
-          (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
-          (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// reg = mul mem, imm
-def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
-          (IMUL16rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
-          (IMUL32rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
-          (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
-          (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
-
-// Optimize multiply by 2 with EFLAGS result.
-let AddedComplexity = 2 in {
-def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
-}
-
-// Patterns for nodes that do not produce flags, for instructions that do.
-
-// Increment reg.
-def : Pat<(add GR8:$src1 ,  1), (INC8r  GR8:$src1)>;
-def : Pat<(add GR16:$src1,  1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1,  1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// Decrement reg.
-def : Pat<(add GR8:$src1 , -1), (DEC8r  GR8:$src1)>;
-def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// or reg/reg.
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
-
-// or reg/mem
-def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
-          (OR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
-          (OR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
-          (OR32rm GR32:$src1, addr:$src2)>;
-
-// or reg/imm
-def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri  GR8 :$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, i16immSExt8:$src2),
-          (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or GR32:$src1, i32immSExt8:$src2),
-          (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// xor reg/reg
-def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
-
-// xor reg/mem
-def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
-          (XOR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
-          (XOR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
-          (XOR32rm GR32:$src1, addr:$src2)>;
-
-// xor reg/imm
-def : Pat<(xor GR8:$src1, imm:$src2),
-          (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, imm:$src2),
-          (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(xor GR32:$src1, imm:$src2),
-          (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
-          (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
-          (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// and reg/reg
-def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
-
-// and reg/mem
-def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
-          (AND8rm GR8:$src1, addr:$src2)>;
-def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
-          (AND16rm GR16:$src1, addr:$src2)>;
-def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
-          (AND32rm GR32:$src1, addr:$src2)>;
-
-// and reg/imm
-def : Pat<(and GR8:$src1, imm:$src2),
-          (AND8ri GR8:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, imm:$src2),
-          (AND16ri GR16:$src1, imm:$src2)>;
-def : Pat<(and GR32:$src1, imm:$src2),
-          (AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, i16immSExt8:$src2),
-          (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(and GR32:$src1, i32immSExt8:$src2),
-          (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// Floating Point Stack Support
-//===----------------------------------------------------------------------===//
+include "X86InstrArithmetic.td"
+include "X86InstrCMovSetCC.td"
+include "X86InstrExtension.td"
+include "X86InstrControl.td"
+include "X86InstrShiftRotate.td"
 
+// X87 Floating Point Stack.
 include "X86InstrFPStack.td"
 
-//===----------------------------------------------------------------------===//
-// X86-64 Support
-//===----------------------------------------------------------------------===//
-
-include "X86Instr64bit.td"
-
-//===----------------------------------------------------------------------===//
 // SIMD support (SSE, MMX and AVX)
-//===----------------------------------------------------------------------===//
-
 include "X86InstrFragmentsSIMD.td"
 
-//===----------------------------------------------------------------------===//
 // FMA - Fused Multiply-Add support (requires FMA)
-//===----------------------------------------------------------------------===//
-
 include "X86InstrFMA.td"
 
-//===----------------------------------------------------------------------===//
-// XMM Floating point support (requires SSE / SSE2)
-//===----------------------------------------------------------------------===//
-
+// SSE, MMX and 3DNow! vector support.
 include "X86InstrSSE.td"
+include "X86InstrMMX.td"
+include "X86Instr3DNow.td"
 
-//===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
-//===----------------------------------------------------------------------===//
+include "X86InstrVMX.td"
+
+// System instructions.
+include "X86InstrSystem.td"
+
+// Compiler Pseudo Instructions and Pat Patterns
+include "X86InstrCompiler.td"
 
-include "X86InstrMMX.td"

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrMMX.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrMMX.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,6 +11,9 @@
 // and properties of the instructions which are needed for code generation,
 // machine code emission, and analysis.
 //
+// All instructions that use MMX should be in this file, even if they also use
+// SSE.
+//
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
@@ -18,58 +21,23 @@
 //===----------------------------------------------------------------------===//
 
 let Constraints = "$src1 = $dst" in {
-  // MMXI_binop_rm - Simple MMX binary operator.
-  multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                           ValueType OpVT, bit Commutable = 0> {
-    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
-                  (ins VR64:$src1, VR64:$src2),
-                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                  [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
-      let isCommutable = Commutable;
-    }
-    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
-                  (ins VR64:$src1, i64mem:$src2),
-                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                  [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
-                                         (bitconvert
-                                          (load_mmx addr:$src2)))))]>;
-  }
-
+  // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+  // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
   multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                                bit Commutable = 0> {
-    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+    def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
                  (ins VR64:$src1, VR64:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
       let isCommutable = Commutable;
     }
-    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+    def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
                  (ins VR64:$src1, i64mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId VR64:$src1,
                                    (bitconvert (load_mmx addr:$src2))))]>;
   }
 
-  // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
-  //
-  // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
-  // to collapse (bitconvert VT to VT) into its operand.
-  //
-  multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                 bit Commutable = 0> {
-    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
-                                  (ins VR64:$src1, VR64:$src2),
-                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                  [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
-      let isCommutable = Commutable;
-    }
-    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
-                                  (ins VR64:$src1, i64mem:$src2),
-                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                  [(set VR64:$dst,
-                    (OpNode VR64:$src1,(load_mmx addr:$src2)))]>;
-  }
-
   multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                                 string OpcodeStr, Intrinsic IntId,
                                 Intrinsic IntId2> {
@@ -89,14 +57,75 @@
   }
 }
 
+/// Unary MMX instructions requiring SSSE3.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+                               Intrinsic IntId64> {
+  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                   [(set VR64:$dst,
+                     (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+}
+
+/// Binary MMX instructions requiring SSSE3.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+                             Intrinsic IntId64> {
+  let isCommutable = 0 in
+  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+       (ins VR64:$src1, VR64:$src2),
+        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+       [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+       (ins VR64:$src1, i64mem:$src2),
+        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+       [(set VR64:$dst,
+         (IntId64 VR64:$src1,
+          (bitconvert (memopmmx addr:$src2))))]>;
+}
+}
+
+/// PALIGN MMX instructions (require SSSE3).
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
+  def R64irr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+      (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+      !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 
+      [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+  def R64irm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+      (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+      !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+      [(set VR64:$dst, (IntId VR64:$src1,
+                       (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+}
+
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                         Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+                         string asm, Domain d> {
+  def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+                        [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+  def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+                        [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+                    PatFrag ld_frag, string asm, Domain d> {
+  def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+              asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+  def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+                   (ins DstRC:$src1, x86memop:$src2), asm,
+              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
+
 //===----------------------------------------------------------------------===//
-// MMX EMMS & FEMMS Instructions
+// MMX EMMS Instruction
 //===----------------------------------------------------------------------===//
 
 def MMX_EMMS  : MMXI<0x77, RawFrm, (outs), (ins), "emms",
                      [(int_x86_mmx_emms)]>;
-def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
-                     [(int_x86_mmx_femms)]>;
 
 //===----------------------------------------------------------------------===//
 // MMX Scalar Instructions
@@ -106,12 +135,12 @@
 def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst, 
-                         (v2i32 (scalar_to_vector GR32:$src)))]>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+                         (x86mmx (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1 in
 def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
                         "movd\t{$src, $dst|$dst, $src}",
               [(set VR64:$dst,
-               (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+               (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
 let mayStore = 1 in
 def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
                         "movd\t{$src, $dst|$dst, $src}", []>;
@@ -123,42 +152,41 @@
                              "movd\t{$src, $dst|$dst, $src}",
                              []>;
 
-let neverHasSideEffects = 1 in
 // These are 64 bit moves, but since the OS X assembler doesn't
 // recognize a register-register movq, we write them as
 // movd.
 def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
                                (outs GR64:$dst), (ins VR64:$src),
-                               "movd\t{$src, $dst|$dst, $src}", []>;
+                               "movd\t{$src, $dst|$dst, $src}", 
+                             [(set GR64:$dst,
+                              (bitconvert VR64:$src))]>;
 def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
                              "movd\t{$src, $dst|$dst, $src}",
                              [(set VR64:$dst,
-                              (v1i64 (scalar_to_vector GR64:$src)))]>;
-
+                              (bitconvert GR64:$src))]>;
 let neverHasSideEffects = 1 in
 def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                         "movq\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1 in
 def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst, (load_mmx addr:$src))]>;
 def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
-                        [(store (v1i64 VR64:$src), addr:$dst)]>;
+                        [(store (x86mmx VR64:$src), addr:$dst)]>;
 
 def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
                           "movdq2q\t{$src, $dst|$dst, $src}",
                           [(set VR64:$dst,
-                            (v1i64 (bitconvert
+                            (x86mmx (bitconvert
                             (i64 (vector_extract (v2i64 VR128:$src),
                                   (iPTR 0))))))]>;
 
 def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
                            "movq2dq\t{$src, $dst|$dst, $src}",
           [(set VR128:$dst,
-            (movl immAllZerosV,
-                  (v2i64 (scalar_to_vector
-                              (i64 (bitconvert (v1i64 VR64:$src)))))))]>;
+            (v2i64 (scalar_to_vector
+                              (i64 (bitconvert (x86mmx VR64:$src))))))]>;
 
 let neverHasSideEffects = 1 in
 def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
@@ -176,34 +204,40 @@
 def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
                              "movd\t{$src, $dst|$dst, $src}",
               [(set VR64:$dst,
-                    (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+                    (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>;
 let AddedComplexity = 20 in
 def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
                            (ins i32mem:$src),
                              "movd\t{$src, $dst|$dst, $src}",
           [(set VR64:$dst,
-                (v2i32 (X86vzmovl (v2i32
+                (x86mmx (X86vzmovl (x86mmx
                                    (scalar_to_vector (loadi32 addr:$src))))))]>;
 
 // Arithmetic Instructions
-
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>;
 // -- Addition
-defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8,  1>;
-defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>;
-defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>;
-defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>;
-
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>;
 defm MMX_PADDSB  : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
 defm MMX_PADDSW  : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
 
 defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
 defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
 
+defm MMX_PHADDW  : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>;
+defm MMX_PHADD   : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>;
+
+
 // -- Subtraction
-defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>;
-defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>;
-defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>;
-defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>;
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>;
 
 defm MMX_PSUBSB  : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
 defm MMX_PSUBSW  : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
@@ -211,16 +245,25 @@
 defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
 defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
 
+defm MMX_PHSUBW  : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>;
+defm MMX_PHSUBD  : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>;
+
 // -- Multiplication
-defm MMX_PMULLW  : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
+defm MMX_PMULLW  : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>;
 
 defm MMX_PMULHW  : MMXI_binop_rm_int<0xE5, "pmulhw",  int_x86_mmx_pmulh_w,  1>;
 defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
 defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+let isCommutable = 1 in
+defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
+                                     int_x86_ssse3_pmul_hr_sw>;
 
 // -- Miscellanea
 defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
 
+defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
+                                     int_x86_ssse3_pmadd_ub_sw>;
 defm MMX_PAVGB   : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
 defm MMX_PAVGW   : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
 
@@ -232,23 +275,17 @@
 
 defm MMX_PSADBW  : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
 
-// Logical Instructions
-defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
-defm MMX_POR  : MMXI_binop_rm_v1i64<0xEB, "por" , or,  1>;
-defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
+defm MMX_PSIGNB :  SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>;
+defm MMX_PSIGNW :  SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>;
+defm MMX_PSIGND :  SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>;
+let Constraints = "$src1 = $dst" in
+  defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
 
-let Constraints = "$src1 = $dst" in {
-  def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
-                         (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                         "pandn\t{$src2, $dst|$dst, $src2}",
-                         [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
-                                                  VR64:$src2)))]>;
-  def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
-                         (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                         "pandn\t{$src2, $dst|$dst, $src2}",
-                         [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
-                                                  (load addr:$src2))))]>;
-}
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
+defm MMX_POR  : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,  1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, 1>;
 
 // Shift Instructions
 defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
@@ -270,12 +307,6 @@
 defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                                     int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
 
-// Shift up / down and insert zero's.
-def : Pat<(v1i64 (X86vshl     VR64:$src, (i8 imm:$amt))),
-          (MMX_PSLLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-def : Pat<(v1i64 (X86vshr     VR64:$src, (i8 imm:$amt))),
-          (MMX_PSRLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-
 // Comparison Instructions
 defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
 defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
@@ -285,84 +316,19 @@
 defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
 defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
 
-// Conversion Instructions
-
 // -- Unpack Instructions
-let Constraints = "$src1 = $dst" in {
-  // Unpack High Packed Data Instructions
-  def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckhbw\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckhbw\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v8i8 (mmx_unpckh VR64:$src1,
-                                      (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
-  def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckhwd\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckhwd\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v4i16 (mmx_unpckh VR64:$src1,
-                                       (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
-  def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckhdq\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckhdq\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v2i32 (mmx_unpckh VR64:$src1,
-                                       (bc_v2i32 (load_mmx addr:$src2)))))]>;
-
-  // Unpack Low Packed Data Instructions
-  def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpcklbw\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpcklbw\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v8i8 (mmx_unpckl VR64:$src1,
-                                      (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
-  def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpcklwd\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpcklwd\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v4i16 (mmx_unpckl VR64:$src1,
-                                       (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
-  def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckldq\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckldq\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v2i32 (mmx_unpckl VR64:$src1,
-                                       (bc_v2i32 (load_mmx addr:$src2)))))]>;
-}
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw", 
+                                       int_x86_mmx_punpckhbw>;
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd", 
+                                       int_x86_mmx_punpckhwd>;
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq", 
+                                       int_x86_mmx_punpckhdq>;
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw", 
+                                       int_x86_mmx_punpcklbw>;
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd", 
+                                       int_x86_mmx_punpcklwd>;
+defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
+                                       int_x86_mmx_punpckldq>;
 
 // -- Pack Instructions
 defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
@@ -370,93 +336,80 @@
 defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
 
 // -- Shuffle Instructions
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>;
+
 def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
                           (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
                           "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR64:$dst,
-                            (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
+                             (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
 def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
                           (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
                           "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR64:$dst,
-                            (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
-                                              (undef)))]>;
+                             (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+                                                   imm:$src2))]>;
 
-// -- Conversion Instructions
-let neverHasSideEffects = 1 in {
-def MMX_CVTPD2PIrr  : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                            "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPD2PIrm  : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst),
-                            (ins f128mem:$src),
-                            "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PDrr  : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
-                            "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PDrm  : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst),
-                            (ins i64mem:$src),
-                            "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PSrr  : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
-                           "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PSrm  : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst),
-                           (ins i64mem:$src),
-                           "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPS2PIrr  : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                           "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPS2PIrm  : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
-                           "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                            "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst),
-                            (ins f128mem:$src),
-                            "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                           "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
-                           "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-} // end neverHasSideEffects
 
 
-// Extract / Insert
-def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
-                    SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>,
-                                         SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
 
 
-def MMX_PEXTRWri  : MMXIi8<0xC5, MRMSrcReg,
-                           (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
+// -- Conversion Instructions
+defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+                      f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+                      SSEPackedSingle>, TB;
+defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+                      f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+                      SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+                       f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+                       SSEPackedSingle>, TB;
+defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+                       f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+                       SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+                         i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+                         SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+  defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+                         int_x86_sse_cvtpi2ps,
+                         i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedSingle>, TB;
+}
+
+// Extract / Insert
+def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
+                           (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
                            "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                           [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1),
+                           [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
                                              (iPTR imm:$src2)))]>;
 let Constraints = "$src1 = $dst" in {
-  def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
+  def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
                       (outs VR64:$dst), 
-                      (ins VR64:$src1, GR32:$src2,i16i8imm:$src3),
+                      (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
-                                               GR32:$src2,(iPTR imm:$src3))))]>;
-  def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
+                      [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+                                        GR32:$src2, (iPTR imm:$src3)))]>;
+
+  def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
                      (outs VR64:$dst),
-                     (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+                     (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
                      "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set VR64:$dst,
-                       (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
-                               (i32 (anyext (loadi16 addr:$src2))),
-                               (iPTR imm:$src3))))]>;
+                     [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+                                         (i32 (anyext (loadi16 addr:$src2))),
+                                       (iPTR imm:$src3)))]>;
 }
 
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+                          "pmovmskb\t{$src, $dst|$dst, $src}",
+                          [(set GR32:$dst, 
+                                (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+
 // MMX to XMM for vector types
 def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
-                            [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>;
+                            [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
 
 def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
           (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
@@ -464,14 +417,19 @@
 def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
           (v2i64 (MOVQI2PQIrm addr:$src))>;
 
-def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert
-                            (v2i32 (scalar_to_vector (loadi32 addr:$src))))))),
+def : Pat<(v2i64 (MMX_X86movq2dq 
+                    (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
           (v2i64 (MOVDI2PDIrm addr:$src))>;
 
-// Mask creation
-def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
-                          "pmovmskb\t{$src, $dst|$dst, $src}",
-                          [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>;
+// Low word of XMM to MMX.
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+                            [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
+          (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+          (x86mmx (MMX_MOVQ64rm addr:$src))>;
 
 // Misc.
 let Uses = [EDI] in
@@ -483,181 +441,14 @@
                            "maskmovq\t{$mask, $src|$src, $mask}",
                            [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
 
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map zero vector to pxor.
-let isReMaterializable = 1, isCodeGenOnly = 1 in {
-  // FIXME: Change encoding to pseudo.
-  def MMX_V_SET0       : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "",
-                              [(set VR64:$dst, (v2i32 immAllZerosV))]>;
-  def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "",
-                              [(set VR64:$dst, (v2i32 immAllOnesV))]>;
-}
-
-let Predicates = [HasMMX] in {
-  def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
-  def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
-  def : Pat<(v8i8  immAllZerosV), (MMX_V_SET0)>;
-}
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// Store 64-bit integer vector values.
-def : Pat<(store (v8i8  VR64:$src), addr:$dst),
-          (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v4i16 VR64:$src), addr:$dst),
-          (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v2i32 VR64:$src), addr:$dst),
-          (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v1i64 VR64:$src), addr:$dst),
-          (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-
-// Bit convert.
-def : Pat<(v8i8  (bitconvert (v1i64 VR64:$src))), (v8i8  VR64:$src)>;
-def : Pat<(v8i8  (bitconvert (v2i32 VR64:$src))), (v8i8  VR64:$src)>;
-def : Pat<(v8i8  (bitconvert (v4i16 VR64:$src))), (v8i8  VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8  VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8  VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v8i8  VR64:$src))), (v1i64 VR64:$src)>;
-
 // 64-bit bit convert.
-def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
-          (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
-          (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
-          (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v8i8  (bitconvert (i64 GR64:$src))),
+def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
           (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
-          (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
-          (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
-          (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64  (bitconvert (v8i8 VR64:$src))),
+def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
           (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 VR64:$src))),
+def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
           (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 VR64:$src))),
-          (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
-          (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
-          (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FR64:$src))),
-          (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 FR64:$src))),
-          (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 FR64:$src))),
-          (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v8i8 (bitconvert (f64 FR64:$src))),
+def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
           (MMX_MOVFR642Qrr FR64:$src)>;
 
-let AddedComplexity = 20 in {
-  def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
-}
 
-// Clear top half.
-let AddedComplexity = 15 in {
-  def : Pat<(v2i32 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (v2i32 (MMX_V_SET0)))>;
-}
-
-// Patterns to perform canonical versions of vector shuffling.
-let AddedComplexity = 10 in {
-  def : Pat<(v8i8  (mmx_unpckl_undef VR64:$src, (undef))),
-            (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
-  def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
-            (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
-  def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
-            (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
-}
-
-let AddedComplexity = 10 in {
-  def : Pat<(v8i8  (mmx_unpckh_undef VR64:$src, (undef))),
-            (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
-  def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
-            (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
-  def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
-            (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
-}
-
-// Some special case PANDN patterns.
-// FIXME: Get rid of these.
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
-                  VR64:$src2)),
-          (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
-                  (load addr:$src2))),
-          (MMX_PANDNrm VR64:$src1, addr:$src2)>;
-
-// Move MMX to lower 64-bit of XMM
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
-          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))),
-          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))),
-          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))),
-          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-// Move lower 64-bit of XMM to MMX.
-def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
-                                                  (iPTR 0))))),
-          (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
-                                                  (iPTR 0))))),
-          (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
-                                                  (iPTR 0))))),
-          (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
-
-// Patterns for vector comparisons
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)),
-          (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)),
-          (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)),
-          (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>;
-
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)),
-          (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)),
-          (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)),
-          (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>;
-
-// CMOV* - Used to implement the SELECT DAG operation.  Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
-  def CMOV_V1I64 : I<0, Pseudo,
-                    (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond),
-                    "#CMOV_V1I64 PSEUDO!",
-                    [(set VR64:$dst,
-                      (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond,
-                                          EFLAGS)))]>;
-}

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td Tue Oct 26 19:48:03 2010
@@ -15,43 +15,6 @@
 
 
 //===----------------------------------------------------------------------===//
-// SSE scalar FP Instructions
-//===----------------------------------------------------------------------===//
-
-// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
-  def CMOV_FR32 : I<0, Pseudo,
-                    (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
-                    "#CMOV_FR32 PSEUDO!",
-                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
-                                                  EFLAGS))]>;
-  def CMOV_FR64 : I<0, Pseudo,
-                    (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
-                    "#CMOV_FR64 PSEUDO!",
-                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
-                                                  EFLAGS))]>;
-  def CMOV_V4F32 : I<0, Pseudo,
-                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
-                    "#CMOV_V4F32 PSEUDO!",
-                    [(set VR128:$dst,
-                      (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
-                                          EFLAGS)))]>;
-  def CMOV_V2F64 : I<0, Pseudo,
-                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
-                    "#CMOV_V2F64 PSEUDO!",
-                    [(set VR128:$dst,
-                      (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
-                                          EFLAGS)))]>;
-  def CMOV_V2I64 : I<0, Pseudo,
-                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
-                    "#CMOV_V2I64 PSEUDO!",
-                    [(set VR128:$dst,
-                      (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
-                                          EFLAGS)))]>;
-}
-
-//===----------------------------------------------------------------------===//
 // SSE 1 & 2 Instructions Classes
 //===----------------------------------------------------------------------===//
 
@@ -82,17 +45,15 @@
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
-                       !strconcat(SSEVer, !strconcat("_",
-                       !strconcat(OpcodeStr, FPSizeStr))))
+       [(set RC:$dst, (!cast<Intrinsic>(
+                 !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
              RC:$src1, RC:$src2))]>;
   def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
-                       !strconcat(SSEVer, !strconcat("_",
-                       !strconcat(OpcodeStr, FPSizeStr))))
+       [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
+                                          SSEVer, "_", OpcodeStr, FPSizeStr))
              RC:$src1, mem_cpat:$src2))]>;
 }
 
@@ -142,17 +103,15 @@
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-           [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
-                           !strconcat(SSEVer, !strconcat("_",
-                           !strconcat(OpcodeStr, FPSizeStr))))
+           [(set RC:$dst, (!cast<Intrinsic>(
+                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
                  RC:$src1, RC:$src2))], d>;
   def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
-                       !strconcat(SSEVer, !strconcat("_",
-                       !strconcat(OpcodeStr, FPSizeStr))))
+       [(set RC:$dst, (!cast<Intrinsic>(
+                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
              RC:$src1, (mem_frag addr:$src2)))], d>;
 }
 
@@ -340,6 +299,15 @@
                    "movupd\t{$src, $dst|$dst, $src}",
                    [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
 }
+
+def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+          (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+          (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
 def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
                    [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -394,7 +362,7 @@
                                  string asm_opr> {
   def PSrm : PI<opc, MRMSrcMem,
          (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-         !strconcat(!strconcat(base_opc,"s"), asm_opr),
+         !strconcat(base_opc, "s", asm_opr),
      [(set RC:$dst,
        (mov_frag RC:$src1,
               (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
@@ -402,7 +370,7 @@
 
   def PDrm : PI<opc, MRMSrcMem,
          (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
-         !strconcat(!strconcat(base_opc,"d"), asm_opr),
+         !strconcat(base_opc, "d", asm_opr),
      [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
                               (scalar_to_vector (loadf64 addr:$src2)))))],
               SSEPackedDouble>, TB, OpSize;
@@ -589,14 +557,6 @@
 
 // Conversion Instructions Intrinsics - Match intrinsics which expect MM
 // and/or XMM operand(s).
-multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                         Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
-                         string asm, Domain d> {
-  def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
-                        [(set DstRC:$dst, (Int SrcRC:$src))], d>;
-  def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
-                        [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
-}
 
 multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
@@ -609,16 +569,6 @@
               [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
 }
 
-multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
-                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
-                    PatFrag ld_frag, string asm, Domain d> {
-  def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
-              asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
-  def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
-                   (ins DstRC:$src1, x86memop:$src2), asm,
-              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
-}
-
 multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                     RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
                     PatFrag ld_frag, string asm, bit Is2Addr = 1> {
@@ -660,13 +610,11 @@
                       f32mem, load, "cvtss2si">, XS;
 defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                       f32mem, load, "cvtss2si{q}">, XS, REX_W;
-defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
-                      f128mem, load, "cvtsd2si">, XD;
-defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
-                        f128mem, load, "cvtsd2si">, XD, REX_W;
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+                f128mem, load, "cvtsd2si{l}">, XD;
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+                  f128mem, load, "cvtsd2si{q}">, XD, REX_W;
 
-defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
-                        REX_W;
 
 let isAsmParserOnly = 1 in {
   defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -696,29 +644,6 @@
                         "cvtsi2sd">, XD, REX_W;
 }
 
-// Instructions below don't have an AVX form.
-defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
-                      f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
-                      SSEPackedSingle>, TB;
-defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
-                      f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
-                      SSEPackedDouble>, TB, OpSize;
-defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
-                       f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
-                       SSEPackedSingle>, TB;
-defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
-                       f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
-                       SSEPackedDouble>, TB, OpSize;
-defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
-                         i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
-                         SSEPackedDouble>, TB, OpSize;
-let Constraints = "$src1 = $dst" in {
-  defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
-                         int_x86_sse_cvtpi2ps,
-                         i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
-                         SSEPackedSingle>, TB;
-}
-
 /// SSE 1 Only
 
 // Aliases for intrinsics
@@ -751,10 +676,10 @@
 defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
                                "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
                                VEX_W;
-defm VCVTDQ2PS   : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
+defm VCVTDQ2PS   : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
                                "cvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle>, TB, VEX;
-defm VCVTDQ2PSY  : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
+defm VCVTDQ2PSY  : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
                                "cvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle>, TB, VEX;
 }
@@ -763,7 +688,7 @@
                           "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
 defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
                           "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
                             "cvtdq2ps\t{$src, $dst|$dst, $src}",
                             SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */
 }
@@ -894,6 +819,7 @@
                                         (bitconvert (memopv2i64 addr:$src))))]>,
                      XS, Requires<[HasSSE2]>;
 
+
 // Convert packed single/double fp to doubleword
 let isAsmParserOnly = 1 in {
 def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -963,9 +889,13 @@
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 }
 def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+                      "cvttps2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                            (int_x86_sse2_cvttps2dq VR128:$src))]>;
 def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+                      "cvttps2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                            (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
 
 
 let isAsmParserOnly = 1 in {
@@ -980,16 +910,6 @@
                                            (memop addr:$src)))]>,
                       XS, VEX, Requires<[HasAVX]>;
 }
-def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                        "cvttps2dq\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst,
-                              (int_x86_sse2_cvttps2dq VR128:$src))]>,
-                      XS, Requires<[HasSSE2]>;
-def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                        "cvttps2dq\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (int_x86_sse2_cvttps2dq
-                                           (memop addr:$src)))]>,
-                      XS, Requires<[HasSSE2]>;
 
 let isAsmParserOnly = 1 in {
 def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
@@ -1003,13 +923,13 @@
                           [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                              (memop addr:$src)))]>, VEX;
 }
-def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
-def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
-                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
-                          [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
-                                             (memop addr:$src)))]>;
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+                                        (memop addr:$src)))]>;
 
 let isAsmParserOnly = 1 in {
 // The assembler can recognize rr 256-bit instructions by seeing a ymm
@@ -1117,6 +1037,39 @@
                          [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                             (memop addr:$src)))]>;
 
+// AVX 256-bit register conversion intrinsics
+// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
+// whenever possible to avoid declaring two versions of each one.
+def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+          (VCVTDQ2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+          (VCVTDQ2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+          (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+          (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+          (VCVTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+          (VCVTPS2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+          (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+          (VCVTPS2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+          (VCVTTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+          (VCVTTPD2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
+          (VCVTTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
+          (VCVTTPS2DQYrm addr:$src)>;
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Compare Instructions
 //===----------------------------------------------------------------------===//
@@ -1281,16 +1234,14 @@
                  "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
                  "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
                  SSEPackedDouble>, OpSize, VEX_4V;
-  let Pattern = []<dag> in {
-    defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps,
-                   "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
-                   "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
-                   SSEPackedSingle>, VEX_4V;
-    defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd,
-                   "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
-                   "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
-                   SSEPackedDouble>, OpSize, VEX_4V;
-  }
+  defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+                 "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+                 "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+                 SSEPackedSingle>, VEX_4V;
+  defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+                 "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+                 "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+                 SSEPackedDouble>, OpSize, VEX_4V;
 }
 let Constraints = "$src1 = $dst" in {
   defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
@@ -1428,9 +1379,11 @@
 /// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave
 multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
                                 Domain d> {
-  def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
-              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+  def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+                !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                      [(set GR32:$dst, (Int RC:$src))], d>;
+  def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
+                !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
 }
 
 // Mask creation
@@ -1445,20 +1398,18 @@
   defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
                                         "movmskpd", SSEPackedDouble>, OpSize,
                                         VEX;
+  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+                                        "movmskps", SSEPackedSingle>, VEX;
+  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+                                        "movmskpd", SSEPackedDouble>, OpSize,
+                                        VEX;
 
-  // FIXME: merge with multiclass above when the intrinsics come.
+  // Assembler Only
   def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
              "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
   def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
              "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
              VEX;
-
-  def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
-             "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
-  def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
-             "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
-             VEX;
-
   def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
              "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
   def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
@@ -1643,6 +1594,9 @@
 ///
 /// These three forms can each be reg+reg or reg+mem.
 ///
+
+/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
+/// classes below
 multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   bit Is2Addr = 1> {
   defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
@@ -1682,14 +1636,24 @@
 multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
                                       bit Is2Addr = 1> {
   defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
-     !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32,
+     !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
                                               SSEPackedSingle, Is2Addr>, TB;
 
   defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
-     !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64,
+     !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
                                       SSEPackedDouble, Is2Addr>, TB, OpSize;
 }
 
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+  defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+     !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
+      SSEPackedSingle, 0>, TB;
+
+  defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+     !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
+      SSEPackedDouble, 0>, TB, OpSize;
+}
+
 // Binary Arithmetic instructions
 let isAsmParserOnly = 1 in {
   defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
@@ -1714,11 +1678,13 @@
                 basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
                 basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
                 basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
-                basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
+                basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+                basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
     defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
                 basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
                 basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
                 basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+                basic_sse12_fp_binop_p_y_int<0x5D, "min">,
                 basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
   }
 }
@@ -1830,6 +1796,16 @@
                     [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
 }
 
+/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+                                Intrinsic V4F32Int> {
+  def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+                    [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+  def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+                    [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+}
 
 /// sse2_fp_unop_s - SSE2 unops in scalar form.
 multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -1900,6 +1876,17 @@
                     [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
 }
 
+/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+                                Intrinsic V2F64Int> {
+  def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+                    [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+  def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+                    [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+}
+
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
   // Square root.
   defm VSQRT  : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
@@ -1910,8 +1897,10 @@
                 sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
                 sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
                 sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
-                sse1_fp_unop_p_int<0x51, "vsqrt",  int_x86_sse_sqrt_ps>,
+                sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
                 sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
+                sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
+                sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
                 VEX;
 
   // Reciprocal approximations. Note that these typically require refinement
@@ -1920,12 +1909,14 @@
                                    int_x86_sse_rsqrt_ss>, VEX_4V;
   defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
                 sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
+                sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
                 sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
 
   defm VRCP   : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
                                    VEX_4V;
   defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
                 sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
+                sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
                 sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
 }
 
@@ -2015,6 +2006,13 @@
   }
 }
 
+def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
+          (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
+          (VMOVNTPDYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
+          (VMOVNTPSYmr addr:$dst, VR256:$src)>;
+
 def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                     "movntps\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
@@ -2083,7 +2081,9 @@
 // Alias instructions that map zero vector to pxor / xorp* for sse.
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation: it does not expand the instructions below like
+// X86MCInstLower does.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
     isCodeGenOnly = 1 in {
 def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
@@ -2095,6 +2095,26 @@
                  [(set VR128:$dst, (v4i32 immAllZerosV))]>;
 }
 
+// The same as done above but for AVX. The 128-bit versions are the
+// same, but re-encoded. The 256-bit versions do not support a PI form.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation: it does not expand the instructions below like
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+    isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PS  : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+                   [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PD  : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+                   [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+                   [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+                   [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+let ExeDomain = SSEPackedInt in
+def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
+
 def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
 def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
 def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
@@ -2214,6 +2234,10 @@
 
 } // ExeDomain = SSEPackedInt
 
+def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+          (VMOVDQUYmr addr:$dst, VR256:$src)>;
+
 //===---------------------------------------------------------------------===//
 // SSE2 - Packed Integer Arithmetic Instructions
 //===---------------------------------------------------------------------===//
@@ -2897,6 +2921,13 @@
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst,
+                          (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+                       "mov{d|q}\t{$src, $dst|$dst, $src}",
+                       [(set FR64:$dst, (bitconvert GR64:$src))]>;
 
 
 // Move Int Doubleword to Single Scalar
@@ -2939,6 +2970,21 @@
                        [(store (i32 (vector_extract (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)]>;
 
+def MOVPQIto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                         "mov{d|q}\t{$src, $dst|$dst, $src}",
+                         [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+                                           (iPTR 0)))]>;
+def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+                       "movq\t{$src, $dst|$dst, $src}",
+                       [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
+                        [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr  : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                        [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
 // Move Scalar Single to Double Int
 let isAsmParserOnly = 1 in {
 def VMOVSS2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
@@ -3181,13 +3227,13 @@
 // Convert Packed DW Integers to Packed Double FP
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
 def VCVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+                     "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+                     "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTDQ2PDYrm  : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+                     "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTDQ2PDYrr  : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+                     "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
 }
 
 def CVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -3195,6 +3241,17 @@
 def CVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
 
+// AVX 256-bit register conversion intrinsics
+def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+           (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+           (VCVTDQ2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+          (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+          (VCVTPD2DQYrm addr:$src)>;
+
 //===---------------------------------------------------------------------===//
 // SSE3 - Move Instructions
 //===---------------------------------------------------------------------===//
@@ -3260,10 +3317,11 @@
 // Move Unaligned Integer
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
   def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                     "vlddqu\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+                   "vlddqu\t{$src, $dst|$dst, $src}",
+                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
   def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
-                     "vlddqu\t{$src, $dst|$dst, $src}", []>, VEX;
+                   "vlddqu\t{$src, $dst|$dst, $src}",
+                   [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
 }
 def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "lddqu\t{$src, $dst|$dst, $src}",
@@ -3327,12 +3385,10 @@
                                f128mem, 0>, XD, VEX_4V;
   defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
                                f128mem, 0>, OpSize, VEX_4V;
-  let Pattern = []<dag> in {
-  defm VADDSUBPSY : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR256,
+  defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
                                f256mem, 0>, XD, VEX_4V;
-  defm VADDSUBPDY : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR256,
+  defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
                                f256mem, 0>, OpSize, VEX_4V;
-  }
 }
 let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
     ExeDomain = SSEPackedDouble in {
@@ -3377,24 +3433,22 @@
 }
 
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
-  defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
-                         int_x86_sse3_hadd_ps, 0>, VEX_4V;
-  defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
-                         int_x86_sse3_hadd_pd, 0>, VEX_4V;
-  defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
-                         int_x86_sse3_hsub_ps, 0>, VEX_4V;
-  defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
-                         int_x86_sse3_hsub_pd, 0>, VEX_4V;
-  let Pattern = []<dag> in {
-  defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+  defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
                           int_x86_sse3_hadd_ps, 0>, VEX_4V;
-  defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+  defm VHADDPD  : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
                           int_x86_sse3_hadd_pd, 0>, VEX_4V;
-  defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+  defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
                           int_x86_sse3_hsub_ps, 0>, VEX_4V;
-  defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+  defm VHSUBPD  : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
                           int_x86_sse3_hsub_pd, 0>, VEX_4V;
-  }
+  defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+                          int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+  defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+                          int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+  defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+                          int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+  defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+                          int_x86_avx_hsub_pd_256, 0>, VEX_4V;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -3412,19 +3466,10 @@
 // SSSE3 - Packed Absolute Instructions
 //===---------------------------------------------------------------------===//
 
+
 /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
 multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
-                            PatFrag mem_frag64, PatFrag mem_frag128,
-                            Intrinsic IntId64, Intrinsic IntId128> {
-  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
-  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR64:$dst,
-                     (IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
-
+                            PatFrag mem_frag128, Intrinsic IntId128> {
   def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3440,26 +3485,20 @@
 }
 
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
-  defm VPABSB  : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8,
-                                  int_x86_ssse3_pabs_b,
+  defm VPABSB  : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
                                   int_x86_ssse3_pabs_b_128>, VEX;
-  defm VPABSW  : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16,
-                                  int_x86_ssse3_pabs_w,
+  defm VPABSW  : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
                                   int_x86_ssse3_pabs_w_128>, VEX;
-  defm VPABSD  : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32,
-                                  int_x86_ssse3_pabs_d,
+  defm VPABSD  : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
                                   int_x86_ssse3_pabs_d_128>, VEX;
 }
 
-defm PABSB       : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
-                                    int_x86_ssse3_pabs_b,
-                                    int_x86_ssse3_pabs_b_128>;
-defm PABSW       : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16,
-                                    int_x86_ssse3_pabs_w,
-                                    int_x86_ssse3_pabs_w_128>;
-defm PABSD       : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
-                                    int_x86_ssse3_pabs_d,
-                                    int_x86_ssse3_pabs_d_128>;
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+                              int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+                              int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+                              int_x86_ssse3_pabs_d_128>;
 
 //===---------------------------------------------------------------------===//
 // SSSE3 - Packed Binary Operator Instructions
@@ -3467,26 +3506,9 @@
 
 /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
 multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
-                             PatFrag mem_frag64, PatFrag mem_frag128,
-                             Intrinsic IntId64, Intrinsic IntId128,
+                             PatFrag mem_frag128, Intrinsic IntId128,
                              bit Is2Addr = 1> {
   let isCommutable = 1 in
-  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
-       (ins VR64:$src1, VR64:$src2),
-       !if(Is2Addr,
-         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
-  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
-       (ins VR64:$src1, i64mem:$src2),
-       !if(Is2Addr,
-         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set VR64:$dst,
-         (IntId64 VR64:$src1,
-          (bitconvert (memopv8i8 addr:$src2))))]>;
-
-  let isCommutable = 1 in
   def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
        (ins VR128:$src1, VR128:$src2),
        !if(Is2Addr,
@@ -3506,84 +3528,60 @@
 
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
 let isCommutable = 0 in {
-  defm VPHADDW    : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16,
-                                      int_x86_ssse3_phadd_w,
+  defm VPHADDW    : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
                                       int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
-  defm VPHADDD    : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32,
-                                      int_x86_ssse3_phadd_d,
+  defm VPHADDD    : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
                                       int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
-  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16,
-                                      int_x86_ssse3_phadd_sw,
+  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
                                       int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
-  defm VPHSUBW    : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16,
-                                      int_x86_ssse3_phsub_w,
+  defm VPHSUBW    : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
                                       int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
-  defm VPHSUBD    : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32,
-                                      int_x86_ssse3_phsub_d,
+  defm VPHSUBD    : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
                                       int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
-  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16,
-                                      int_x86_ssse3_phsub_sw,
+  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
                                       int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
-  defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8,
-                                      int_x86_ssse3_pmadd_ub_sw,
+  defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
                                       int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
-  defm VPSHUFB    : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8,
-                                      int_x86_ssse3_pshuf_b,
+  defm VPSHUFB    : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
                                       int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
-  defm VPSIGNB    : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8,
-                                      int_x86_ssse3_psign_b,
+  defm VPSIGNB    : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
                                       int_x86_ssse3_psign_b_128, 0>, VEX_4V;
-  defm VPSIGNW    : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16,
-                                      int_x86_ssse3_psign_w,
+  defm VPSIGNW    : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
                                       int_x86_ssse3_psign_w_128, 0>, VEX_4V;
-  defm VPSIGND    : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32,
-                                      int_x86_ssse3_psign_d,
+  defm VPSIGND    : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
                                       int_x86_ssse3_psign_d_128, 0>, VEX_4V;
 }
-defm VPMULHRSW    : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16,
-                                      int_x86_ssse3_pmul_hr_sw,
+defm VPMULHRSW    : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
                                       int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
 }
 
 // None of these have i8 immediate fields.
 let ImmT = NoImm, Constraints = "$src1 = $dst" in {
 let isCommutable = 0 in {
-  defm PHADDW    : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16,
-                                     int_x86_ssse3_phadd_w,
+  defm PHADDW    : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
                                      int_x86_ssse3_phadd_w_128>;
-  defm PHADDD    : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32,
-                                     int_x86_ssse3_phadd_d,
+  defm PHADDD    : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
                                      int_x86_ssse3_phadd_d_128>;
-  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16,
-                                     int_x86_ssse3_phadd_sw,
+  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
                                      int_x86_ssse3_phadd_sw_128>;
-  defm PHSUBW    : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16,
-                                     int_x86_ssse3_phsub_w,
+  defm PHSUBW    : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
                                      int_x86_ssse3_phsub_w_128>;
-  defm PHSUBD    : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32,
-                                     int_x86_ssse3_phsub_d,
+  defm PHSUBD    : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
                                      int_x86_ssse3_phsub_d_128>;
-  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16,
-                                     int_x86_ssse3_phsub_sw,
+  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
                                      int_x86_ssse3_phsub_sw_128>;
-  defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8,
-                                     int_x86_ssse3_pmadd_ub_sw,
+  defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
                                      int_x86_ssse3_pmadd_ub_sw_128>;
-  defm PSHUFB    : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8,
-                                     int_x86_ssse3_pshuf_b,
+  defm PSHUFB    : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
                                      int_x86_ssse3_pshuf_b_128>;
-  defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8,
-                                     int_x86_ssse3_psign_b,
+  defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
                                      int_x86_ssse3_psign_b_128>;
-  defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16,
-                                     int_x86_ssse3_psign_w,
+  defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
                                      int_x86_ssse3_psign_w_128>;
-  defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32,
-                                       int_x86_ssse3_psign_d,
+  defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
                                        int_x86_ssse3_psign_d_128>;
 }
-defm PMULHRSW    : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16,
-                                     int_x86_ssse3_pmul_hr_sw,
+defm PMULHRSW    : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
                                      int_x86_ssse3_pmul_hr_sw_128>;
 }
 
@@ -3596,22 +3594,7 @@
 // SSSE3 - Packed Align Instruction Patterns
 //===---------------------------------------------------------------------===//
 
-multiclass sse3_palign<string asm, bit Is2Addr = 1> {
-  def R64rr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
-      (ins VR64:$src1, VR64:$src2, i8imm:$src3),
-      !if(Is2Addr,
-        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-        !strconcat(asm,
-                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
-      []>;
-  def R64rm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
-      (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
-      !if(Is2Addr,
-        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-        !strconcat(asm,
-                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
-      []>;
-
+multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
   def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2, i8imm:$src3),
       !if(Is2Addr,
@@ -3629,29 +3612,11 @@
 }
 
 let isAsmParserOnly = 1, Predicates = [HasAVX] in
-  defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V;
+  defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
 let Constraints = "$src1 = $dst" in
-  defm PALIGN : sse3_palign<"palignr">;
+  defm PALIGN : ssse3_palign<"palignr">;
 
 let AddedComplexity = 5 in {
-
-def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
-          (PALIGNR64rr VR64:$src2, VR64:$src1,
-                       (SHUFFLE_get_palign_imm VR64:$src3))>,
-          Requires<[HasSSSE3]>;
-def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)),
-          (PALIGNR64rr VR64:$src2, VR64:$src1,
-                       (SHUFFLE_get_palign_imm VR64:$src3))>,
-          Requires<[HasSSSE3]>;
-def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)),
-          (PALIGNR64rr VR64:$src2, VR64:$src1,
-                       (SHUFFLE_get_palign_imm VR64:$src3))>,
-          Requires<[HasSSSE3]>;
-def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)),
-          (PALIGNR64rr VR64:$src2, VR64:$src1,
-                       (SHUFFLE_get_palign_imm VR64:$src3))>,
-          Requires<[HasSSSE3]>;
-
 def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
           (PALIGNR128rr VR128:$src2, VR128:$src1,
                         (SHUFFLE_get_palign_imm VR128:$src3))>,
@@ -3925,38 +3890,65 @@
 def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
           (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
-          (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
-          (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
-          (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
+          (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
 
 // Use movaps / movups for SSE integer load / store (one byte shorter).
-def : Pat<(alignedloadv4i32 addr:$src),
-          (MOVAPSrm addr:$src)>;
-def : Pat<(loadv4i32 addr:$src),
-          (MOVUPSrm addr:$src)>;
-def : Pat<(alignedloadv2i64 addr:$src),
-          (MOVAPSrm addr:$src)>;
-def : Pat<(loadv2i64 addr:$src),
-          (MOVUPSrm addr:$src)>;
-
-def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
-          (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
-          (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
-          (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
-          (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v2i64 VR128:$src), addr:$dst),
-          (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v4i32 VR128:$src), addr:$dst),
-          (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v8i16 VR128:$src), addr:$dst),
-          (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v16i8 VR128:$src), addr:$dst),
-          (MOVUPSmr addr:$dst, VR128:$src)>;
+let Predicates = [HasSSE1] in {
+  def : Pat<(alignedloadv4i32 addr:$src),
+            (MOVAPSrm addr:$src)>;
+  def : Pat<(loadv4i32 addr:$src),
+            (MOVUPSrm addr:$src)>;
+  def : Pat<(alignedloadv2i64 addr:$src),
+            (MOVAPSrm addr:$src)>;
+  def : Pat<(loadv2i64 addr:$src),
+            (MOVUPSrm addr:$src)>;
+
+  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+            (MOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+            (MOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+            (MOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+            (MOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+            (MOVUPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+            (MOVUPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+            (MOVUPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+            (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter).
+let Predicates = [HasAVX] in {
+  def : Pat<(alignedloadv4i32 addr:$src),
+            (VMOVAPSrm addr:$src)>;
+  def : Pat<(loadv4i32 addr:$src),
+            (VMOVUPSrm addr:$src)>;
+  def : Pat<(alignedloadv2i64 addr:$src),
+            (VMOVAPSrm addr:$src)>;
+  def : Pat<(loadv2i64 addr:$src),
+            (VMOVUPSrm addr:$src)>;
+
+  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+}
 
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Packed Move with Sign/Zero Extend
@@ -4350,44 +4342,44 @@
 // SSE4.1 - Round Instructions
 //===----------------------------------------------------------------------===//
 
-multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
-                            string OpcodeStr,
-                            Intrinsic V4F32Int,
-                            Intrinsic V2F64Int> {
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+                            X86MemOperand x86memop, RegisterClass RC,
+                            PatFrag mem_frag32, PatFrag mem_frag64,
+                            Intrinsic V4F32Int, Intrinsic V2F64Int> {
   // Intrinsic operation, reg.
   // Vector intrinsic operation, reg
-  def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+  def PSr : SS4AIi8<opcps, MRMSrcReg,
+                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
                     OpSize;
 
   // Vector intrinsic operation, mem
-  def PSm_Int : Ii8<opcps, MRMSrcMem,
-                    (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+  def PSm : Ii8<opcps, MRMSrcMem,
+                    (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set VR128:$dst,
-                          (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+                    [(set RC:$dst,
+                          (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
                     TA, OpSize,
                 Requires<[HasSSE41]>;
 
   // Vector intrinsic operation, reg
-  def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+  def PDr : SS4AIi8<opcpd, MRMSrcReg,
+                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+                    [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
                     OpSize;
 
   // Vector intrinsic operation, mem
-  def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
-                    (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+  def PDm : SS4AIi8<opcpd, MRMSrcMem,
+                    (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    [(set VR128:$dst,
-                          (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+                    [(set RC:$dst,
+                          (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
                     OpSize;
 }
 
@@ -4395,28 +4387,28 @@
                    RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
   // Intrinsic operation, reg.
   // Vector intrinsic operation, reg
-  def PSr : SS4AIi8<opcps, MRMSrcReg,
+  def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, OpSize;
 
   // Vector intrinsic operation, mem
-  def PSm : Ii8<opcps, MRMSrcMem,
+  def PSm_AVX : Ii8<opcps, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, TA, OpSize, Requires<[HasSSE41]>;
 
   // Vector intrinsic operation, reg
-  def PDr : SS4AIi8<opcpd, MRMSrcReg,
+  def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, OpSize;
 
   // Vector intrinsic operation, mem
-  def PDm : SS4AIi8<opcpd, MRMSrcMem,
+  def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4428,7 +4420,7 @@
                             Intrinsic F32Int,
                             Intrinsic F64Int, bit Is2Addr = 1> {
   // Intrinsic operation, reg.
-  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+  def SSr : SS4AIi8<opcss, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,
@@ -4439,7 +4431,7 @@
         OpSize;
 
   // Intrinsic operation, mem.
-  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+  def SSm : SS4AIi8<opcss, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,
@@ -4451,7 +4443,7 @@
         OpSize;
 
   // Intrinsic operation, reg.
-  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+  def SDr : SS4AIi8<opcsd, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,
@@ -4462,7 +4454,7 @@
         OpSize;
 
   // Intrinsic operation, mem.
-  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+  def SDm : SS4AIi8<opcsd, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,
@@ -4477,28 +4469,28 @@
 multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
                                    string OpcodeStr> {
   // Intrinsic operation, reg.
-  def SSr : SS4AIi8<opcss, MRMSrcReg,
+  def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
         !strconcat(OpcodeStr,
                 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
         []>, OpSize;
 
   // Intrinsic operation, mem.
-  def SSm : SS4AIi8<opcss, MRMSrcMem,
+  def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
         !strconcat(OpcodeStr,
                 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
         []>, OpSize;
 
   // Intrinsic operation, reg.
-  def SDr : SS4AIi8<opcsd, MRMSrcReg,
+  def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
             !strconcat(OpcodeStr,
                 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
         []>, OpSize;
 
   // Intrinsic operation, mem.
-  def SDm : SS4AIi8<opcsd, MRMSrcMem,
+  def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
             !strconcat(OpcodeStr,
                 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -4508,12 +4500,18 @@
 // FP round - roundss, roundps, roundsd, roundpd
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
   // Intrinsic form
-  defm VROUND  : sse41_fp_unop_rm<0x08, 0x09, "vround",
-                                int_x86_sse41_round_ps, int_x86_sse41_round_pd>,
-                                VEX;
+  defm VROUND  : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
+                                  memopv4f32, memopv2f64,
+                                  int_x86_sse41_round_ps,
+                                  int_x86_sse41_round_pd>, VEX;
+  defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
+                                  memopv8f32, memopv4f64,
+                                  int_x86_avx_round_ps_256,
+                                  int_x86_avx_round_pd_256>, VEX;
   defm VROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
-                                int_x86_sse41_round_ss, int_x86_sse41_round_sd,
-                                0>, VEX_4V;
+                                  int_x86_sse41_round_ss,
+                                  int_x86_sse41_round_sd, 0>, VEX_4V;
+
   // Instructions for the assembler
   defm VROUND  : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
                                         VEX;
@@ -4522,7 +4520,8 @@
   defm VROUND  : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
 }
 
-defm ROUND  : sse41_fp_unop_rm<0x08, 0x09, "round",
+defm ROUND  : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
+                               memopv4f32, memopv2f64,
                                int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
 let Constraints = "$src1 = $dst" in
 defm ROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "round",
@@ -4536,53 +4535,78 @@
 // the intel intrinsic that corresponds to this.
 let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
 def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                      "vptest\t{$src2, $src1|$src1, $src2}",
-                      [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
-                      OpSize, VEX;
-def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
-                      "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
+                "vptest\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+                OpSize, VEX;
+def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+                "vptest\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+                OpSize, VEX;
 
-def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
-                     "vptest\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
-                     OpSize, VEX;
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+                "vptest\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
+                OpSize, VEX;
 def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
-                     "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
+                "vptest\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+                OpSize, VEX;
 }
 
 let Defs = [EFLAGS] in {
 def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                    "ptest \t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+              "ptest \t{$src2, $src1|$src1, $src2}",
+              [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
               OpSize;
-def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
-                    "ptest \t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+              "ptest \t{$src2, $src1|$src1, $src2}",
+              [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
               OpSize;
 }
 
 // The bit test instructions below are AVX only
 multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
-                       X86MemOperand x86memop> {
-  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-            []>, OpSize, VEX;
-  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-            []>, OpSize, VEX;
+                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+            OpSize, VEX;
 }
 
 let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
-  defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem>;
-  defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem>;
-  defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem>;
-  defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem>;
+defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
 }
 
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Misc Instructions
 //===----------------------------------------------------------------------===//
 
+def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                   "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+let mayLoad = 1 in
+def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                   "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+
+def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                   "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
+def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                   "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+
+def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                    "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
+def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                    "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+
+
+
 // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
 multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId128> {
@@ -4729,12 +4753,10 @@
                                       VR128, memopv16i8, i128mem, 0>, VEX_4V;
   defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
                                       VR128, memopv16i8, i128mem, 0>, VEX_4V;
-  let Pattern = []<dag> in {
-  defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
-                                      VR256, memopv32i8, i256mem, 0>, VEX_4V;
-  defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
-                                      VR256, memopv32i8, i256mem, 0>, VEX_4V;
-  }
+  defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+            int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+  defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+            int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
   defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
                                       VR128, memopv16i8, i128mem, 0>, VEX_4V;
   defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
@@ -4744,9 +4766,8 @@
                                    VR128, memopv16i8, i128mem, 0>, VEX_4V;
   defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                    VR128, memopv16i8, i128mem, 0>, VEX_4V;
-  let Pattern = []<dag> in
-  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
-                                    VR256, memopv32i8, i256mem, 0>, VEX_4V;
+  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
+                                   VR256, memopv32i8, i256mem, 0>, VEX_4V;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -4795,13 +4816,10 @@
                                            memopv16i8, int_x86_sse41_blendvps>;
 defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
                                            memopv16i8, int_x86_sse41_pblendvb>;
-
-let Pattern = []<dag> in { // FIXME: implement 256 intrinsics here.
 defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
-                                           memopv32i8, int_x86_sse41_blendvpd>;
+                                         memopv32i8, int_x86_avx_blendv_pd_256>;
 defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
-                                           memopv32i8, int_x86_sse41_blendvps>;
-}
+                                         memopv32i8, int_x86_avx_blendv_ps_256>;
 
 /// SS41I_ternary_int - SSE 4.1 ternary operator
 let Uses = [XMM0], Constraints = "$src1 = $dst" in {
@@ -5262,14 +5280,19 @@
 
 // Load from memory and broadcast to all elements of the destination operand
 class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
-                    X86MemOperand x86memop> :
+                    X86MemOperand x86memop, Intrinsic Int> :
   AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, VEX;
+        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+        [(set RC:$dst, (Int addr:$src))]>, VEX;
 
-def VBROADCASTSS   : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem>;
-def VBROADCASTSSY  : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem>;
-def VBROADCASTSD   : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem>;
-def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem>;
+def VBROADCASTSS   : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+                                   int_x86_avx_vbroadcastss>;
+def VBROADCASTSSY  : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+                                   int_x86_avx_vbroadcastss_256>;
+def VBROADCASTSD   : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+                                   int_x86_avx_vbroadcast_sd_256>;
+def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
+                                   int_x86_avx_vbroadcastf128_pd_256>;
 
 // Insert packed floating-point values
 def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
@@ -5292,53 +5315,83 @@
           []>, VEX;
 
 // Conditional SIMD Packed Loads and Stores
-multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr> {
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
+                          Intrinsic IntLd, Intrinsic IntLd256,
+                          Intrinsic IntSt, Intrinsic IntSt256,
+                          PatFrag pf128, PatFrag pf256> {
   def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
              (ins VR128:$src1, f128mem:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, VEX_4V;
+             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
+             VEX_4V;
   def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
              (ins VR256:$src1, f256mem:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, VEX_4V;
+             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+             VEX_4V;
   def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
              (ins f128mem:$dst, VR128:$src1, VR128:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, VEX_4V;
+             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
   def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
              (ins f256mem:$dst, VR256:$src1, VR256:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, VEX_4V;
+             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
 }
 
-defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps">;
-defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd">;
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
+                                 int_x86_avx_maskload_ps,
+                                 int_x86_avx_maskload_ps_256,
+                                 int_x86_avx_maskstore_ps,
+                                 int_x86_avx_maskstore_ps_256,
+                                 memopv4f32, memopv8f32>;
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
+                                 int_x86_avx_maskload_pd,
+                                 int_x86_avx_maskload_pd_256,
+                                 int_x86_avx_maskstore_pd,
+                                 int_x86_avx_maskstore_pd_256,
+                                 memopv2f64, memopv4f64>;
 
 // Permute Floating-Point Values
 multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
-                      RegisterClass RC, X86MemOperand x86memop> {
+                      RegisterClass RC, X86MemOperand x86memop_f,
+                      X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
+                      Intrinsic IntVar, Intrinsic IntImm> {
   def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
              (ins RC:$src1, RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, VEX_4V;
+             [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
   def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
-             (ins RC:$src1, x86memop:$src2),
+             (ins RC:$src1, x86memop_i:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, VEX_4V;
+             [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+
   def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
              (ins RC:$src1, i8imm:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, VEX;
+             [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
   def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
-             (ins x86memop:$src1, i8imm:$src2),
+             (ins x86memop_f:$src1, i8imm:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, VEX;
+             [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
 }
 
-defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem>;
-defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem>;
-defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem>;
-defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem>;
+defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+                             memopv4f32, memopv4i32,
+                             int_x86_avx_vpermilvar_ps,
+                             int_x86_avx_vpermil_ps>;
+defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+                             memopv8f32, memopv8i32,
+                             int_x86_avx_vpermilvar_ps_256,
+                             int_x86_avx_vpermil_ps_256>;
+defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+                             memopv2f64, memopv2i64,
+                             int_x86_avx_vpermilvar_pd,
+                             int_x86_avx_vpermil_pd>;
+defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+                             memopv4f64, memopv4i64,
+                             int_x86_avx_vpermilvar_pd_256,
+                             int_x86_avx_vpermil_pd_256>;
 
 def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
           (ins VR256:$src1, VR256:$src2, i8imm:$src3),
@@ -5350,11 +5403,403 @@
           []>, VEX_4V;
 
 // Zero All YMM registers
-def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", []>, VEX, VEX_L,
-                Requires<[HasAVX]>;
+def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+                 [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>;
 
 // Zero Upper bits of YMM registers
-def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", []>, VEX,
-                Requires<[HasAVX]>;
+def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+                   [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
 
 } // isAsmParserOnly
+
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+          (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+          (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+          (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+          (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+          (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+          (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+          (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
+          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
+          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
+          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256
+                  VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
+          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256
+                  VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
+          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256
+                  VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+
+//===----------------------------------------------------------------------===//
+// SSE Shuffle pattern fragments
+//===----------------------------------------------------------------------===//
+
+// This is part of a "work in progress" refactoring. The idea is that all
+// vector shuffles are going to be translated into target specific nodes and
+// directly matched by the patterns below (which can be changed along the way)
+// The AVX versions of some, but not all, of them are described here; more
+// should come in the near future.
+
+// Shuffle with PSHUFD instruction folding loads. The first two patterns match
+// SSE2 loads, which are always promoted to v2i64. The last one should match
+// the SSE1 case, where the only legal load is v4f32, but there is no PSHUFD
+// in SSE1, so how has this ever worked? The pattern will remain here until
+// we investigate further.
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+                                 (i8 imm:$imm))),
+          (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+                                 (i8 imm:$imm))),
+          (PSHUFDmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+                                 (i8 imm:$imm))),
+          (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked?
+
+// Shuffle with PSHUFD instruction.
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+          (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+          (PSHUFDri VR128:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+          (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+          (PSHUFDri VR128:$src1, imm:$imm)>;
+
+// Shuffle with SHUFPD instruction.
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+                     (memopv2f64 addr:$src2), (i8 imm:$imm))),
+          (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+                     (memopv2f64 addr:$src2), (i8 imm:$imm))),
+          (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with SHUFPS instruction.
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+                     (memopv4f32 addr:$src2), (i8 imm:$imm))),
+          (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+                     (memopv4f32 addr:$src2), (i8 imm:$imm))),
+          (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+                     (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+          (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+                     (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+          (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with MOVHLPS instruction
+def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+          (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+          (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVDDUP instruction
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+          (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+          (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+          (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+          (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+          (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+          (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+          (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+          (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64
+                           (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+          (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64
+                           (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+          (MOVDDUPrm addr:$src)>;
+
+
+// Shuffle with UNPCKLPS
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+          (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+          (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+          (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+          (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPS
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+          (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+          (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+          (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+          (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKLPD (double-precision: must select the PD forms, not PS)
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+          (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+          (UNPCKLPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+          (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+          (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPD (unpack HIGH doubles: must select the HPD forms)
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+          (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+          (UNPCKHPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+          (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+          (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLBW
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1,
+                                   (bc_v16i8 (memopv2i64 addr:$src2)))),
+          (PUNPCKLBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)),
+          (PUNPCKLBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLWD
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1,
+                                   (bc_v8i16 (memopv2i64 addr:$src2)))),
+          (PUNPCKLWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)),
+          (PUNPCKLWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLDQ
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1,
+                                   (bc_v4i32 (memopv2i64 addr:$src2)))),
+          (PUNPCKLDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)),
+          (PUNPCKLDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLQDQ
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))),
+          (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)),
+          (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHBW
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1,
+                                   (bc_v16i8 (memopv2i64 addr:$src2)))),
+          (PUNPCKHBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)),
+          (PUNPCKHBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHWD
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1,
+                                   (bc_v8i16 (memopv2i64 addr:$src2)))),
+          (PUNPCKHWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)),
+          (PUNPCKHWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHDQ
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1,
+                                   (bc_v4i32 (memopv2i64 addr:$src2)))),
+          (PUNPCKHDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)),
+          (PUNPCKHDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHQDQ
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))),
+          (PUNPCKHQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)),
+          (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVLHPS
+def : Pat<(X86Movlhps VR128:$src1,
+                    (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+          (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlhps VR128:$src1,
+                    (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+          (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+          (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+          (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+          (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem
+// is during lowering, where it's not possible to recognize the load fold
+// because it has two uses through a bitcast. One use disappears at isel time
+// and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Movddup VR128:$src)),
+          (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
+// Shuffle with MOVLHPD
+def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+                    (scalar_to_vector (loadf64 addr:$src2)))),
+          (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+// is during lowering, where it's not possible to recognize the load fold
+// because it has two uses through a bitcast. One use disappears at isel time
+// and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+                    (scalar_to_vector (loadf64 addr:$src2)))),
+          (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSS
+def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+          (MOVSSrr VR128:$src1, FR32:$src2)>;
+def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+          (MOVSSrr (v4i32 VR128:$src1),
+                   (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+          (MOVSSrr (v4f32 VR128:$src1),
+                   (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// FIXME: Instead of a X86Movss there should be a X86Movlps here, the problem
+// is during lowering, where it's not possible to recognize the load fold
+// because it has two uses through a bitcast. One use disappears at isel time
+// and the fold opportunity reappears.
+def : Pat<(X86Movss VR128:$src1,
+                    (bc_v4i32 (v2i64 (load addr:$src2)))),
+          (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSD
+def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+          (MOVSDrr VR128:$src1, FR64:$src2)>;
+def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+          (MOVSDrr (v2i64 VR128:$src1),
+                   (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+          (MOVSDrr (v2f64 VR128:$src1),
+                   (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
+
+// Shuffle with MOVSHDUP
+def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+          (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))),
+          (MOVSHDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movshdup VR128:$src)),
+          (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (memopv4f32 addr:$src)),
+          (MOVSHDUPrm addr:$src)>;
+
+// Shuffle with MOVSLDUP
+def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+          (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))),
+          (MOVSLDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movsldup VR128:$src)),
+          (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
+          (MOVSLDUPrm addr:$src)>;
+
+// Shuffle with PSHUFHW
+def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+          (PSHUFHWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+          (PSHUFHWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PSHUFLW
+def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+          (PSHUFLWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+          (PSHUFLWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PALIGN
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+
+// Shuffle with MOVLPS
+def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+          (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+          (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlps VR128:$src1,
+                    (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+          (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVLPD
+def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+          (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+          (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+                            (scalar_to_vector (loadf64 addr:$src2)))),
+          (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
+def : Pat<(store (f64 (vector_extract
+          (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+          (MOVHPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f64 (vector_extract
+          (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+          (MOVHPDmr addr:$dst, VR128:$src)>;
+
+def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
+          (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (X86Movlps
+                 (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+          (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+          (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+          (MOVLPDmr addr:$src1, VR128:$src2)>;

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86JITInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86JITInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86JITInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86JITInfo.cpp Tue Oct 26 19:48:03 2010
@@ -337,7 +337,7 @@
  // no support for inline assembly
 static
 #endif
-void ATTRIBUTE_USED
+void LLVM_ATTRIBUTE_USED
 X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
   intptr_t *RetAddrLoc = &StackPtr[1];
   assert(*RetAddrLoc == RetAddr &&

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86MCAsmInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86MCAsmInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86MCAsmInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86MCAsmInfo.cpp Tue Oct 26 19:48:03 2010
@@ -103,6 +103,9 @@
 }
 
 X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+  if (Triple.getArch() == Triple::x86_64)
+    GlobalPrefix = "";
+
   AsmTransCBE = x86_asm_table;
   AssemblerDialect = AsmWriterFlavor;
 

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp Tue Oct 26 19:48:03 2010
@@ -18,6 +18,7 @@
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -38,7 +39,7 @@
   ~X86MCCodeEmitter() {}
 
   unsigned getNumFixupKinds() const {
-    return 5;
+    return 7;
   }
 
   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
@@ -47,7 +48,9 @@
       { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
       { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel },
       { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
-      { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }
+      { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+      { "reloc_signed_4byte", 0, 4 * 8, 0},
+      { "reloc_global_offset_table", 0, 4 * 8, 0}
     };
 
     if (Kind < FirstTargetFixupKind)
@@ -179,6 +182,37 @@
   }
 }
 
+/// Is32BitMemOperand - Return true if the specified instruction with a memory
+/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
+/// memory operand.  Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+  const MCOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
+  const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+  
+  if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
+      (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+    return true;
+  return false;
+}
+
+/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
+/// expression starts with _GLOBAL_OFFSET_TABLE_. This is needed to support
+/// PIC on ELF i386 as that symbol is magic. We check only the simple cases
+/// that are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
+/// of a binary expression.
+static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+  if (Expr->getKind() == MCExpr::Binary) {
+    const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+    Expr = BE->getLHS();
+  }
+
+  if (Expr->getKind() != MCExpr::SymbolRef)
+    return false;
+
+  const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+  const MCSymbol &S = Ref->getSymbol();
+  return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+}
 
 void X86MCCodeEmitter::
 EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
@@ -196,6 +230,13 @@
   // If we have an immoffset, add it to the expression.
   const MCExpr *Expr = DispOp.getExpr();
 
+  if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+    assert(ImmOffset == 0);
+
+    FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+    ImmOffset = CurByte;
+  }
+
   // If the fixup is pc-relative, we need to bias the value to be relative to
   // the start of the field, not the end of the field.
   if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
@@ -221,10 +262,10 @@
                                         uint64_t TSFlags, unsigned &CurByte,
                                         raw_ostream &OS,
                                         SmallVectorImpl<MCFixup> &Fixups) const{
-  const MCOperand &Disp     = MI.getOperand(Op+3);
-  const MCOperand &Base     = MI.getOperand(Op);
-  const MCOperand &Scale    = MI.getOperand(Op+1);
-  const MCOperand &IndexReg = MI.getOperand(Op+2);
+  const MCOperand &Disp     = MI.getOperand(Op+X86::AddrDisp);
+  const MCOperand &Base     = MI.getOperand(Op+X86::AddrBaseReg);
+  const MCOperand &Scale    = MI.getOperand(Op+X86::AddrScaleAmt);
+  const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
   unsigned BaseReg = Base.getReg();
 
   // Handle %rip relative addressing.
@@ -238,8 +279,7 @@
     // movq loads are handled with a special relocation form which allows the
     // linker to eliminate some loads for GOT references which end up in the
     // same linkage unit.
-    if (MI.getOpcode() == X86::MOV64rm ||
-        MI.getOpcode() == X86::MOV64rm_TC)
+    if (MI.getOpcode() == X86::MOV64rm)
       FixupKind = X86::reloc_riprel_4byte_movq_load;
 
     // rip-relative addressing is actually relative to the *next* instruction.
@@ -295,7 +335,8 @@
 
     // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
     EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
-    EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+    EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+                  Fixups);
     return;
   }
 
@@ -355,7 +396,8 @@
   if (ForceDisp8)
     EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
   else if (ForceDisp32 || Disp.getImm() != 0)
-    EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+    EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+                  Fixups);
 }
 
 /// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
@@ -365,7 +407,7 @@
                                            const TargetInstrDesc &Desc,
                                            raw_ostream &OS) const {
   bool HasVEX_4V = false;
-  if (TSFlags & X86II::VEX_4V)
+  if ((TSFlags >> 32) & X86II::VEX_4V)
     HasVEX_4V = true;
 
   // VEX_R: opcode externsion equivalent to REX.R in
@@ -429,10 +471,10 @@
   if (TSFlags & X86II::OpSize)
     VEX_PP = 0x01;
 
-  if (TSFlags & X86II::VEX_W)
+  if ((TSFlags >> 32) & X86II::VEX_W)
     VEX_W = 1;
 
-  if (TSFlags & X86II::VEX_L)
+  if ((TSFlags >> 32) & X86II::VEX_L)
     VEX_L = 1;
 
   switch (TSFlags & X86II::Op0Mask) {
@@ -501,7 +543,7 @@
 
     // If the last register should be encoded in the immediate field
     // do not use any bit from VEX prefix to this register, ignore it
-    if (TSFlags & X86II::VEX_I8IMM)
+    if ((TSFlags >> 32) & X86II::VEX_I8IMM)
       NumOps--;
 
     for (; CurOp != NumOps; ++CurOp) {
@@ -708,14 +750,15 @@
   if ((TSFlags & X86II::Op0Mask) == X86II::REP)
     EmitByte(0xF3, CurByte, OS);
 
+  // Emit the address size opcode prefix as needed.
+  if ((TSFlags & X86II::AdSize) ||
+      (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand)))
+    EmitByte(0x67, CurByte, OS);
+  
   // Emit the operand size opcode prefix as needed.
   if (TSFlags & X86II::OpSize)
     EmitByte(0x66, CurByte, OS);
 
-  // Emit the address size opcode prefix as needed.
-  if (TSFlags & X86II::AdSize)
-    EmitByte(0x67, CurByte, OS);
-
   bool Need0FPrefix = false;
   switch (TSFlags & X86II::Op0Mask) {
   default: assert(0 && "Invalid prefix!");
@@ -801,11 +844,12 @@
   // It uses the VEX.VVVV field?
   bool HasVEX_4V = false;
 
-  if (TSFlags & X86II::VEX)
+  if ((TSFlags >> 32) & X86II::VEX)
     HasVEXPrefix = true;
-  if (TSFlags & X86II::VEX_4V)
+  if ((TSFlags >> 32) & X86II::VEX_4V)
     HasVEX_4V = true;
 
+  
   // Determine where the memory operand starts, if present.
   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
   if (MemoryOperand != -1) MemoryOperand += CurOp;
@@ -815,7 +859,12 @@
   else
     EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
 
+  
   unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+  
+  if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+    BaseOpcode = 0x0F;   // Weird 3DNow! encoding.
+  
   unsigned SrcRegNum = 0;
   switch (TSFlags & X86II::FormMask) {
   case X86II::MRMInitReg:
@@ -827,6 +876,21 @@
   case X86II::RawFrm:
     EmitByte(BaseOpcode, CurByte, OS);
     break;
+      
+  case X86II::RawFrmImm8:
+    EmitByte(BaseOpcode, CurByte, OS);
+    EmitImmediate(MI.getOperand(CurOp++),
+                  X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+                  CurByte, OS, Fixups);
+    EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+    break;
+  case X86II::RawFrmImm16:
+    EmitByte(BaseOpcode, CurByte, OS);
+    EmitImmediate(MI.getOperand(CurOp++),
+                  X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+                  CurByte, OS, Fixups);
+    EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+    break;
 
   case X86II::AddRegFrm:
     EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
@@ -947,7 +1011,7 @@
   if (CurOp != NumOps) {
     // The last source register of a 4 operand instruction in AVX is encoded
     // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
-    if (TSFlags & X86II::VEX_I8IMM) {
+    if ((TSFlags >> 32) & X86II::VEX_I8IMM) {
       const MCOperand &MO = MI.getOperand(CurOp++);
       bool IsExtReg =
         X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
@@ -955,12 +1019,21 @@
       RegNum |= GetX86RegNum(MO) << 4;
       EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
                     Fixups);
-    } else
+    } else {
+      unsigned FixupKind;
+      if (MI.getOpcode() == X86::MOV64ri32 || MI.getOpcode() == X86::MOV64mi32)
+        FixupKind = X86::reloc_signed_4byte;
+      else
+        FixupKind = getImmFixupKind(TSFlags);
       EmitImmediate(MI.getOperand(CurOp++),
-                    X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+                    X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
                     CurByte, OS, Fixups);
+    }
   }
 
+  if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+    EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+  
 
 #ifndef NDEBUG
   // FIXME: Verify.

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86MCInstLower.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86MCInstLower.cpp Tue Oct 26 19:48:03 2010
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "InstPrinter/X86ATTInstPrinter.h"
 #include "X86MCInstLower.h"
 #include "X86AsmPrinter.h"
 #include "X86COFFMachineModuleInfo.h"
@@ -252,7 +253,13 @@
 }
 
 /// \brief Simplify things like MOV32rm to MOV32o32a.
-static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
+static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
+                                  unsigned Opcode) {
+  // Don't make these simplifications in 64-bit mode; other assemblers don't
+  // perform them because they make the code larger.
+  if (Printer.getSubtarget().is64Bit())
+    return;
+
   bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
   unsigned AddrBase = IsStore;
   unsigned RegOp = IsStore ? 0 : 5;
@@ -341,6 +348,7 @@
   }
   
   // Handle a few special cases to eliminate operand modifiers.
+ReSimplify:
   switch (OutMI.getOpcode()) {
   case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
     lower_lea64_32mem(&OutMI, 1);
@@ -371,15 +379,17 @@
   case X86::SETB_C64r:    LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
   case X86::MOV8r0:       LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
   case X86::MOV32r0:      LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
-  case X86::MMX_V_SET0:   LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break;
-  case X86::MMX_V_SETALLONES:
-    LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
-  case X86::FsFLD0SS:     LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
-  case X86::FsFLD0SD:     LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
-  case X86::V_SET0PS:     LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
-  case X86::V_SET0PD:     LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
-  case X86::V_SET0PI:     LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
-  case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+  case X86::FsFLD0SS:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+  case X86::FsFLD0SD:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+  case X86::V_SET0PS:      LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+  case X86::V_SET0PD:      LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+  case X86::V_SET0PI:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+  case X86::V_SETALLONES:  LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+  case X86::AVX_SET0PS:    LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
+  case X86::AVX_SET0PSY:   LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
+  case X86::AVX_SET0PD:    LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
+  case X86::AVX_SET0PDY:   LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
+  case X86::AVX_SET0PI:    LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
 
   case X86::MOV16r0:
     LowerSubReg32_Op0(OutMI, X86::MOV32r0);   // MOV16r0 -> MOV32r0
@@ -390,12 +400,14 @@
     LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
     break;
 
-  // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+  // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
   // register inputs modeled as normal uses instead of implicit uses.  As such,
   // truncate off all but the first operand (the callee).  FIXME: Change isel.
   case X86::TAILJMPr64:
   case X86::CALL64r:
-  case X86::CALL64pcrel32: {
+  case X86::CALL64pcrel32:
+  case X86::WINCALL64r:
+  case X86::WINCALL64pcrel32: {
     unsigned Opcode = OutMI.getOpcode();
     MCOperand Saved = OutMI.getOperand(0);
     OutMI = MCInst();
@@ -404,6 +416,13 @@
     break;
   }
 
+  case X86::EH_RETURN:
+  case X86::EH_RETURN64: {
+    OutMI = MCInst();
+    OutMI.setOpcode(X86::RET);
+    break;
+  }
+
   // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
   case X86::TAILJMPr:
   case X86::TAILJMPd:
@@ -423,6 +442,19 @@
     break;
   }
 
+  // These are pseudo-ops for OR to help with the OR->ADD transformation.  We do
+  // this with an ugly goto in case the resultant OR uses EAX and needs the
+  // short form.
+  case X86::ADD16rr_DB:   OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
+  case X86::ADD32rr_DB:   OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
+  case X86::ADD64rr_DB:   OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+  case X86::ADD16ri_DB:   OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
+  case X86::ADD32ri_DB:   OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
+  case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+  case X86::ADD16ri8_DB:  OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
+  case X86::ADD32ri8_DB:  OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
+  case X86::ADD64ri8_DB:  OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+      
   // The assembler backend wants to see branches in their small form and relax
   // them to their large form.  The JIT can only handle the large form because
   // it does not do relaxation.  For now, translate the large form to the
@@ -453,15 +485,13 @@
   // MOV64ao8, MOV64o8a
   // XCHG16ar, XCHG32ar, XCHG64ar
   case X86::MOV8mr_NOREX:
-  case X86::MOV8mr:     SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break;
+  case X86::MOV8mr:     SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
   case X86::MOV8rm_NOREX:
-  case X86::MOV8rm:     SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break;
-  case X86::MOV16mr:    SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break;
-  case X86::MOV16rm:    SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break;
-  case X86::MOV32mr:    SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break;
-  case X86::MOV32rm:    SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break;
-  case X86::MOV64mr:    SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break;
-  case X86::MOV64rm:    SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break;
+  case X86::MOV8rm:     SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
+  case X86::MOV16mr:    SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
+  case X86::MOV16rm:    SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
+  case X86::MOV32mr:    SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
+  case X86::MOV32rm:    SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
 
   case X86::ADC8ri:     SimplifyShortImmForm(OutMI, X86::ADC8i8);    break;
   case X86::ADC16ri:    SimplifyShortImmForm(OutMI, X86::ADC16i16);  break;
@@ -515,6 +545,21 @@
     }
     return;
 
+  // Emit nothing here but a comment if we can.
+  case X86::Int_MemBarrier:
+    if (OutStreamer.hasRawTextSupport())
+      OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
+    return;
+        
+
+  case X86::EH_RETURN:
+  case X86::EH_RETURN64: {
+    // Lower these as normal, but add some comments.
+    unsigned Reg = MI->getOperand(0).getReg();
+    OutStreamer.AddComment(StringRef("eh_return, addr: %") +
+                           X86ATTInstPrinter::getRegisterName(Reg));
+    break;
+  }
   case X86::TAILJMPr:
   case X86::TAILJMPd:
   case X86::TAILJMPd64:

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -38,8 +38,15 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
+static cl::opt<bool>
+ForceStackAlign("force-align-stack",
+                 cl::desc("Force align the stack to the minimum alignment"
+                           " needed for the function."),
+                 cl::init(false), cl::Hidden);
+
 X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
                                  const TargetInstrInfo &tii)
   : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
@@ -152,46 +159,21 @@
   case X86::YMM7: case X86::YMM15: case X86::MM7:
     return 7;
 
-  case X86::ES:
-    return 0;
-  case X86::CS:
-    return 1;
-  case X86::SS:
-    return 2;
-  case X86::DS:
-    return 3;
-  case X86::FS:
-    return 4;
-  case X86::GS:
-    return 5;
-
-  case X86::CR0:
-    return 0;
-  case X86::CR1:
-    return 1;
-  case X86::CR2:
-    return 2;
-  case X86::CR3:
-    return 3;
-  case X86::CR4:
-    return 4;
-
-  case X86::DR0:
-    return 0;
-  case X86::DR1:
-    return 1;
-  case X86::DR2:
-    return 2;
-  case X86::DR3:
-    return 3;
-  case X86::DR4:
-    return 4;
-  case X86::DR5:
-    return 5;
-  case X86::DR6:
-    return 6;
-  case X86::DR7:
-    return 7;
+  case X86::ES: return 0;
+  case X86::CS: return 1;
+  case X86::SS: return 2;
+  case X86::DS: return 3;
+  case X86::FS: return 4;
+  case X86::GS: return 5;
+
+  case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+  case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+  case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+  case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+  case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+  case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+  case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+  case X86::CR7: case X86::CR15: case X86::DR7: return 7;
 
   // Pseudo index registers are equivalent to a "none"
   // scaled index (See Intel Manual 2A, table 2-3)
@@ -288,9 +270,14 @@
     }
     break;
   case X86::sub_32bit:
-    if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
+    if (B == &X86::GR32RegClass) {
       if (A->getSize() == 8)
         return A;
+    } else if (B == &X86::GR32_NOSPRegClass) {
+      if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
+        return &X86::GR64_NOSPRegClass;
+      if (A->getSize() == 8)
+        return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
     } else if (B == &X86::GR32_ABCDRegClass) {
       if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
           A == &X86::GR64_NOREXRegClass ||
@@ -471,7 +458,11 @@
   if (0 && requiresRealignment && MFI->hasVarSizedObjects())
     report_fatal_error(
       "Stack realignment in presense of dynamic allocas is not supported");
-
+    
+  // If we've requested that we force align the stack do so now.
+  if (ForceStackAlign)
+    return canRealignStack(MF);
+    
   return requiresRealignment && canRealignStack(MF);
 }
 
@@ -615,10 +606,9 @@
   MBB.erase(I);
 }
 
-unsigned
+void
 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                     int SPAdj, FrameIndexValue *Value,
-                                     RegScavenger *RS) const{
+                                     int SPAdj, RegScavenger *RS) const{
   assert(SPAdj == 0 && "Unexpected");
 
   unsigned i = 0;
@@ -665,7 +655,6 @@
     uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
     MI.getOperand(i+3).setOffset(Offset);
   }
-  return 0;
 }
 
 void
@@ -755,7 +744,7 @@
   }
 }
 
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator.
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
 static
 void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI,
@@ -887,6 +876,19 @@
   }
 }
 
+static bool isEAXLiveIn(MachineFunction &MF) {
+  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+    unsigned Reg = II->first;
+
+    if (Reg == X86::EAX || Reg == X86::AX ||
+        Reg == X86::AH || Reg == X86::AL)
+      return true;
+  }
+
+  return false;
+}
+
 /// emitPrologue - Push callee-saved registers onto the stack, which
 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
 /// space for local variables. Also emit labels used by the exception handler to
@@ -906,6 +908,17 @@
   bool HasFP = hasFP(MF);
   DebugLoc DL;
 
+  // If we're forcing a stack realignment we can't rely on just the frame
+  // info, we need to know the ABI stack alignment as well in case we
+  // have a call out.  Otherwise just make sure we have some alignment - we'll
+  // go with the minimum SlotSize.
+  if (ForceStackAlign) {
+    if (MFI->hasCalls())
+      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+    else if (MaxAlign < SlotSize)
+      MaxAlign = SlotSize;
+  }
+
   // Add RETADDR move area to callee saved frame size.
   int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
   if (TailCallReturnAddrDelta < 0)
@@ -920,12 +933,12 @@
       !needsStackRealignment(MF) &&
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
       !MFI->adjustsStack() &&                      // No calls.
-      !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone
+      !IsWin64) {                                  // Win64 has no Red Zone
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
     if (HasFP) MinSize += SlotSize;
     StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
     MFI->setStackSize(StackSize);
-  } else if (Subtarget->isTargetWin64()) {
+  } else if (IsWin64) {
     // We need to always allocate 32 bytes as register spill area.
     // FIXME: We might reuse these 32 bytes for leaf functions.
     StackSize += 32;
@@ -1066,28 +1079,40 @@
 
   DL = MBB.findDebugLoc(MBBI);
 
+  // If there is an SUB32ri of ESP immediately before this instruction, merge
+  // the two. This can be the case when tail call elimination is enabled and
+  // the callee has more arguments then the caller.
+  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+  // If there is an ADD32ri or SUB32ri of ESP immediately after this
+  // instruction, merge the two instructions.
+  mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
   // Adjust stack pointer: ESP -= numbytes.
-  if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
-    // Check, whether EAX is livein for this function.
-    bool isEAXAlive = false;
-    for (MachineRegisterInfo::livein_iterator
-           II = MF.getRegInfo().livein_begin(),
-           EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
-      unsigned Reg = II->first;
-      isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
-                    Reg == X86::AH || Reg == X86::AL);
-    }
 
-    // Function prologue calls _alloca to probe the stack when allocating more
-    // than 4k bytes in one go. Touching the stack at 4K increments is necessary
-    // to ensure that the guard pages used by the OS virtual memory manager are
-    // allocated in correct sequence.
+  // Windows and cygwin/mingw require a prologue helper routine when allocating
+  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
+  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
+  // stack and adjust the stack pointer in one go.  The 64-bit version of
+  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
+  // responsible for adjusting the stack pointer.  Touching the stack at 4K
+  // increments is necessary to ensure that the guard pages used by the OS
+  // virtual memory manager are allocated in correct sequence.
+  if (NumBytes >= 4096 &&
+     (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
+    // Check whether EAX is livein for this function.
+    bool isEAXAlive = isEAXLiveIn(MF);
+
+    const char *StackProbeSymbol =
+      Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+    unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
     if (!isEAXAlive) {
       BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
         .addImm(NumBytes);
-      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
-        .addExternalSymbol("_alloca")
-        .addReg(StackPtr, RegState::Define | RegState::Implicit);
+      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+        .addExternalSymbol(StackProbeSymbol)
+        .addReg(StackPtr,    RegState::Define | RegState::Implicit)
+        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
     } else {
       // Save EAX
       BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
@@ -1097,9 +1122,10 @@
       // allocated bytes for EAX.
       BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
         .addImm(NumBytes - 4);
-      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
-        .addExternalSymbol("_alloca")
-        .addReg(StackPtr, RegState::Define | RegState::Implicit);
+      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+        .addExternalSymbol(StackProbeSymbol)
+        .addReg(StackPtr,    RegState::Define | RegState::Implicit)
+        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
 
       // Restore EAX
       MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
@@ -1107,19 +1133,21 @@
                                       StackPtr, false, NumBytes - 4);
       MBB.insert(MBBI, MI);
     }
-  } else if (NumBytes) {
-    // If there is an SUB32ri of ESP immediately before this instruction, merge
-    // the two. This can be the case when tail call elimination is enabled and
-    // the callee has more arguments then the caller.
-    NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
-
-    // If there is an ADD32ri or SUB32ri of ESP immediately after this
-    // instruction, merge the two instructions.
-    mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
-
-    if (NumBytes)
-      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
-  }
+  } else if (NumBytes >= 4096 && Subtarget->isTargetWin64()) {
+    // Sanity check that EAX is not livein for this function.  It should
+    // should not be, so throw an assert.
+    assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
+
+    // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+    // Function prologue is responsible for adjusting the stack pointer.
+    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+      .addImm(NumBytes);
+    BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
+      .addExternalSymbol("__chkstk")
+      .addReg(StackPtr, RegState::Define | RegState::Implicit);
+    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
+  } else if (NumBytes)
+    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
 
   if ((NumBytes || PushedRegs) && needsFrameMoves) {
     // Mark end of stack pointer adjustment.
@@ -1177,6 +1205,17 @@
   unsigned CSSize = X86FI->getCalleeSavedFrameSize();
   uint64_t NumBytes = 0;
 
+  // If we're forcing a stack realignment we can't rely on just the frame
+  // info, we need to know the ABI stack alignment as well in case we
+  // have a call out.  Otherwise just make sure we have some alignment - we'll
+  // go with the minimum.
+  if (ForceStackAlign) {
+    if (MFI->hasCalls())
+      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+    else
+      MaxAlign = MaxAlign ? MaxAlign : 4;
+  }
+
   if (hasFP(MF)) {
     // Calculate required stack adjustment.
     uint64_t FrameSize = StackSize - SlotSize;
@@ -1524,7 +1563,7 @@
 namespace {
   struct MSAH : public MachineFunctionPass {
     static char ID;
-    MSAH() : MachineFunctionPass(&ID) {}
+    MSAH() : MachineFunctionPass(ID) {}
 
     virtual bool runOnMachineFunction(MachineFunction &MF) {
       const X86TargetMachine *TM =

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -126,9 +126,8 @@
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI) const;
 
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                             RegScavenger *RS = NULL) const;

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.td Tue Oct 26 19:48:03 2010
@@ -231,7 +231,7 @@
   def DR6 : Register<"dr6">;
   def DR7 : Register<"dr7">;
   
-  // Condition registers
+  // Control registers
   def CR0 : Register<"cr0">;
   def CR1 : Register<"cr1">;
   def CR2 : Register<"cr2">;
@@ -241,6 +241,13 @@
   def CR6 : Register<"cr6">;
   def CR7 : Register<"cr7">;
   def CR8 : Register<"cr8">;
+  def CR9 : Register<"cr9">;
+  def CR10 : Register<"cr10">;
+  def CR11 : Register<"cr11">;
+  def CR12 : Register<"cr12">;
+  def CR13 : Register<"cr13">;
+  def CR14 : Register<"cr14">;
+  def CR15 : Register<"cr15">;
 
   // Pseudo index registers
   def EIZ : Register<"eiz">;
@@ -456,7 +463,8 @@
 
 // Control registers.
 def CONTROL_REG : RegisterClass<"X86", [i64], 64,
-                                [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
+                                [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8,
+                                 CR9, CR10, CR11, CR12, CR13, CR14, CR15]> {
 }
 
 // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
@@ -784,7 +792,7 @@
 }
 
 // Generic vector registers: VR64 and VR128.
-def VR64  : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
+def VR64: RegisterClass<"X86", [x86mmx], 64,
                           [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
 def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
                           [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
@@ -833,4 +841,15 @@
 // Status flags registers.
 def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
   let CopyCost = -1;  // Don't allow copying of status registers.
+
+  // EFLAGS is not allocatable.
+  let MethodProtos = [{
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    CCRClass::iterator
+    CCRClass::allocation_order_end(const MachineFunction &MF) const {
+      return allocation_order_begin(MF);
+    }
+  }];
 }

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.cpp Tue Oct 26 19:48:03 2010
@@ -32,10 +32,13 @@
                                              SDValue Dst, SDValue Src,
                                              SDValue Size, unsigned Align,
                                              bool isVolatile,
-                                             const Value *DstSV,
-                                             uint64_t DstSVOff) const {
+                                         MachinePointerInfo DstPtrInfo) const {
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
 
+  // If to a segment-relative address space, use the default lowering.
+  if (DstPtrInfo.getAddrSpace() >= 256)
+    return SDValue();
+  
   // If not DWORD aligned or size is more than the threshold, call the library.
   // The libc version is likely to be faster for these cases. It can use the
   // address value and run time information about the CPU.
@@ -161,7 +164,7 @@
                                       DAG.getConstant(Offset, AddrVT)),
                           Src,
                           DAG.getConstant(BytesLeft, SizeVT),
-                          Align, isVolatile, DstSV, DstSVOff + Offset);
+                          Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
   }
 
   // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@@ -173,10 +176,8 @@
                                         SDValue Chain, SDValue Dst, SDValue Src,
                                         SDValue Size, unsigned Align,
                                         bool isVolatile, bool AlwaysInline,
-                                        const Value *DstSV,
-                                        uint64_t DstSVOff,
-                                        const Value *SrcSV,
-                                        uint64_t SrcSVOff) const {
+                                         MachinePointerInfo DstPtrInfo,
+                                         MachinePointerInfo SrcPtrInfo) const {
   // This requires the copy size to be a constant, preferrably
   // within a subtarget-specific limit.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -190,6 +191,11 @@
   if ((Align & 3) != 0)
     return SDValue();
 
+  // If to a segment-relative address space, use the default lowering.
+  if (DstPtrInfo.getAddrSpace() >= 256 ||
+      SrcPtrInfo.getAddrSpace() >= 256)
+    return SDValue();
+  
   // DWORD aligned
   EVT AVT = MVT::i32;
   if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
@@ -234,8 +240,8 @@
                                                 DAG.getConstant(Offset, SrcVT)),
                                     DAG.getConstant(BytesLeft, SizeVT),
                                     Align, isVolatile, AlwaysInline,
-                                    DstSV, DstSVOff + Offset,
-                                    SrcSV, SrcSVOff + Offset));
+                                    DstPtrInfo.getWithOffset(Offset),
+                                    SrcPtrInfo.getWithOffset(Offset)));
   }
 
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.h Tue Oct 26 19:48:03 2010
@@ -39,8 +39,7 @@
                                   SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align,
                                   bool isVolatile,
-                                  const Value *DstSV,
-                                  uint64_t DstSVOff) const;
+                                  MachinePointerInfo DstPtrInfo) const;
 
   virtual
   SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
@@ -48,10 +47,8 @@
                                   SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align,
                                   bool isVolatile, bool AlwaysInline,
-                                  const Value *DstSV,
-                                  uint64_t DstSVOff,
-                                  const Value *SrcSV,
-                                  uint64_t SrcSVOff) const;
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const;
 };
 
 }

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.cpp Tue Oct 26 19:48:03 2010
@@ -73,7 +73,7 @@
       if (GV->hasDefaultVisibility() &&
           (isDecl || GV->isWeakForLinker()))
         return X86II::MO_GOTPCREL;
-    } else {
+    } else if (!isTargetWin64()) {
       assert(isTargetELF() && "Unknown rip-relative target");
 
       // Extra load is needed for all externally visible.

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.h Tue Oct 26 19:48:03 2010
@@ -186,18 +186,8 @@
     return Is64Bit && (isTargetMingw() || isTargetWindows());
   }
 
-  std::string getDataLayout() const {
-    const char *p;
-    if (is64Bit())
-      p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
-    else if (isTargetDarwin())
-      p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
-    else if (isTargetMingw() || isTargetWindows())
-      p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32";
-    else
-      p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
-
-    return std::string(p);
+  bool isTargetWin32() const {
+    return !Is64Bit && (isTargetMingw() || isTargetWindows());
   }
 
   bool isPICStyleSet() const { return PICStyle != PICStyles::None; }

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.cpp Tue Oct 26 19:48:03 2010
@@ -46,10 +46,15 @@
                                     bool RelaxAll) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
+  case Triple::Darwin:
+    return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+  case Triple::MinGW32:
+  case Triple::MinGW64:
+  case Triple::Cygwin:
   case Triple::Win32:
     return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
   default:
-    return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+    return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
   }
 }
 
@@ -84,13 +89,28 @@
 
 X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
                                          const std::string &FS)
-  : X86TargetMachine(T, TT, FS, false) {
+  : X86TargetMachine(T, TT, FS, false),
+    DataLayout(getSubtargetImpl()->isTargetDarwin() ?
+               "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" :
+               (getSubtargetImpl()->isTargetCygMing() ||
+                getSubtargetImpl()->isTargetWindows()) ?
+               "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" :
+               "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"),
+    InstrInfo(*this),
+    TSInfo(*this),
+    TLInfo(*this),
+    JITInfo(*this) {
 }
 
 
 X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
                                          const std::string &FS)
-  : X86TargetMachine(T, TT, FS, true) {
+  : X86TargetMachine(T, TT, FS, true),
+    DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"),
+    InstrInfo(*this),
+    TSInfo(*this),
+    TLInfo(*this),
+    JITInfo(*this) {
 }
 
 /// X86TargetMachine ctor - Create an X86 target.
@@ -99,23 +119,27 @@
                                    const std::string &FS, bool is64Bit)
   : LLVMTargetMachine(T, TT), 
     Subtarget(TT, FS, is64Bit),
-    DataLayout(Subtarget.getDataLayout()),
     FrameInfo(TargetFrameInfo::StackGrowsDown,
               Subtarget.getStackAlignment(),
               (Subtarget.isTargetWin64() ? -40 :
                (Subtarget.is64Bit() ? -8 : -4))),
-    InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
-    ELFWriterInfo(*this) {
+    ELFWriterInfo(is64Bit, true) {
   DefRelocModel = getRelocationModel();
-      
+
   // If no relocation model was picked, default as appropriate for the target.
   if (getRelocationModel() == Reloc::Default) {
-    if (!Subtarget.isTargetDarwin())
-      setRelocationModel(Reloc::Static);
-    else if (Subtarget.is64Bit())
+    // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
+    // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
+    // use static relocation model by default.
+    if (Subtarget.isTargetDarwin()) {
+      if (Subtarget.is64Bit())
+        setRelocationModel(Reloc::PIC_);
+      else
+        setRelocationModel(Reloc::DynamicNoPIC);
+    } else if (Subtarget.isTargetWin64())
       setRelocationModel(Reloc::PIC_);
     else
-      setRelocationModel(Reloc::DynamicNoPIC);
+      setRelocationModel(Reloc::Static);
   }
 
   assert(getRelocationModel() != Reloc::Default &&
@@ -138,29 +162,27 @@
       Subtarget.isTargetDarwin() &&
       is64Bit)
     setRelocationModel(Reloc::PIC_);
-      
+
   // Determine the PICStyle based on the target selected.
   if (getRelocationModel() == Reloc::Static) {
     // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
     Subtarget.setPICStyle(PICStyles::None);
+  } else if (Subtarget.is64Bit()) {
+    // PIC in 64 bit mode is always rip-rel.
+    Subtarget.setPICStyle(PICStyles::RIPRel);
   } else if (Subtarget.isTargetCygMing()) {
     Subtarget.setPICStyle(PICStyles::None);
   } else if (Subtarget.isTargetDarwin()) {
-    if (Subtarget.is64Bit())
-      Subtarget.setPICStyle(PICStyles::RIPRel);
-    else if (getRelocationModel() == Reloc::PIC_)
+    if (getRelocationModel() == Reloc::PIC_)
       Subtarget.setPICStyle(PICStyles::StubPIC);
     else {
       assert(getRelocationModel() == Reloc::DynamicNoPIC);
       Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
     }
   } else if (Subtarget.isTargetELF()) {
-    if (Subtarget.is64Bit())
-      Subtarget.setPICStyle(PICStyles::RIPRel);
-    else
-      Subtarget.setPICStyle(PICStyles::GOT);
+    Subtarget.setPICStyle(PICStyles::GOT);
   }
-      
+
   // Finally, if we have "none" as our PIC style, force to static mode.
   if (Subtarget.getPICStyle() == PICStyles::None)
     setRelocationModel(Reloc::Static);

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.h Tue Oct 26 19:48:03 2010
@@ -31,12 +31,7 @@
 
 class X86TargetMachine : public LLVMTargetMachine {
   X86Subtarget      Subtarget;
-  const TargetData  DataLayout; // Calculates type size & alignment
   TargetFrameInfo   FrameInfo;
-  X86InstrInfo      InstrInfo;
-  X86JITInfo        JITInfo;
-  X86TargetLowering TLInfo;
-  X86SelectionDAGInfo TSInfo;
   X86ELFWriterInfo  ELFWriterInfo;
   Reloc::Model      DefRelocModel; // Reloc model before it's overridden.
 
@@ -49,20 +44,23 @@
   X86TargetMachine(const Target &T, const std::string &TT, 
                    const std::string &FS, bool is64Bit);
 
-  virtual const X86InstrInfo     *getInstrInfo() const { return &InstrInfo; }
+  virtual const X86InstrInfo     *getInstrInfo() const {
+    llvm_unreachable("getInstrInfo not implemented");
+  }
   virtual const TargetFrameInfo  *getFrameInfo() const { return &FrameInfo; }
-  virtual       X86JITInfo       *getJITInfo()         { return &JITInfo; }
+  virtual       X86JITInfo       *getJITInfo()         {
+    llvm_unreachable("getJITInfo not implemented");
+  }
   virtual const X86Subtarget     *getSubtargetImpl() const{ return &Subtarget; }
-  virtual const X86TargetLowering *getTargetLowering() const { 
-    return &TLInfo;
+  virtual const X86TargetLowering *getTargetLowering() const {
+    llvm_unreachable("getTargetLowering not implemented");
   }
   virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { 
-    return &TSInfo;
+    llvm_unreachable("getSelectionDAGInfo not implemented");
   }
   virtual const X86RegisterInfo  *getRegisterInfo() const {
-    return &InstrInfo.getRegisterInfo();
+    return &getInstrInfo()->getRegisterInfo();
   }
-  virtual const TargetData       *getTargetData() const { return &DataLayout; }
   virtual const X86ELFWriterInfo *getELFWriterInfo() const {
     return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
   }
@@ -79,17 +77,53 @@
 /// X86_32TargetMachine - X86 32-bit target machine.
 ///
 class X86_32TargetMachine : public X86TargetMachine {
+  const TargetData  DataLayout; // Calculates type size & alignment
+  X86InstrInfo      InstrInfo;
+  X86SelectionDAGInfo TSInfo;
+  X86TargetLowering TLInfo;
+  X86JITInfo        JITInfo;
 public:
   X86_32TargetMachine(const Target &T, const std::string &M,
                       const std::string &FS);
+  virtual const TargetData *getTargetData() const { return &DataLayout; }
+  virtual const X86TargetLowering *getTargetLowering() const {
+    return &TLInfo;
+  }
+  virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { 
+    return &TSInfo;
+  }
+  virtual const X86InstrInfo     *getInstrInfo() const {
+    return &InstrInfo;
+  }
+  virtual       X86JITInfo       *getJITInfo()         {
+    return &JITInfo;
+  }
 };
 
 /// X86_64TargetMachine - X86 64-bit target machine.
 ///
 class X86_64TargetMachine : public X86TargetMachine {
+  const TargetData  DataLayout; // Calculates type size & alignment
+  X86InstrInfo      InstrInfo;
+  X86SelectionDAGInfo TSInfo;
+  X86TargetLowering TLInfo;
+  X86JITInfo        JITInfo;
 public:
   X86_64TargetMachine(const Target &T, const std::string &TT,
                       const std::string &FS);
+  virtual const TargetData *getTargetData() const { return &DataLayout; }
+  virtual const X86TargetLowering *getTargetLowering() const {
+    return &TLInfo;
+  }
+  virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { 
+    return &TSInfo;
+  }
+  virtual const X86InstrInfo     *getInstrInfo() const {
+    return &InstrInfo;
+  }
+  virtual       X86JITInfo       *getJITInfo()         {
+    return &JITInfo;
+  }
 };
 
 } // End llvm namespace

Modified: llvm/branches/wendling/eh/lib/Target/XCore/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -10,7 +10,7 @@
 tablegen(XCoreGenCallingConv.inc -gen-callingconv)
 tablegen(XCoreGenSubtarget.inc -gen-subtarget)
 
-add_llvm_target(XCore
+add_llvm_target(XCoreCodeGen
   XCoreFrameInfo.cpp
   XCoreInstrInfo.cpp
   XCoreISelDAGToDAG.cpp

Modified: llvm/branches/wendling/eh/lib/Target/XCore/TargetInfo/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/TargetInfo/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/TargetInfo/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/TargetInfo/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -4,4 +4,4 @@
   XCoreTargetInfo.cpp
   )
 
-add_dependencies(LLVMXCoreInfo XCoreTable_gen)
+add_dependencies(LLVMXCoreInfo XCoreCodeGenTable_gen)

Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -56,13 +56,21 @@
       return CurDAG->getTargetConstant(Imm, MVT::i32);
     }
 
+    inline bool immMskBitp(SDNode *inN) const {
+      ConstantSDNode *N = cast<ConstantSDNode>(inN);
+      uint32_t value = (uint32_t)N->getZExtValue();
+      if (!isMask_32(value)) {
+        return false;
+      }
+      int msksize = 32 - CountLeadingZeros_32(value);
+      return (msksize >= 1 && msksize <= 8) ||
+              msksize == 16 || msksize == 24 || msksize == 32;
+    }
+
     // Complex Pattern Selectors.
-    bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base,
-                        SDValue &Offset);
-    bool SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base,
-                        SDValue &Offset);
-    bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base,
-                        SDValue &Offset);
+    bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+    bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+    bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
     
     virtual const char *getPassName() const {
       return "XCore DAG->DAG Pattern Instruction Selection";
@@ -80,8 +88,8 @@
   return new XCoreDAGToDAGISel(TM);
 }
 
-bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr,
-                                  SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
+                                       SDValue &Offset) {
   FrameIndexSDNode *FIN = 0;
   if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -102,8 +110,8 @@
   return false;
 }
 
-bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr,
-                                  SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
+                                       SDValue &Offset) {
   if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
     Base = Addr.getOperand(0);
     Offset = CurDAG->getTargetConstant(0, MVT::i32);
@@ -123,8 +131,8 @@
   return false;
 }
 
-bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr,
-                                  SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
+                                       SDValue &Offset) {
   if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
     Base = Addr.getOperand(0);
     Offset = CurDAG->getTargetConstant(0, MVT::i32);
@@ -151,17 +159,15 @@
     switch (N->getOpcode()) {
       default: break;
       case ISD::Constant: {
-        if (Predicate_immMskBitp(N)) {
+        uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+        if (immMskBitp(N)) {
           // Transformation function: get the size of a mask
-          int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue();
-          assert(isMask_32(MaskVal));
           // Look for the first non-zero bit
-          SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal));
+          SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
           return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
                                         MVT::i32, MskSize);
         }
-        else if (! Predicate_immU16(N)) {
-          unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+        else if (!isUInt<16>(Val)) {
           SDValue CPIdx =
             CurDAG->getTargetConstantPool(ConstantInt::get(
                                   Type::getInt32Ty(*CurDAG->getContext()), Val),

Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -392,24 +392,23 @@
 }
 
 SDValue XCoreTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const
-{
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   LoadSDNode *LD = cast<LoadSDNode>(Op);
   assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
          "Unexpected extension type");
   assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
-  if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+  if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
     return SDValue();
-  }
+
   unsigned ABIAlignment = getTargetData()->
     getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
   // Leave aligned load alone.
-  if (LD->getAlignment() >= ABIAlignment) {
+  if (LD->getAlignment() >= ABIAlignment)
     return SDValue();
-  }
+
   SDValue Chain = LD->getChain();
   SDValue BasePtr = LD->getBasePtr();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
   
   SDValue Base;
   int64_t Offset;
@@ -419,10 +418,8 @@
       // We've managed to infer better alignment information than the load
       // already has. Use an aligned load.
       //
-      // FIXME: No new alignment information is actually passed here.
-      // Should the offset really be 4?
-      //
-      return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4,
+      return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
+                         MachinePointerInfo(),
                          false, false, 0);
     }
     // Lower to
@@ -436,40 +433,40 @@
     SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
     SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
     
-    SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
-    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
+    SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
+    SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
     
-    SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
-                              LowAddr, NULL, 4, false, false, 0);
-    SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
-                               HighAddr, NULL, 4, false, false, 0);
-    SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
-    SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
-    SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+    SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+                              LowAddr, MachinePointerInfo(), false, false, 0);
+    SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+                               HighAddr, MachinePointerInfo(), false, false, 0);
+    SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+    SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+    SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
                              High.getValue(1));
     SDValue Ops[] = { Result, Chain };
-    return DAG.getMergeValues(Ops, 2, dl);
+    return DAG.getMergeValues(Ops, 2, DL);
   }
   
   if (LD->getAlignment() == 2) {
-    int SVOffset = LD->getSrcValueOffset();
-    SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain,
-                                 BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
+    SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, DL, Chain,
+                                 BasePtr, LD->getPointerInfo(), MVT::i16,
                                  LD->isVolatile(), LD->isNonTemporal(), 2);
-    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+    SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                    DAG.getConstant(2, MVT::i32));
-    SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain,
-                                  HighAddr, LD->getSrcValue(), SVOffset + 2,
+    SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, DL, Chain,
+                                  HighAddr,
+                                  LD->getPointerInfo().getWithOffset(2),
                                   MVT::i16, LD->isVolatile(),
                                   LD->isNonTemporal(), 2);
-    SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
+    SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
                                       DAG.getConstant(16, MVT::i32));
-    SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+    SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
                              High.getValue(1));
     SDValue Ops[] = { Result, Chain };
-    return DAG.getMergeValues(Ops, 2, dl);
+    return DAG.getMergeValues(Ops, 2, DL);
   }
   
   // Lower to a call to __misaligned_load(BasePtr).
@@ -486,12 +483,12 @@
                     false, false, 0, CallingConv::C, false,
                     /*isReturnValueUsed=*/true,
                     DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
-                    Args, DAG, dl);
+                    Args, DAG, DL);
 
   SDValue Ops[] =
     { CallResult.first, CallResult.second };
 
-  return DAG.getMergeValues(Ops, 2, dl);
+  return DAG.getMergeValues(Ops, 2, DL);
 }
 
 SDValue XCoreTargetLowering::
@@ -515,18 +512,17 @@
   DebugLoc dl = Op.getDebugLoc();
   
   if (ST->getAlignment() == 2) {
-    int SVOffset = ST->getSrcValueOffset();
     SDValue Low = Value;
     SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
                                       DAG.getConstant(16, MVT::i32));
     SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
-                                         ST->getSrcValue(), SVOffset, MVT::i16,
+                                         ST->getPointerInfo(), MVT::i16,
                                          ST->isVolatile(), ST->isNonTemporal(),
                                          2);
     SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
                                    DAG.getConstant(2, MVT::i32));
     SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
-                                          ST->getSrcValue(), SVOffset + 2,
+                                          ST->getPointerInfo().getWithOffset(2),
                                           MVT::i16, ST->isVolatile(),
                                           ST->isNonTemporal(), 2);
     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
@@ -757,16 +753,18 @@
   const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
   EVT VT = Node->getValueType(0);
   SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
-                               Node->getOperand(1), V, 0, false, false, 0);
+                               Node->getOperand(1), MachinePointerInfo(V),
+                               false, false, 0);
   // Increment the pointer, VAList, to the next vararg
   SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, 
                      DAG.getConstant(VT.getSizeInBits(), 
                                      getPointerTy()));
   // Store the incremented VAList to the legalized pointer
-  Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0,
-                      false, false, 0);
+  Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
+                      MachinePointerInfo(V), false, false, 0);
   // Load the actual argument out of the pointer VAList
-  return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0);
+  return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+                     false, false, 0);
 }
 
 SDValue XCoreTargetLowering::
@@ -778,9 +776,8 @@
   MachineFunction &MF = DAG.getMachineFunction();
   XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
   SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
-  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0,
-                      false, false, 0);
+  return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), 
+                      MachinePointerInfo(), false, false, 0);
 }
 
 SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -1079,7 +1076,8 @@
       // Create the SelectionDAG nodes corresponding to a load
       //from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
-      InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, 
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -1111,8 +1109,8 @@
         RegInfo.addLiveIn(ArgRegs[i], VReg);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
         // Move argument from virt reg -> stack
-        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
-                                     false, false, 0);
+        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                     MachinePointerInfo(), false, false, 0);
         MemOps.push_back(Store);
       }
       if (!MemOps.empty())
@@ -1443,9 +1441,8 @@
         return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
                               LD->getBasePtr(),
                               DAG.getConstant(StoreBits/8, MVT::i32),
-                              Alignment, false, ST->getSrcValue(),
-                              ST->getSrcValueOffset(), LD->getSrcValue(),
-                              LD->getSrcValueOffset());
+                              Alignment, false, ST->getPointerInfo(),
+                              LD->getPointerInfo());
       }
     }
     break;

Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td Tue Oct 26 19:48:03 2010
@@ -140,17 +140,7 @@
   return (uint32_t)N->getZExtValue() < (1 << 20);
 }]>;
 
-def immMskBitp : PatLeaf<(imm), [{
-  uint32_t value = (uint32_t)N->getZExtValue();
-  if (!isMask_32(value)) {
-    return false;
-  }
-  int msksize = 32 - CountLeadingZeros_32(value);
-  return (msksize >= 1 && msksize <= 8)
-          || msksize == 16
-          || msksize == 24
-          || msksize == 32;
-}]>;
+def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
 
 def immBitp : PatLeaf<(imm), [{
   uint32_t value = (uint32_t)N->getZExtValue();

Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -155,10 +155,9 @@
   MBB.erase(I);
 }
 
-unsigned
+void
 XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                       int SPAdj, FrameIndexValue *Value,
-                                       RegScavenger *RS) const {
+                                       int SPAdj, RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
   MachineInstr &MI = *II;
   DebugLoc dl = MI.getDebugLoc();
@@ -291,7 +290,6 @@
   }
   // Erase old instruction.
   MBB.erase(II);
-  return 0;
 }
 
 void

Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -54,9 +54,8 @@
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
 
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                 RegScavenger *RS = NULL) const;

Modified: llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp Tue Oct 26 19:48:03 2010
@@ -25,7 +25,7 @@
   // Hello - The first implementation, without getAnalysisUsage.
   struct Hello : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    Hello() : FunctionPass(&ID) {}
+    Hello() : FunctionPass(ID) {}
 
     virtual bool runOnFunction(Function &F) {
       ++HelloCounter;
@@ -37,13 +37,13 @@
 }
 
 char Hello::ID = 0;
-INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false);
+static RegisterPass<Hello> X("hello", "Hello World Pass");
 
 namespace {
   // Hello2 - The second implementation with getAnalysisUsage implemented.
   struct Hello2 : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    Hello2() : FunctionPass(&ID) {}
+    Hello2() : FunctionPass(ID) {}
 
     virtual bool runOnFunction(Function &F) {
       ++HelloCounter;
@@ -60,6 +60,5 @@
 }
 
 char Hello2::ID = 0;
-INITIALIZE_PASS(Hello2, "hello2",
-                "Hello World Pass (with getAnalysisUsage implemented)",
-                false, false);
+static RegisterPass<Hello2>
+Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/ArgumentPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/ArgumentPromotion.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/ArgumentPromotion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/ArgumentPromotion.cpp Tue Oct 26 19:48:03 2010
@@ -67,7 +67,9 @@
     virtual bool runOnSCC(CallGraphSCC &SCC);
     static char ID; // Pass identification, replacement for typeid
     explicit ArgPromotion(unsigned maxElements = 3)
-      : CallGraphSCCPass(&ID), maxElements(maxElements) {}
+        : CallGraphSCCPass(ID), maxElements(maxElements) {
+      initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+    }
 
     /// A vector used to hold the indices of a single GEP instruction
     typedef std::vector<uint64_t> IndicesVector;
@@ -84,8 +86,12 @@
 }
 
 char ArgPromotion::ID = 0;
-INITIALIZE_PASS(ArgPromotion, "argpromotion",
-                "Promote 'by reference' arguments to scalars", false, false);
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+                "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+                "Promote 'by reference' arguments to scalars", false, false)
 
 Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
   return new ArgPromotion(maxElements);
@@ -445,7 +451,7 @@
 
     const PointerType *LoadTy =
       cast<PointerType>(Load->getPointerOperand()->getType());
-    unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());
+    uint64_t LoadSize = TD->getTypeStoreSize(LoadTy->getElementType());
 
     if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
       return false;  // Pointer is invalidated!

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -23,5 +23,3 @@
   StripSymbols.cpp
   StructRetPromotion.cpp
   )
-
-target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine)

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/ConstantMerge.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/ConstantMerge.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/ConstantMerge.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/ConstantMerge.cpp Tue Oct 26 19:48:03 2010
@@ -19,10 +19,12 @@
 
 #define DEBUG_TYPE "constmerge"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
@@ -31,7 +33,9 @@
 namespace {
   struct ConstantMerge : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    ConstantMerge() : ModulePass(&ID) {}
+    ConstantMerge() : ModulePass(ID) {
+      initializeConstantMergePass(*PassRegistry::getPassRegistry());
+    }
 
     // run - For this pass, process all of the globals in the module,
     // eliminating duplicate constants.
@@ -42,11 +46,31 @@
 
 char ConstantMerge::ID = 0;
 INITIALIZE_PASS(ConstantMerge, "constmerge",
-                "Merge Duplicate Global Constants", false, false);
+                "Merge Duplicate Global Constants", false, false)
 
 ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
 
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+                           SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+  if (LLVMUsed == 0) return;
+  ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+  if (Inits == 0) return;
+  
+  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+    if (GlobalValue *GV = 
+        dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+      UsedValues.insert(GV);
+}
+
 bool ConstantMerge::runOnModule(Module &M) {
+  // Find all the globals that are marked "used".  These cannot be merged.
+  SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+  FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+  FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+  
   // Map unique constant/section pairs to globals.  We don't want to merge
   // globals in different sections.
   DenseMap<Constant*, GlobalVariable*> CMap;
@@ -79,9 +103,13 @@
       
       // Only process constants with initializers in the default addres space.
       if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() ||
-          GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty())
+          GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
+          // Don't touch values marked with attribute(used).
+          UsedGlobals.count(GV))
         continue;
       
+      
+      
       Constant *Init = GV->getInitializer();
 
       // Check to see if the initializer is already known.

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/DeadArgumentElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/DeadArgumentElimination.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/DeadArgumentElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/DeadArgumentElimination.cpp Tue Oct 26 19:48:03 2010
@@ -122,11 +122,13 @@
 
   protected:
     // DAH uses this to specify a different ID.
-    explicit DAE(void *ID) : ModulePass(ID) {}
+    explicit DAE(char &ID) : ModulePass(ID) {}
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    DAE() : ModulePass(&ID) {}
+    DAE() : ModulePass(ID) {
+      initializeDAEPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnModule(Module &M);
 
@@ -151,7 +153,7 @@
 
 
 char DAE::ID = 0;
-INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false);
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
 
 namespace {
   /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
@@ -159,7 +161,7 @@
   /// by bugpoint.
   struct DAH : public DAE {
     static char ID;
-    DAH() : DAE(&ID) {}
+    DAH() : DAE(ID) {}
 
     virtual bool ShouldHackArguments() const { return true; }
   };
@@ -168,7 +170,7 @@
 char DAH::ID = 0;
 INITIALIZE_PASS(DAH, "deadarghaX0r", 
                 "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
-                false, false);
+                false, false)
 
 /// createDeadArgEliminationPass - This pass removes arguments from functions
 /// which are not used by the body of the function.
@@ -797,7 +799,8 @@
       } else if (New->getType()->isVoidTy()) {
         // Our return value has uses, but they will get removed later on.
         // Replace by null for now.
-        Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+        if (!Call->getType()->isX86_MMXTy())
+          Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
       } else {
         assert(RetTy->isStructTy() &&
                "Return type changed, but not into a void. The old return type"
@@ -860,7 +863,8 @@
     } else {
       // If this argument is dead, replace any uses of it with null constants
       // (these are guaranteed to become unused later on).
-      I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+      if (!I->getType()->isX86_MMXTy())
+        I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
     }
 
   // If we change the return value of the function we must rewrite any return

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/DeadTypeElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/DeadTypeElimination.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/DeadTypeElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/DeadTypeElimination.cpp Tue Oct 26 19:48:03 2010
@@ -26,7 +26,9 @@
 namespace {
   struct DTE : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    DTE() : ModulePass(&ID) {}
+    DTE() : ModulePass(ID) {
+      initializeDTEPass(*PassRegistry::getPassRegistry());
+    }
 
     // doPassInitialization - For this pass, it removes global symbol table
     // entries for primitive types.  These are never used for linking in GCC and
@@ -45,7 +47,10 @@
 }
 
 char DTE::ID = 0;
-INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
+INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false)
 
 ModulePass *llvm::createDeadTypeEliminationPass() {
   return new DTE();

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/ExtractGV.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/ExtractGV.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/ExtractGV.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/ExtractGV.cpp Tue Oct 26 19:48:03 2010
@@ -17,15 +17,15 @@
 #include "llvm/Pass.h"
 #include "llvm/Constants.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
 #include <algorithm>
 using namespace llvm;
 
 namespace {
   /// @brief A pass to extract specific functions and their dependencies.
   class GVExtractorPass : public ModulePass {
-    std::vector<GlobalValue*> Named;
+    SetVector<GlobalValue *> Named;
     bool deleteStuff;
-    bool reLink;
   public:
     static char ID; // Pass identification, replacement for typeid
 
@@ -33,133 +33,38 @@
     /// specified function. Otherwise, it deletes as much of the module as
     /// possible, except for the function specified.
     ///
-    explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true,
-                             bool relinkCallees = false)
-      : ModulePass(&ID), Named(GVs), deleteStuff(deleteS),
-        reLink(relinkCallees) {}
+    explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+      : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
 
     bool runOnModule(Module &M) {
-      if (Named.size() == 0) {
-        return false;  // Nothing to extract
-      }
-      
-      
-      if (deleteStuff)
-        return deleteGV();
-      M.setModuleInlineAsm("");
-      return isolateGV(M);
-    }
-
-    bool deleteGV() {
-      for (std::vector<GlobalValue*>::iterator GI = Named.begin(), 
-             GE = Named.end(); GI != GE; ++GI) {
-        if (Function* NamedFunc = dyn_cast<Function>(*GI)) {
-         // If we're in relinking mode, set linkage of all internal callees to
-         // external. This will allow us extract function, and then - link
-         // everything together
-         if (reLink) {
-           for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end();
-                B != BE; ++B) {
-             for (BasicBlock::iterator I = B->begin(), E = B->end();
-                  I != E; ++I) {
-               if (CallInst* callInst = dyn_cast<CallInst>(&*I)) {
-                 Function* Callee = callInst->getCalledFunction();
-                 if (Callee && Callee->hasLocalLinkage())
-                   Callee->setLinkage(GlobalValue::ExternalLinkage);
-               }
-             }
-           }
-         }
-         
-         NamedFunc->setLinkage(GlobalValue::ExternalLinkage);
-         NamedFunc->deleteBody();
-         assert(NamedFunc->isDeclaration() && "This didn't make the function external!");
-       } else {
-          if (!(*GI)->isDeclaration()) {
-            cast<GlobalVariable>(*GI)->setInitializer(0);  //clear the initializer
-            (*GI)->setLinkage(GlobalValue::ExternalLinkage);
-          }
-        }
-      }
-      return true;
-    }
-
-    bool isolateGV(Module &M) {
-      // Mark all globals internal
-      // FIXME: what should we do with private linkage?
-      for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I)
-        if (!I->isDeclaration()) {
-          I->setLinkage(GlobalValue::InternalLinkage);
-        }
-      for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
-        if (!I->isDeclaration()) {
-          I->setLinkage(GlobalValue::InternalLinkage);
-        }
-
-      // Make sure our result is globally accessible...
-      // by putting them in the used array
-      {
-        std::vector<Constant *> AUGs;
-        const Type *SBP=
-              Type::getInt8PtrTy(M.getContext());
-        for (std::vector<GlobalValue*>::iterator GI = Named.begin(), 
-               GE = Named.end(); GI != GE; ++GI) {
-          (*GI)->setLinkage(GlobalValue::ExternalLinkage);
-          AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP));
-        }
-        ArrayType *AT = ArrayType::get(SBP, AUGs.size());
-        Constant *Init = ConstantArray::get(AT, AUGs);
-        GlobalValue *gv = new GlobalVariable(M, AT, false, 
-                                             GlobalValue::AppendingLinkage, 
-                                             Init, "llvm.used");
-        gv->setSection("llvm.metadata");
-      }
-
-      // All of the functions may be used by global variables or the named
-      // globals.  Loop through them and create a new, external functions that
-      // can be "used", instead of ones with bodies.
-      std::vector<Function*> NewFunctions;
-
-      Function *Last = --M.end();  // Figure out where the last real fn is.
-
-      for (Module::iterator I = M.begin(); ; ++I) {
-        if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
-          Function *New = Function::Create(I->getFunctionType(),
-                                           GlobalValue::ExternalLinkage);
-          New->copyAttributesFrom(I);
-
-          // If it's not the named function, delete the body of the function
-          I->dropAllReferences();
-
-          M.getFunctionList().push_back(New);
-          NewFunctions.push_back(New);
-          New->takeName(I);
-        }
-
-        if (&*I == Last) break;  // Stop after processing the last function
+      // Visit the global inline asm.
+      if (!deleteStuff)
+        M.setModuleInlineAsm("");
+
+      // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+      // implementation could figure out which GlobalValues are actually
+      // referenced by the Named set, and which GlobalValues in the rest of
+      // the module are referenced by the NamedSet, and get away with leaving
+      // more internal and private things internal and private. But for now,
+      // be conservative and simple.
+
+      // Visit the GlobalVariables.
+      for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+           I != E; ++I) {
+        if (I->hasLocalLinkage())
+          I->setVisibility(GlobalValue::HiddenVisibility);
+        I->setLinkage(GlobalValue::ExternalLinkage);
+        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+          I->setInitializer(0);
       }
 
-      // Now that we have replacements all set up, loop through the module,
-      // deleting the old functions, replacing them with the newly created
-      // functions.
-      if (!NewFunctions.empty()) {
-        unsigned FuncNum = 0;
-        Module::iterator I = M.begin();
-        do {
-          if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
-            // Make everything that uses the old function use the new dummy fn
-            I->replaceAllUsesWith(NewFunctions[FuncNum++]);
-
-            Function *Old = I;
-            ++I;  // Move the iterator to the new function
-
-            // Delete the old function!
-            M.getFunctionList().erase(Old);
-
-          } else {
-            ++I;  // Skip the function we are extracting
-          }
-        } while (&*I != NewFunctions[0]);
+      // Visit the Functions.
+      for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+        if (I->hasLocalLinkage())
+          I->setVisibility(GlobalValue::HiddenVisibility);
+        I->setLinkage(GlobalValue::ExternalLinkage);
+        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+          I->deleteBody();
       }
 
       return true;
@@ -170,6 +75,6 @@
 }
 
 ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs, 
-                                         bool deleteFn, bool relinkCallees) {
-  return new GVExtractorPass(GVs, deleteFn, relinkCallees);
+                                         bool deleteFn) {
+  return new GVExtractorPass(GVs, deleteFn);
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/FunctionAttrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/FunctionAttrs.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/FunctionAttrs.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/FunctionAttrs.cpp Tue Oct 26 19:48:03 2010
@@ -41,7 +41,9 @@
 namespace {
   struct FunctionAttrs : public CallGraphSCCPass {
     static char ID; // Pass identification, replacement for typeid
-    FunctionAttrs() : CallGraphSCCPass(&ID) {}
+    FunctionAttrs() : CallGraphSCCPass(ID) {
+      initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+    }
 
     // runOnSCC - Analyze the SCC, performing the transformation if possible.
     bool runOnSCC(CallGraphSCC &SCC);
@@ -69,8 +71,11 @@
 }
 
 char FunctionAttrs::ID = 0;
-INITIALIZE_PASS(FunctionAttrs, "functionattrs",
-                "Deduce function attributes", false, false);
+INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+                "Deduce function attributes", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+                "Deduce function attributes", false, false)
 
 Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
 
@@ -169,7 +174,7 @@
           continue;
         // Ignore intrinsics that only access local memory.
         if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
-          if (AliasAnalysis::getModRefBehavior(id) ==
+          if (AliasAnalysis::getIntrinsicModRefBehavior(id) ==
               AliasAnalysis::AccessesArguments) {
             // Check that all pointer arguments point to local memory.
             for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalDCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalDCE.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalDCE.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalDCE.cpp Tue Oct 26 19:48:03 2010
@@ -31,7 +31,9 @@
 namespace {
   struct GlobalDCE : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    GlobalDCE() : ModulePass(&ID) {}
+    GlobalDCE() : ModulePass(ID) {
+      initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+    }
 
     // run - Do the GlobalDCE pass on the specified module, optionally updating
     // the specified callgraph to reflect the changes.
@@ -52,7 +54,7 @@
 
 char GlobalDCE::ID = 0;
 INITIALIZE_PASS(GlobalDCE, "globaldce",
-                "Dead Global Elimination", false, false);
+                "Dead Global Elimination", false, false)
 
 ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp Tue Oct 26 19:48:03 2010
@@ -59,7 +59,9 @@
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     }
     static char ID; // Pass identification, replacement for typeid
-    GlobalOpt() : ModulePass(&ID) {}
+    GlobalOpt() : ModulePass(ID) {
+      initializeGlobalOptPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnModule(Module &M);
 
@@ -75,7 +77,7 @@
 
 char GlobalOpt::ID = 0;
 INITIALIZE_PASS(GlobalOpt, "globalopt",
-                "Global Variable Optimizer", false, false);
+                "Global Variable Optimizer", false, false)
 
 ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
 
@@ -129,7 +131,7 @@
 
   /// HasPHIUser - Set to true if this global has a user that is a PHI node.
   bool HasPHIUser;
-  
+
   GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0),
                    AccessingFunction(0), HasMultipleAccessingFunctions(false),
                    HasNonInstructionUser(false), HasPHIUser(false) {}
@@ -308,7 +310,7 @@
         if (Init)
           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
         Changed |= CleanupConstantGlobalUsers(CE, SubInit);
-      } else if (CE->getOpcode() == Instruction::BitCast && 
+      } else if (CE->getOpcode() == Instruction::BitCast &&
                  CE->getType()->isPointerTy()) {
         // Pointer cast, delete any stores and memsets to the global.
         Changed |= CleanupConstantGlobalUsers(CE, 0);
@@ -324,7 +326,7 @@
       // and will invalidate our notion of what Init is.
       Constant *SubInit = 0;
       if (!isa<ConstantExpr>(GEP->getOperand(0))) {
-        ConstantExpr *CE = 
+        ConstantExpr *CE =
           dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
         if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -361,7 +363,7 @@
   // We might have a dead and dangling constant hanging off of here.
   if (Constant *C = dyn_cast<Constant>(V))
     return SafeToDestroyConstant(C);
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
 
@@ -371,15 +373,15 @@
   // Stores *to* the pointer are ok.
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getOperand(0) != V;
-    
+
   // Otherwise, it must be a GEP.
   GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
   if (GEPI == 0) return false;
-  
+
   if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
       !cast<Constant>(GEPI->getOperand(1))->isNullValue())
     return false;
-  
+
   for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
        I != E; ++I)
     if (!isSafeSROAElementUse(*I))
@@ -393,11 +395,11 @@
 ///
 static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
   // The user of the global must be a GEP Inst or a ConstantExpr GEP.
-  if (!isa<GetElementPtrInst>(U) && 
-      (!isa<ConstantExpr>(U) || 
+  if (!isa<GetElementPtrInst>(U) &&
+      (!isa<ConstantExpr>(U) ||
        cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
     return false;
-  
+
   // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
   // don't like < 3 operand CE's, and we don't like non-constant integer
   // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
@@ -409,18 +411,18 @@
 
   gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
   ++GEPI;  // Skip over the pointer index.
-  
+
   // If this is a use of an array allocation, do a bit more checking for sanity.
   if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
     uint64_t NumElements = AT->getNumElements();
     ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-    
+
     // Check to make sure that index falls within the array.  If not,
     // something funny is going on, so we won't do the optimization.
     //
     if (Idx->getZExtValue() >= NumElements)
       return false;
-      
+
     // We cannot scalar repl this level of the array unless any array
     // sub-indices are in-range constants.  In particular, consider:
     // A[0][i].  We cannot know that the user isn't doing invalid things like
@@ -441,7 +443,7 @@
                "Indexed GEP type is not array, vector, or struct!");
         continue;
       }
-      
+
       ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
       if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
         return false;
@@ -465,7 +467,7 @@
   }
   return true;
 }
- 
+
 
 /// SRAGlobal - Perform scalar replacement of aggregates on the specified global
 /// variable.  This opens the door for other optimizations by exposing the
@@ -476,7 +478,7 @@
   // Make sure this global only has simple uses that we can SRA.
   if (!GlobalUsersSafeToSRA(GV))
     return 0;
-  
+
   assert(GV->hasLocalLinkage() && !GV->isConstant());
   Constant *Init = GV->getInitializer();
   const Type *Ty = Init->getType();
@@ -488,7 +490,7 @@
   unsigned StartAlignment = GV->getAlignment();
   if (StartAlignment == 0)
     StartAlignment = TD.getABITypeAlignment(GV->getType());
-   
+
   if (const StructType *STy = dyn_cast<StructType>(Ty)) {
     NewGlobals.reserve(STy->getNumElements());
     const StructLayout &Layout = *TD.getStructLayout(STy);
@@ -503,7 +505,7 @@
                                               GV->getType()->getAddressSpace());
       Globals.insert(GV, NGV);
       NewGlobals.push_back(NGV);
-      
+
       // Calculate the known alignment of the field.  If the original aggregate
       // had 256 byte alignment for example, something might depend on that:
       // propagate info to each field.
@@ -522,7 +524,7 @@
     if (NumElements > 16 && GV->hasNUsesOrMore(16))
       return 0; // It's not worth it.
     NewGlobals.reserve(NumElements);
-    
+
     uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
     unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
     for (unsigned i = 0, e = NumElements; i != e; ++i) {
@@ -537,7 +539,7 @@
                                               GV->getType()->getAddressSpace());
       Globals.insert(GV, NGV);
       NewGlobals.push_back(NGV);
-      
+
       // Calculate the known alignment of the field.  If the original aggregate
       // had 256 byte alignment for example, something might depend on that:
       // propagate info to each field.
@@ -549,7 +551,7 @@
 
   if (NewGlobals.empty())
     return 0;
-  
+
   DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
 
   Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -615,7 +617,7 @@
 }
 
 /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
-/// value will trap if the value is dynamically null.  PHIs keeps track of any 
+/// value will trap if the value is dynamically null.  PHIs keeps track of any
 /// phi nodes we've seen to avoid reprocessing them.
 static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                          SmallPtrSet<const PHINode*, 8> &PHIs) {
@@ -757,7 +759,7 @@
   // Keep track of whether we are able to remove all the uses of the global
   // other than the store that defines it.
   bool AllNonStoreUsesGone = true;
-  
+
   // Replace all uses of loads with uses of uses of the stored value.
   for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
     User *GlobalUser = *GUI++;
@@ -830,7 +832,7 @@
                                                      ConstantInt *NElements,
                                                      TargetData* TD) {
   DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');
-  
+
   const Type *GlobalType;
   if (NElements->getZExtValue() == 1)
     GlobalType = AllocTy;
@@ -840,14 +842,14 @@
 
   // Create the new global variable.  The contents of the malloc'd memory is
   // undefined, so initialize with an undef value.
-  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), 
+  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
                                              GlobalType, false,
                                              GlobalValue::InternalLinkage,
                                              UndefValue::get(GlobalType),
                                              GV->getName()+".body",
                                              GV,
                                              GV->isThreadLocal());
-  
+
   // If there are bitcast users of the malloc (which is typical, usually we have
   // a malloc + bitcast) then replace them with uses of the new global.  Update
   // other users to use the global as well.
@@ -867,10 +869,10 @@
       User->replaceUsesOfWith(CI, TheBC);
     }
   }
-  
+
   Constant *RepValue = NewGV;
   if (NewGV->getType() != GV->getType()->getElementType())
-    RepValue = ConstantExpr::getBitCast(RepValue, 
+    RepValue = ConstantExpr::getBitCast(RepValue,
                                         GV->getType()->getElementType());
 
   // If there is a comparison against null, we will insert a global bool to
@@ -890,7 +892,7 @@
       SI->eraseFromParent();
       continue;
     }
-    
+
     LoadInst *LI = cast<LoadInst>(GV->use_back());
     while (!LI->use_empty()) {
       Use &LoadUse = LI->use_begin().getUse();
@@ -898,7 +900,7 @@
         LoadUse = RepValue;
         continue;
       }
-      
+
       ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
       // Replace the cmp X, 0 with a use of the bool value.
       Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
@@ -963,20 +965,20 @@
     if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
       continue; // Fine, ignore.
     }
-    
+
     if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
         return false;  // Storing the pointer itself... bad.
       continue; // Otherwise, storing through it, or storing into GV... fine.
     }
-    
+
     // Must index into the array and into the struct.
     if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
         return false;
       continue;
     }
-    
+
     if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
       // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
       // cycles.
@@ -985,13 +987,13 @@
           return false;
       continue;
     }
-    
+
     if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
         return false;
       continue;
     }
-    
+
     return false;
   }
   return true;
@@ -1000,9 +1002,9 @@
 /// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
 /// somewhere.  Transform all uses of the allocation into loads from the
 /// global and uses of the resultant pointer.  Further, delete the store into
-/// GV.  This assumes that these value pass the 
+/// GV.  This assumes that these value pass the
 /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
-static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, 
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                           GlobalVariable *GV) {
   while (!Alloc->use_empty()) {
     Instruction *U = cast<Instruction>(*Alloc->use_begin());
@@ -1035,7 +1037,7 @@
             continue;
           }
     }
-      
+
     // Insert a load from the global, and use it instead of the malloc.
     Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
     U->replaceUsesOfWith(Alloc, NL);
@@ -1053,24 +1055,24 @@
   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
        ++UI) {
     const Instruction *User = cast<Instruction>(*UI);
-    
+
     // Comparison against null is ok.
     if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
       if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
         return false;
       continue;
     }
-    
+
     // getelementptr is also ok, but only a simple form.
     if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
       // Must index into the array and into the struct.
       if (GEPI->getNumOperands() < 3)
         return false;
-      
+
       // Otherwise the GEP is ok.
       continue;
     }
-    
+
     if (const PHINode *PN = dyn_cast<PHINode>(User)) {
       if (!LoadUsingPHIsPerLoad.insert(PN))
         // This means some phi nodes are dependent on each other.
@@ -1079,19 +1081,19 @@
       if (!LoadUsingPHIs.insert(PN))
         // If we have already analyzed this PHI, then it is safe.
         continue;
-      
+
       // Make sure all uses of the PHI are simple enough to transform.
       if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                           LoadUsingPHIs, LoadUsingPHIsPerLoad))
         return false;
-      
+
       continue;
     }
-    
+
     // Otherwise we don't know what this is, not ok.
     return false;
   }
-  
+
   return true;
 }
 
@@ -1110,10 +1112,10 @@
         return false;
       LoadUsingPHIsPerLoad.clear();
     }
-  
+
   // If we reach here, we know that all uses of the loads and transitive uses
   // (through PHI nodes) are simple enough to transform.  However, we don't know
-  // that all inputs the to the PHI nodes are in the same equivalence sets. 
+  // that all inputs the to the PHI nodes are in the same equivalence sets.
   // Check to verify that all operands of the PHIs are either PHIS that can be
   // transformed, loads from GV, or MI itself.
   for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
@@ -1121,29 +1123,29 @@
     const PHINode *PN = *I;
     for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
       Value *InVal = PN->getIncomingValue(op);
-      
+
       // PHI of the stored value itself is ok.
       if (InVal == StoredVal) continue;
-      
+
       if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
         // One of the PHIs in our set is (optimistically) ok.
         if (LoadUsingPHIs.count(InPN))
           continue;
         return false;
       }
-      
+
       // Load from GV is ok.
       if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
         if (LI->getOperand(0) == GV)
           continue;
-      
+
       // UNDEF? NULL?
-      
+
       // Anything else is rejected.
       return false;
     }
   }
-  
+
   return true;
 }
 
@@ -1151,15 +1153,15 @@
                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
-  
+
   if (FieldNo >= FieldVals.size())
     FieldVals.resize(FieldNo+1);
-  
+
   // If we already have this value, just reuse the previously scalarized
   // version.
   if (Value *FieldVal = FieldVals[FieldNo])
     return FieldVal;
-  
+
   // Depending on what instruction this is, we have several cases.
   Value *Result;
   if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
@@ -1172,9 +1174,9 @@
   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
     // PN's type is pointer to struct.  Make a new PHI of pointer to struct
     // field.
-    const StructType *ST = 
+    const StructType *ST =
       cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
-    
+
     Result =
      PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
                      PN->getName()+".f"+Twine(FieldNo), PN);
@@ -1183,13 +1185,13 @@
     llvm_unreachable("Unknown usable value");
     Result = 0;
   }
-  
+
   return FieldVals[FieldNo] = Result;
 }
 
 /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
 /// the load, rewrite the derived value to use the HeapSRoA'd load.
-static void RewriteHeapSROALoadUser(Instruction *LoadUser, 
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   // If this is a comparison against null, handle it.
@@ -1199,30 +1201,30 @@
     // field.
     Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                    InsertedScalarizedValues, PHIsToRewrite);
-    
+
     Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
-                              Constant::getNullValue(NPtr->getType()), 
+                              Constant::getNullValue(NPtr->getType()),
                               SCI->getName());
     SCI->replaceAllUsesWith(New);
     SCI->eraseFromParent();
     return;
   }
-  
+
   // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
     assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
            && "Unexpected GEPI!");
-  
+
     // Load the pointer for this field.
     unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
     Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                      InsertedScalarizedValues, PHIsToRewrite);
-    
+
     // Create the new GEP idx vector.
     SmallVector<Value*, 8> GEPIdx;
     GEPIdx.push_back(GEPI->getOperand(1));
     GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
-    
+
     Value *NGEPI = GetElementPtrInst::Create(NewPtr,
                                              GEPIdx.begin(), GEPIdx.end(),
                                              GEPI->getName(), GEPI);
@@ -1243,7 +1245,7 @@
   tie(InsertPos, Inserted) =
     InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
   if (!Inserted) return;
-  
+
   // If this is the first time we've seen this PHI, recursively process all
   // users.
   for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
@@ -1256,7 +1258,7 @@
 /// is a value loaded from the global.  Eliminate all uses of Ptr, making them
 /// use FieldGlobals instead.  All uses of loaded values satisfy
 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
-static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
@@ -1264,7 +1266,7 @@
     Instruction *User = cast<Instruction>(*UI++);
     RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
   }
-  
+
   if (Load->use_empty()) {
     Load->eraseFromParent();
     InsertedScalarizedValues.erase(Load);
@@ -1289,11 +1291,11 @@
   // new mallocs at the same place as CI, and N globals.
   std::vector<Value*> FieldGlobals;
   std::vector<Value*> FieldMallocs;
-  
+
   for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
     const Type *FieldTy = STy->getElementType(FieldNo);
     const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
-    
+
     GlobalVariable *NGV =
       new GlobalVariable(*GV->getParent(),
                          PFieldTy, false, GlobalValue::InternalLinkage,
@@ -1301,7 +1303,7 @@
                          GV->getName() + ".f" + Twine(FieldNo), GV,
                          GV->isThreadLocal());
     FieldGlobals.push_back(NGV);
-    
+
     unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
     if (const StructType *ST = dyn_cast<StructType>(FieldTy))
       TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
@@ -1313,7 +1315,7 @@
     FieldMallocs.push_back(NMI);
     new StoreInst(NMI, NGV, CI);
   }
-  
+
   // The tricky aspect of this transformation is handling the case when malloc
   // fails.  In the original code, malloc failing would set the result pointer
   // of malloc to null.  In this case, some mallocs could succeed and others
@@ -1340,23 +1342,23 @@
   // Split the basic block at the old malloc.
   BasicBlock *OrigBB = CI->getParent();
   BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
-  
+
   // Create the block to check the first condition.  Put all these blocks at the
   // end of the function as they are unlikely to be executed.
   BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
                                                 "malloc_ret_null",
                                                 OrigBB->getParent());
-  
+
   // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
   // branch on RunningOr.
   OrigBB->getTerminator()->eraseFromParent();
   BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
-  
+
   // Within the NullPtrBlock, we need to emit a comparison and branch for each
   // pointer, because some may be null while others are not.
   for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
     Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
-    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, 
+    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
                               Constant::getNullValue(GVVal->getType()),
                               "tmp");
     BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
@@ -1371,10 +1373,10 @@
     new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                   FreeBlock);
     BranchInst::Create(NextBlock, FreeBlock);
-    
+
     NullPtrBlock = NextBlock;
   }
-  
+
   BranchInst::Create(ContBB, NullPtrBlock);
 
   // CI is no longer needed, remove it.
@@ -1385,25 +1387,25 @@
   /// inserted for a given load.
   DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
   InsertedScalarizedValues[GV] = FieldGlobals;
-  
+
   std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
-  
+
   // Okay, the malloc site is completely handled.  All of the uses of GV are now
   // loads, and all uses of those loads are simple.  Rewrite them to use loads
   // of the per-field globals instead.
   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
     Instruction *User = cast<Instruction>(*UI++);
-    
+
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
       continue;
     }
-    
+
     // Must be a store of null.
     StoreInst *SI = cast<StoreInst>(User);
     assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
            "Unexpected heap-sra user!");
-    
+
     // Insert a store of null into each global.
     for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
       const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
@@ -1430,7 +1432,7 @@
       FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
     }
   }
-  
+
   // Drop all inter-phi links and any loads that made it this far.
   for (DenseMap<Value*, std::vector<Value*> >::iterator
        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1440,7 +1442,7 @@
     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
       LI->dropAllReferences();
   }
-  
+
   // Delete all the phis and loads now that inter-references are dead.
   for (DenseMap<Value*, std::vector<Value*> >::iterator
        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1450,7 +1452,7 @@
     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
       LI->eraseFromParent();
   }
-  
+
   // The old global is now dead, remove it.
   GV->eraseFromParent();
 
@@ -1468,7 +1470,7 @@
                                                TargetData *TD) {
   if (!TD)
     return false;
-  
+
   // If this is a malloc of an abstract type, don't touch it.
   if (!AllocTy->isSized())
     return false;
@@ -1508,7 +1510,7 @@
       GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
       return true;
     }
-  
+
   // If the allocation is an array of structures, consider transforming this
   // into multiple malloc'd arrays, one for each field.  This is basically
   // SRoA for malloc'd memory.
@@ -1544,13 +1546,13 @@
       CI = dyn_cast<BitCastInst>(Malloc) ?
         extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
     }
-      
+
     GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
     return true;
   }
-  
+
   return false;
-}  
+}
 
 // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
 // that only one value (besides its initializer) is ever stored to the global.
@@ -1568,7 +1570,7 @@
       GV->getInitializer()->isNullValue()) {
     if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
       if (GV->getInitializer()->getType() != SOVC->getType())
-        SOVC = 
+        SOVC =
          ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
 
       // Optimize away any trapping uses of the loaded value.
@@ -1576,7 +1578,7 @@
         return true;
     } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
       const Type* MallocType = getMallocAllocatedType(CI);
-      if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, 
+      if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
                                                            GVI, TD))
         return true;
     }
@@ -1591,7 +1593,7 @@
 /// whenever it is used.  This exposes the values to other scalar optimizations.
 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
   const Type *GVElType = GV->getType()->getElementType();
-  
+
   // If GVElType is already i1, it is already shrunk.  If the type of the GV is
   // an FP value, pointer or vector, don't do this optimization because a select
   // between them is very expensive and unlikely to lead to later
@@ -1611,11 +1613,11 @@
   }
 
   DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);
-  
+
   // Create the new global, initializing it to false.
   GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
                                              false,
-                                             GlobalValue::InternalLinkage, 
+                                             GlobalValue::InternalLinkage,
                                         ConstantInt::getFalse(GV->getContext()),
                                              GV->getName()+".b",
                                              GV->isThreadLocal());
@@ -1716,11 +1718,11 @@
                    << GS.AccessingFunction->getName() << "\n");
     DEBUG(dbgs() << "  HasMultipleAccessingFunctions =  "
                  << GS.HasMultipleAccessingFunctions << "\n");
-    DEBUG(dbgs() << "  HasNonInstructionUser = " 
+    DEBUG(dbgs() << "  HasNonInstructionUser = "
                  << GS.HasNonInstructionUser<<"\n");
     DEBUG(dbgs() << "\n");
 #endif
-    
+
     // If this is a first class global and has only one accessing function
     // and this function is main (which we know is not recursive we can make
     // this global a local variable) we replace the global with a local alloca
@@ -1750,7 +1752,7 @@
       ++NumLocalized;
       return true;
     }
-    
+
     // If the global is never loaded (but may be stored to), it is dead.
     // Delete it now.
     if (!GS.isLoaded) {
@@ -1943,9 +1945,10 @@
       if (!FTy || !FTy->getReturnType()->isVoidTy() ||
           FTy->isVarArg() || FTy->getNumParams() != 0)
         return 0;
-      
-      // Verify that the initializer is simple enough for us to handle.
-      if (!I->hasDefinitiveInitializer()) return 0;
+
+      // Verify that the initializer is simple enough for us to handle. We are
+      // only allowed to optimize the initializer if it is unique.
+      if (!I->hasUniqueInitializer()) return 0;
       ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
       if (!CA) return 0;
       for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
@@ -1956,7 +1959,7 @@
           // Must have a function or null ptr.
           if (!isa<Function>(CS->getOperand(1)))
             return 0;
-          
+
           // Init priority must be standard.
           ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
           if (!CI || CI->getZExtValue() != 65535)
@@ -1964,7 +1967,7 @@
         } else {
           return 0;
         }
-      
+
       return I;
     }
   return 0;
@@ -1985,13 +1988,13 @@
 
 /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
 /// specified array, returning the new global to use.
-static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, 
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
                                           const std::vector<Function*> &Ctors) {
   // If we made a change, reassemble the initializer list.
   std::vector<Constant*> CSVals;
   CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
   CSVals.push_back(0);
-  
+
   // Create the new init list.
   std::vector<Constant*> CAList;
   for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
@@ -2007,26 +2010,26 @@
     }
     CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
   }
-  
+
   // Create the array initializer.
   const Type *StructTy =
       cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
-  Constant *CA = ConstantArray::get(ArrayType::get(StructTy, 
+  Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
                                                    CAList.size()), CAList);
-  
+
   // If we didn't change the number of elements, don't create a new GV.
   if (CA->getType() == GCL->getInitializer()->getType()) {
     GCL->setInitializer(CA);
     return GCL;
   }
-  
+
   // Create the new global and insert it next to the existing list.
   GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
                                            GCL->getLinkage(), CA, "",
                                            GCL->isThreadLocal());
   GCL->getParent()->getGlobalList().insert(GCL, NGV);
   NGV->takeName(GCL);
-  
+
   // Nuke the old list, replacing any uses with the new one.
   if (!GCL->use_empty()) {
     Constant *V = NGV;
@@ -2035,7 +2038,7 @@
     GCL->replaceAllUsesWith(V);
   }
   GCL->eraseFromParent();
-  
+
   if (Ctors.size())
     return NGV;
   else
@@ -2062,9 +2065,9 @@
     return false;
 
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
-    // Do not allow weak/linkonce/dllimport/dllexport linkage or
+    // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
     // external globals.
-    return GV->hasDefinitiveInitializer();
+    return GV->hasUniqueInitializer();
 
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
     // Handle a constantexpr gep.
@@ -2072,13 +2075,13 @@
         isa<GlobalVariable>(CE->getOperand(0)) &&
         cast<GEPOperator>(CE)->isInBounds()) {
       GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
-      // Do not allow weak/linkonce/dllimport/dllexport linkage or
+      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
       // external globals.
-      if (!GV->hasDefinitiveInitializer())
+      if (!GV->hasUniqueInitializer())
         return false;
 
       // The first index must be zero.
-      ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin()));
+      ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
       if (!CI || !CI->isZero()) return false;
 
       // The remaining indices must be compile-time known integers within the
@@ -2101,7 +2104,7 @@
     assert(Val->getType() == Init->getType() && "Type mismatch!");
     return Val;
   }
-  
+
   std::vector<Constant*> Elts;
   if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
 
@@ -2119,13 +2122,13 @@
       llvm_unreachable("This code is out of sync with "
              " ConstantFoldLoadThroughGEPConstantExpr");
     }
-    
+
     // Replace the element that we are supposed to.
     ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
     unsigned Idx = CU->getZExtValue();
     assert(Idx < STy->getNumElements() && "Struct index out of range!");
     Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
-    
+
     // Return the modified struct.
     return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
                                STy->isPacked());
@@ -2138,8 +2141,8 @@
       NumElts = ATy->getNumElements();
     else
       NumElts = cast<VectorType>(InitTy)->getNumElements();
-    
-    
+
+
     // Break up the array into elements.
     if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
       for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
@@ -2154,16 +2157,16 @@
              " ConstantFoldLoadThroughGEPConstantExpr");
       Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
     }
-    
+
     assert(CI->getZExtValue() < NumElts);
     Elts[CI->getZExtValue()] =
       EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
-    
+
     if (Init->getType()->isArrayTy())
       return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
     else
       return ConstantVector::get(&Elts[0], Elts.size());
-  }    
+  }
 }
 
 /// CommitValueTo - We have decided that Addr (which satisfies the predicate
@@ -2189,14 +2192,14 @@
   // is the most up-to-date.
   DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
   if (I != Memory.end()) return I->second;
- 
+
   // Access it.
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
     if (GV->hasDefinitiveInitializer())
       return GV->getInitializer();
     return 0;
   }
-  
+
   // Handle a constantexpr getelementptr.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
     if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -2221,12 +2224,12 @@
   // bail out.  TODO: we might want to accept limited recursion.
   if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
     return false;
-  
+
   CallStack.push_back(F);
-  
+
   /// Values - As we compute SSA register values, we store their contents here.
   DenseMap<Value*, Constant*> Values;
-  
+
   // Initialize arguments to the incoming values specified.
   unsigned ArgNo = 0;
   for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
@@ -2237,14 +2240,14 @@
   /// we can only evaluate any one basic block at most once.  This set keeps
   /// track of what we have executed so we can detect recursive cases etc.
   SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-  
+
   // CurInst - The current instruction we're evaluating.
   BasicBlock::iterator CurInst = F->begin()->begin();
-  
+
   // This is the main evaluation loop.
   while (1) {
     Constant *InstResult = 0;
-    
+
     if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
       if (SI->isVolatile()) return false;  // no volatile accesses.
       Constant *Ptr = getVal(Values, SI->getOperand(1));
@@ -2290,7 +2293,7 @@
                                               GlobalValue::InternalLinkage,
                                               UndefValue::get(Ty),
                                               AI->getName()));
-      InstResult = AllocaTmps.back();     
+      InstResult = AllocaTmps.back();
     } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
 
       // Debug info can safely be ignored here.
@@ -2324,7 +2327,7 @@
       } else {
         if (Callee->getFunctionType()->isVarArg())
           return false;
-        
+
         Constant *RetVal;
         // Execute the call, if successful, use the return value.
         if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
@@ -2342,7 +2345,7 @@
             dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
           if (!Cond) return false;  // Cannot determine.
 
-          NewBB = BI->getSuccessor(!Cond->getZExtValue());          
+          NewBB = BI->getSuccessor(!Cond->getZExtValue());
         }
       } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
         ConstantInt *Val =
@@ -2358,20 +2361,20 @@
       } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
         if (RI->getNumOperands())
           RetVal = getVal(Values, RI->getOperand(0));
-        
+
         CallStack.pop_back();  // return from fn.
         return true;  // We succeeded at evaluating this ctor!
       } else {
         // invoke, unwind, unreachable.
         return false;  // Cannot handle this terminator.
       }
-      
+
       // Okay, we succeeded in evaluating this control flow.  See if we have
       // executed the new block before.  If so, we have a looping function,
       // which we cannot evaluate in reasonable time.
       if (!ExecutedBlocks.insert(NewBB))
         return false;  // looped!
-      
+
       // Okay, we have never been in this block before.  Check to see if there
       // are any PHI nodes.  If so, evaluate them with information about where
       // we came from.
@@ -2387,10 +2390,10 @@
       // Did not know how to evaluate this!
       return false;
     }
-    
+
     if (!CurInst->use_empty())
       Values[CurInst] = InstResult;
-    
+
     // Advance program counter.
     ++CurInst;
   }
@@ -2408,7 +2411,7 @@
   /// to represent its body.  This vector is needed so we can delete the
   /// temporary globals when we are done.
   std::vector<GlobalVariable*> AllocaTmps;
-  
+
   /// CallStack - This is used to detect recursion.  In pathological situations
   /// we could hit exponential behavior, but at least there is nothing
   /// unbounded.
@@ -2428,13 +2431,13 @@
          E = MutatedMemory.end(); I != E; ++I)
       CommitValueTo(I->second, I->first);
   }
-  
+
   // At this point, we are done interpreting.  If we created any 'alloca'
   // temporaries, release them now.
   while (!AllocaTmps.empty()) {
     GlobalVariable *Tmp = AllocaTmps.back();
     AllocaTmps.pop_back();
-    
+
     // If there are still users of the alloca, the program is doing something
     // silly, e.g. storing the address of the alloca somewhere and using it
     // later.  Since this is undefined, we'll just make it be null.
@@ -2442,7 +2445,7 @@
       Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
     delete Tmp;
   }
-  
+
   return EvalSuccess;
 }
 
@@ -2454,7 +2457,7 @@
   std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
   bool MadeChange = false;
   if (Ctors.empty()) return false;
-  
+
   // Loop over global ctors, optimizing them when we can.
   for (unsigned i = 0; i != Ctors.size(); ++i) {
     Function *F = Ctors[i];
@@ -2467,10 +2470,10 @@
       }
       break;
     }
-    
+
     // We cannot simplify external ctor functions.
     if (F->empty()) continue;
-    
+
     // If we can evaluate the ctor at compile time, do.
     if (EvaluateStaticConstructor(F)) {
       Ctors.erase(Ctors.begin()+i);
@@ -2480,9 +2483,9 @@
       continue;
     }
   }
-  
+
   if (!MadeChange) return false;
-  
+
   GCL = InstallGlobalCtors(GCL, Ctors);
   return true;
 }
@@ -2546,21 +2549,21 @@
 
 bool GlobalOpt::runOnModule(Module &M) {
   bool Changed = false;
-  
+
   // Try to find the llvm.globalctors list.
   GlobalVariable *GlobalCtors = FindGlobalCtors(M);
 
   bool LocalChange = true;
   while (LocalChange) {
     LocalChange = false;
-    
+
     // Delete functions that are trivially dead, ccc -> fastcc
     LocalChange |= OptimizeFunctions(M);
-    
+
     // Optimize global_ctors list.
     if (GlobalCtors)
       LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
-    
+
     // Optimize non-address-taken globals.
     LocalChange |= OptimizeGlobalVars(M);
 
@@ -2568,9 +2571,9 @@
     LocalChange |= OptimizeGlobalAliases(M);
     Changed |= LocalChange;
   }
-  
+
   // TODO: Move all global ctors functions to the end of the module for code
   // layout.
-  
+
   return Changed;
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/IPConstantPropagation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/IPConstantPropagation.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/IPConstantPropagation.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/IPConstantPropagation.cpp Tue Oct 26 19:48:03 2010
@@ -35,7 +35,9 @@
   ///
   struct IPCP : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    IPCP() : ModulePass(&ID) {}
+    IPCP() : ModulePass(ID) {
+      initializeIPCPPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnModule(Module &M);
   private:
@@ -46,7 +48,7 @@
 
 char IPCP::ID = 0;
 INITIALIZE_PASS(IPCP, "ipconstprop",
-                "Interprocedural constant propagation", false, false);
+                "Interprocedural constant propagation", false, false)
 
 ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/IPO.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/IPO.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/IPO.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/IPO.cpp Tue Oct 26 19:48:03 2010
@@ -7,17 +7,52 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the C bindings for libLLVMIPO.a, which implements
-// several transformations over the LLVM intermediate representation.
+// This file implements the common infrastructure (including C bindings) for 
+// libLLVMIPO.a, which implements several transformations over the LLVM 
+// intermediate representation.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
 #include "llvm/Transforms/IPO.h"
 
 using namespace llvm;
 
+void llvm::initializeIPO(PassRegistry &Registry) {
+  initializeArgPromotionPass(Registry);
+  initializeConstantMergePass(Registry);
+  initializeDAEPass(Registry);
+  initializeDAHPass(Registry);
+  initializeDTEPass(Registry);
+  initializeFunctionAttrsPass(Registry);
+  initializeGlobalDCEPass(Registry);
+  initializeGlobalOptPass(Registry);
+  initializeIPCPPass(Registry);
+  initializeAlwaysInlinerPass(Registry);
+  initializeSimpleInlinerPass(Registry);
+  initializeInternalizePassPass(Registry);
+  initializeLoopExtractorPass(Registry);
+  initializeBlockExtractorPassPass(Registry);
+  initializeSingleLoopExtractorPass(Registry);
+  initializeLowerSetJmpPass(Registry);
+  initializeMergeFunctionsPass(Registry);
+  initializePartialInlinerPass(Registry);
+  initializePartSpecPass(Registry);
+  initializePruneEHPass(Registry);
+  initializeStripDeadPrototypesPassPass(Registry);
+  initializeStripSymbolsPass(Registry);
+  initializeStripDebugDeclarePass(Registry);
+  initializeStripDeadDebugInfoPass(Registry);
+  initializeStripNonDebugSymbolsPass(Registry);
+  initializeSRETPromotionPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+  initializeIPO(*unwrap(R));
+}
+
 void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createArgumentPromotionPass());
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/InlineAlways.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/InlineAlways.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/InlineAlways.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/InlineAlways.cpp Tue Oct 26 19:48:03 2010
@@ -36,7 +36,9 @@
     InlineCostAnalyzer CA;
   public:
     // Use extremely low threshold. 
-    AlwaysInliner() : Inliner(&ID, -2000000000) {}
+    AlwaysInliner() : Inliner(ID, -2000000000) {
+      initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+    }
     static char ID; // Pass identification, replacement for typeid
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, NeverInline);
@@ -61,8 +63,11 @@
 }
 
 char AlwaysInliner::ID = 0;
-INITIALIZE_PASS(AlwaysInliner, "always-inline",
-                "Inliner for always_inline functions", false, false);
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+                "Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+                "Inliner for always_inline functions", false, false)
 
 Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/InlineSimple.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/InlineSimple.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/InlineSimple.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/InlineSimple.cpp Tue Oct 26 19:48:03 2010
@@ -33,8 +33,12 @@
     SmallPtrSet<const Function*, 16> NeverInline; 
     InlineCostAnalyzer CA;
   public:
-    SimpleInliner() : Inliner(&ID) {}
-    SimpleInliner(int Threshold) : Inliner(&ID, Threshold) {}
+    SimpleInliner() : Inliner(ID) {
+      initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+    }
+    SimpleInliner(int Threshold) : Inliner(ID, Threshold) {
+      initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+    }
     static char ID; // Pass identification, replacement for typeid
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, NeverInline);
@@ -56,8 +60,11 @@
 }
 
 char SimpleInliner::ID = 0;
-INITIALIZE_PASS(SimpleInliner, "inline",
-                "Function Integration/Inlining", false, false);
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
+                "Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SimpleInliner, "inline",
+                "Function Integration/Inlining", false, false)
 
 Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/Inliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/Inliner.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/Inliner.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/Inliner.cpp Tue Oct 26 19:48:03 2010
@@ -48,10 +48,10 @@
 // Threshold to use when optsize is specified (and there is no -inline-limit).
 const int OptSizeThreshold = 75;
 
-Inliner::Inliner(void *ID) 
+Inliner::Inliner(char &ID) 
   : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
 
-Inliner::Inliner(void *ID, int Threshold) 
+Inliner::Inliner(char &ID, int Threshold) 
   : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
 
 /// getAnalysisUsage - For this class, we declare that we require and preserve

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/Internalize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/Internalize.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/Internalize.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/Internalize.cpp Tue Oct 26 19:48:03 2010
@@ -64,10 +64,11 @@
 
 char InternalizePass::ID = 0;
 INITIALIZE_PASS(InternalizePass, "internalize",
-                "Internalize Global Symbols", false, false);
+                "Internalize Global Symbols", false, false)
 
 InternalizePass::InternalizePass(bool AllButMain)
-  : ModulePass(&ID), AllButMain(AllButMain){
+  : ModulePass(ID), AllButMain(AllButMain){
+  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
   if (!APIFile.empty())           // If a filename is specified, use it.
     LoadFile(APIFile.c_str());
   if (!APIList.empty())           // If a list is specified, use it as well.
@@ -75,7 +76,8 @@
 }
 
 InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
-  : ModulePass(&ID), AllButMain(false){
+  : ModulePass(ID), AllButMain(false){
+  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
   for(std::vector<const char *>::const_iterator itr = exportList.begin();
         itr != exportList.end(); itr++) {
     ExternalNames.insert(*itr);

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/LoopExtractor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/LoopExtractor.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/LoopExtractor.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/LoopExtractor.cpp Tue Oct 26 19:48:03 2010
@@ -37,7 +37,9 @@
     unsigned NumLoops;
 
     explicit LoopExtractor(unsigned numLoops = ~0) 
-      : LoopPass(&ID), NumLoops(numLoops) {}
+      : LoopPass(ID), NumLoops(numLoops) {
+        initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -50,8 +52,13 @@
 }
 
 char LoopExtractor::ID = 0;
-INITIALIZE_PASS(LoopExtractor, "loop-extract",
-                "Extract loops into new functions", false, false);
+INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+                "Extract loops into new functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+                "Extract loops into new functions", false, false)
 
 namespace {
   /// SingleLoopExtractor - For bugpoint.
@@ -63,7 +70,7 @@
 
 char SingleLoopExtractor::ID = 0;
 INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
-                "Extract at most one loop into a new function", false, false);
+                "Extract at most one loop into a new function", false, false)
 
 // createLoopExtractorPass - This pass extracts all natural loops from the
 // program into a function if it can.
@@ -147,7 +154,7 @@
     std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
   public:
     static char ID; // Pass identification, replacement for typeid
-    BlockExtractorPass() : ModulePass(&ID) {
+    BlockExtractorPass() : ModulePass(ID) {
       if (!BlockFile.empty())
         LoadFile(BlockFile.c_str());
     }
@@ -159,7 +166,7 @@
 char BlockExtractorPass::ID = 0;
 INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
                 "Extract Basic Blocks From Module (for bugpoint use)",
-                false, false);
+                false, false)
 
 // createBlockExtractorPass - This pass extracts all blocks (except those
 // specified in the argument list) from the functions in the module.

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp Tue Oct 26 19:48:03 2010
@@ -109,7 +109,9 @@
     bool IsTransformableFunction(StringRef Name);
   public:
     static char ID; // Pass identification, replacement for typeid
-    LowerSetJmp() : ModulePass(&ID) {}
+    LowerSetJmp() : ModulePass(ID) {
+      initializeLowerSetJmpPass(*PassRegistry::getPassRegistry());
+    }
 
     void visitCallInst(CallInst& CI);
     void visitInvokeInst(InvokeInst& II);
@@ -122,7 +124,7 @@
 } // end anonymous namespace
 
 char LowerSetJmp::ID = 0;
-INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false);
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false)
 
 // run - Run the transformation on the program. We grab the function
 // prototypes for longjmp and setjmp. If they are used in the program,

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp Tue Oct 26 19:48:03 2010
@@ -29,44 +29,27 @@
 //
 // Many functions have their address taken by the virtual function table for
 // the object they belong to. However, as long as it's only used for a lookup
-// and call, this is irrelevant, and we'd like to fold such implementations.
+// and call, this is irrelevant, and we'd like to fold such functions.
 //
-// * use SCC to cut down on pair-wise comparisons and solve larger cycles.
+// * switch from n^2 pair-wise comparisons to an n-way comparison for each
+// bucket.
 //
-// The current implementation loops over a pair-wise comparison of all
-// functions in the program where the two functions in the pair are treated as
-// assumed to be equal until proven otherwise. We could both use fewer
-// comparisons and optimize more complex cases if we used strongly connected
-// components of the call graph.
-//
-// * be smarter about bitcast.
+// * be smarter about bitcasts.
 //
 // In order to fold functions, we will sometimes add either bitcast instructions
 // or bitcast constant expressions. Unfortunately, this can confound further
 // analysis since the two functions differ where one has a bitcast and the
-// other doesn't. We should learn to peer through bitcasts without imposing bad
-// performance properties.
-//
-// * don't emit aliases for Mach-O.
-//
-// Mach-O doesn't support aliases which means that we must avoid introducing
-// them in the bitcode on architectures which don't support them, such as
-// Mac OSX. There's a few approaches to this problem;
-//   a) teach codegen to lower global aliases to thunks on platforms which don't
-//      support them.
-//   b) always emit thunks, and create a separate thunk-to-alias pass which
-//      runs on ELF systems. This has the added benefit of transforming other
-//      thunks such as those produced by a C++ frontend into aliases when legal
-//      to do so.
+// other doesn't. We should learn to look through bitcasts.
 //
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mergefunc"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Constants.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
@@ -76,52 +59,21 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
-#include <map>
 #include <vector>
 using namespace llvm;
 
 STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
 
-namespace {
-  class MergeFunctions : public ModulePass {
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    MergeFunctions() : ModulePass(&ID) {}
-
-    bool runOnModule(Module &M);
-
-  private:
-    bool isEquivalentGEP(const GetElementPtrInst *GEP1,
-                         const GetElementPtrInst *GEP2);
-
-    bool equals(const BasicBlock *BB1, const BasicBlock *BB2);
-    bool equals(const Function *F, const Function *G);
-
-    bool compare(const Value *V1, const Value *V2);
-
-    const Function *LHS, *RHS;
-    typedef DenseMap<const Value *, unsigned long> IDMap;
-    IDMap Map;
-    DenseMap<const Function *, IDMap> Domains;
-    DenseMap<const Function *, unsigned long> DomainCount;
-    TargetData *TD;
-  };
-}
-
-char MergeFunctions::ID = 0;
-INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false);
-
-ModulePass *llvm::createMergeFunctionsPass() {
-  return new MergeFunctions();
-}
-
-// ===----------------------------------------------------------------------===
-// Comparison of functions
-// ===----------------------------------------------------------------------===
-
-static unsigned long hash(const Function *F) {
+/// ProfileFunction - Creates a hash-code for the function which is the same
+/// for any two functions that will compare equal, without looking at the
+/// instructions inside the function.
+static unsigned ProfileFunction(const Function *F) {
   const FunctionType *FTy = F->getFunctionType();
 
   FoldingSetNodeID ID;
@@ -135,9 +87,159 @@
   return ID.ComputeHash();
 }
 
-/// isEquivalentType - any two pointers are equivalent. Otherwise, standard
-/// type equivalence rules apply.
-static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
+namespace {
+
+class ComparableFunction {
+public:
+  static const ComparableFunction EmptyKey;
+  static const ComparableFunction TombstoneKey;
+
+  ComparableFunction(Function *Func, TargetData *TD)
+    : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {}
+
+  Function *getFunc() const { return Func; }
+  unsigned getHash() const { return Hash; }
+  TargetData *getTD() const { return TD; }
+
+  // Drops AssertingVH reference to the function. Outside of debug mode, this
+  // does nothing.
+  void release() {
+    assert(Func &&
+           "Attempted to release function twice, or release empty/tombstone!");
+    Func = NULL;
+  }
+
+private:
+  explicit ComparableFunction(unsigned Hash)
+    : Func(NULL), Hash(Hash), TD(NULL) {}
+
+  AssertingVH<Function> Func;
+  unsigned Hash;
+  TargetData *TD;
+};
+
+const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
+const ComparableFunction ComparableFunction::TombstoneKey =
+    ComparableFunction(1);
+
+}
+
+namespace llvm {
+  template <>
+  struct DenseMapInfo<ComparableFunction> {
+    static ComparableFunction getEmptyKey() {
+      return ComparableFunction::EmptyKey;
+    }
+    static ComparableFunction getTombstoneKey() {
+      return ComparableFunction::TombstoneKey;
+    }
+    static unsigned getHashValue(const ComparableFunction &CF) {
+      return CF.getHash();
+    }
+    static bool isEqual(const ComparableFunction &LHS,
+                        const ComparableFunction &RHS);
+  };
+}
+
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one to a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+  static char ID;
+  MergeFunctions() : ModulePass(ID) {
+    initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M);
+
+private:
+  typedef DenseSet<ComparableFunction> FnSetType;
+
+
+  /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+  /// equal to one that's already present.
+  bool Insert(FnSetType &FnSet, ComparableFunction &NewF);
+
+  /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
+  /// may be deleted, or may be converted into a thunk. In either case, it
+  /// should never be visited again.
+  void MergeTwoFunctions(Function *F, Function *G) const;
+
+  /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
+  /// replace direct uses of G with bitcast(F). Deletes G.
+  void WriteThunk(Function *F, Function *G) const;
+
+  TargetData *TD;
+};
+
+}  // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+  return new MergeFunctions();
+}
+
+namespace {
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. TargetData is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+  FunctionComparator(const TargetData *TD, const Function *F1,
+                     const Function *F2)
+    : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {}
+
+  /// Compare - test whether the two functions have equivalent behaviour.
+  bool Compare();
+
+private:
+  /// Compare - test whether two basic blocks have equivalent behaviour.
+  bool Compare(const BasicBlock *BB1, const BasicBlock *BB2);
+
+  /// Enumerate - Assign or look up previously assigned numbers for the two
+  /// values, and return whether the numbers are equal. Numbers are assigned in
+  /// the order visited.
+  bool Enumerate(const Value *V1, const Value *V2);
+
+  /// isEquivalentOperation - Compare two Instructions for equivalence, similar
+  /// to Instruction::isSameOperationAs but with modifications to the type
+  /// comparison.
+  bool isEquivalentOperation(const Instruction *I1,
+                             const Instruction *I2) const;
+
+  /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
+  bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+  bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+                       const GetElementPtrInst *GEP2) {
+    return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
+  }
+
+  /// isEquivalentType - Compare two Types, treating all pointer types as equal.
+  bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
+
+  // The two functions undergoing comparison.
+  const Function *F1, *F2;
+
+  const TargetData *TD;
+
+  typedef DenseMap<const Value *, unsigned long> IDMap;
+  IDMap Map1, Map2;
+  unsigned long IDMap1Count, IDMap2Count;
+};
+}
+
+/// isEquivalentType - any two pointers in the same address space are
+/// equivalent. Otherwise, standard type equivalence rules apply.
+bool FunctionComparator::isEquivalentType(const Type *Ty1,
+                                          const Type *Ty2) const {
   if (Ty1 == Ty2)
     return true;
   if (Ty1->getTypeID() != Ty2->getTypeID())
@@ -184,21 +286,6 @@
     return true;
   }
 
-  case Type::UnionTyID: {
-    const UnionType *UTy1 = cast<UnionType>(Ty1);
-    const UnionType *UTy2 = cast<UnionType>(Ty2);
-
-    // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc.
-    if (UTy1->getNumElements() != UTy2->getNumElements())
-      return false;
-
-    for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) {
-      if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i)))
-        return false;
-    }
-    return true;
-  }
-
   case Type::FunctionTyID: {
     const FunctionType *FTy1 = cast<FunctionType>(Ty1);
     const FunctionType *FTy2 = cast<FunctionType>(Ty2);
@@ -222,6 +309,7 @@
     return ATy1->getNumElements() == ATy2->getNumElements() &&
            isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
   }
+
   case Type::VectorTyID: {
     const VectorType *VTy1 = cast<VectorType>(Ty1);
     const VectorType *VTy2 = cast<VectorType>(Ty2);
@@ -234,8 +322,8 @@
 /// isEquivalentOperation - determine whether the two operations are the same
 /// except that pointer-to-A and pointer-to-B are equivalent. This should be
 /// kept in sync with Instruction::isSameOperationAs.
-static bool
-isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
+bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
+                                               const Instruction *I2) const {
   if (I1->getOpcode() != I2->getOpcode() ||
       I1->getNumOperands() != I2->getNumOperands() ||
       !isEquivalentType(I1->getType(), I2->getType()) ||
@@ -287,18 +375,15 @@
   return true;
 }
 
-bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
-                                     const GetElementPtrInst *GEP2) {
+/// isEquivalentGEP - determine whether two GEP operations perform the same
+/// underlying arithmetic.
+bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
+                                         const GEPOperator *GEP2) {
+  // When we have target data, we can reduce the GEP down to the value in bytes
+  // added to the address.
   if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
-    SmallVector<Value *, 8> Indices1, Indices2;
-    for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(),
-           E = GEP1->idx_end(); I != E; ++I) {
-      Indices1.push_back(*I);
-    }
-    for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(),
-           E = GEP2->idx_end(); I != E; ++I) {
-      Indices2.push_back(*I);
-    }
+    SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
+    SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
     uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
                                             Indices1.data(), Indices1.size());
     uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
@@ -306,7 +391,6 @@
     return Offset1 == Offset2;
   }
 
-  // Equivalent types aren't enough.
   if (GEP1->getPointerOperand()->getType() !=
       GEP2->getPointerOperand()->getType())
     return false;
@@ -315,19 +399,26 @@
     return false;
 
   for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
-    if (!compare(GEP1->getOperand(i), GEP2->getOperand(i)))
+    if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
       return false;
   }
 
   return true;
 }
 
-bool MergeFunctions::compare(const Value *V1, const Value *V2) {
-  if (V1 == LHS || V1 == RHS)
-    if (V2 == LHS || V2 == RHS)
-      return true;
+/// Enumerate - Compare two values used by the two functions under pair-wise
+/// comparison. If this is the first time the values are seen, they're added to
+/// the mapping so that we will detect mismatches on next use.
+bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
+  // Check for function @f1 referring to itself and function @f2 referring to
+  // itself, or referring to each other, or both referring to either of them.
+  // They're all equivalent if the two functions are otherwise equivalent.
+  if (V1 == F1 && V2 == F2)
+    return true;
+  if (V1 == F2 && V2 == F1)
+    return true;
 
-  // TODO: constant expressions in terms of LHS and RHS
+  // TODO: constant expressions with GEP or references to F1 or F2.
   if (isa<Constant>(V1))
     return V1 == V2;
 
@@ -338,228 +429,138 @@
            IA1->getConstraintString() == IA2->getConstraintString();
   }
 
-  // We enumerate constants globally and arguments, basic blocks or
-  // instructions within the function they belong to.
-  const Function *Domain1 = NULL;
-  if (const Argument *A = dyn_cast<Argument>(V1)) {
-    Domain1 = A->getParent();
-  } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) {
-    Domain1 = BB->getParent();
-  } else if (const Instruction *I = dyn_cast<Instruction>(V1)) {
-    Domain1 = I->getParent()->getParent();
-  }
-
-  const Function *Domain2 = NULL;
-  if (const Argument *A = dyn_cast<Argument>(V2)) {
-    Domain2 = A->getParent();
-  } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) {
-    Domain2 = BB->getParent();
-  } else if (const Instruction *I = dyn_cast<Instruction>(V2)) {
-    Domain2 = I->getParent()->getParent();
-  }
-
-  if (Domain1 != Domain2)
-    if (Domain1 != LHS && Domain1 != RHS)
-      if (Domain2 != LHS && Domain2 != RHS)
-        return false;
-
-  IDMap &Map1 = Domains[Domain1];
   unsigned long &ID1 = Map1[V1];
   if (!ID1)
-    ID1 = ++DomainCount[Domain1];
+    ID1 = ++IDMap1Count;
 
-  IDMap &Map2 = Domains[Domain2];
   unsigned long &ID2 = Map2[V2];
   if (!ID2)
-    ID2 = ++DomainCount[Domain2];
+    ID2 = ++IDMap2Count;
 
   return ID1 == ID2;
 }
 
-bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) {
-  BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
-  BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
+/// Compare - test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+  BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
+  BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
 
   do {
-    if (!compare(FI, GI))
+    if (!Enumerate(F1I, F2I))
       return false;
 
-    if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) {
-      const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI);
-      const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI);
+    if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
+      const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
+      if (!GEP2)
+        return false;
 
-      if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+      if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
         return false;
 
       if (!isEquivalentGEP(GEP1, GEP2))
         return false;
     } else {
-      if (!isEquivalentOperation(FI, GI))
+      if (!isEquivalentOperation(F1I, F2I))
         return false;
 
-      for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
-        Value *OpF = FI->getOperand(i);
-        Value *OpG = GI->getOperand(i);
+      assert(F1I->getNumOperands() == F2I->getNumOperands());
+      for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
+        Value *OpF1 = F1I->getOperand(i);
+        Value *OpF2 = F2I->getOperand(i);
 
-        if (!compare(OpF, OpG))
+        if (!Enumerate(OpF1, OpF2))
           return false;
 
-        if (OpF->getValueID() != OpG->getValueID() ||
-            !isEquivalentType(OpF->getType(), OpG->getType()))
+        if (OpF1->getValueID() != OpF2->getValueID() ||
+            !isEquivalentType(OpF1->getType(), OpF2->getType()))
           return false;
       }
     }
 
-    ++FI, ++GI;
-  } while (FI != FE && GI != GE);
+    ++F1I, ++F2I;
+  } while (F1I != F1E && F2I != F2E);
 
-  return FI == FE && GI == GE;
+  return F1I == F1E && F2I == F2E;
 }
 
-bool MergeFunctions::equals(const Function *F, const Function *G) {
+/// Compare - test whether the two functions have equivalent behaviour.
+bool FunctionComparator::Compare() {
   // We need to recheck everything, but check the things that weren't included
   // in the hash first.
 
-  if (F->getAttributes() != G->getAttributes())
+  if (F1->getAttributes() != F2->getAttributes())
     return false;
 
-  if (F->hasGC() != G->hasGC())
+  if (F1->hasGC() != F2->hasGC())
     return false;
 
-  if (F->hasGC() && F->getGC() != G->getGC())
+  if (F1->hasGC() && F1->getGC() != F2->getGC())
     return false;
 
-  if (F->hasSection() != G->hasSection())
+  if (F1->hasSection() != F2->hasSection())
     return false;
 
-  if (F->hasSection() && F->getSection() != G->getSection())
+  if (F1->hasSection() && F1->getSection() != F2->getSection())
     return false;
 
-  if (F->isVarArg() != G->isVarArg())
+  if (F1->isVarArg() != F2->isVarArg())
     return false;
 
   // TODO: if it's internal and only used in direct calls, we could handle this
   // case too.
-  if (F->getCallingConv() != G->getCallingConv())
+  if (F1->getCallingConv() != F2->getCallingConv())
     return false;
 
-  if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
+  if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
     return false;
 
-  assert(F->arg_size() == G->arg_size() &&
-         "Identical functions have a different number of args.");
-
-  LHS = F;
-  RHS = G;
+  assert(F1->arg_size() == F2->arg_size() &&
+         "Identically typed functions have different numbers of args!");
 
   // Visit the arguments so that they get enumerated in the order they're
   // passed in.
-  for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
-         fe = F->arg_end(); fi != fe; ++fi, ++gi) {
-    if (!compare(fi, gi))
-      llvm_unreachable("Arguments repeat");
-  }
-
-  SmallVector<const BasicBlock *, 8> FBBs, GBBs;
-  SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F.
-  FBBs.push_back(&F->getEntryBlock());
-  GBBs.push_back(&G->getEntryBlock());
-  VisitedBBs.insert(FBBs[0]);
-  while (!FBBs.empty()) {
-    const BasicBlock *FBB = FBBs.pop_back_val();
-    const BasicBlock *GBB = GBBs.pop_back_val();
-    if (!compare(FBB, GBB) || !equals(FBB, GBB)) {
-      Domains.clear();
-      DomainCount.clear();
-      return false;
-    }
-    const TerminatorInst *FTI = FBB->getTerminator();
-    const TerminatorInst *GTI = GBB->getTerminator();
-    assert(FTI->getNumSuccessors() == GTI->getNumSuccessors());
-    for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) {
-      if (!VisitedBBs.insert(FTI->getSuccessor(i)))
-        continue;
-      FBBs.push_back(FTI->getSuccessor(i));
-      GBBs.push_back(GTI->getSuccessor(i));
-    }
+  for (Function::const_arg_iterator f1i = F1->arg_begin(),
+         f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
+    if (!Enumerate(f1i, f2i))
+      llvm_unreachable("Arguments repeat!");
   }
 
-  Domains.clear();
-  DomainCount.clear();
-  return true;
-}
+  // We do a CFG-ordered walk since the actual ordering of the blocks in the
+  // linked list is immaterial. Our walk starts at the entry block for both
+  // functions, then takes each block from each terminator in order. As an
+  // artifact, this also means that unreachable blocks are ignored.
+  SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+  SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
 
-// ===----------------------------------------------------------------------===
-// Folding of functions
-// ===----------------------------------------------------------------------===
-
-// Cases:
-// * F is external strong, G is external strong:
-//   turn G into a thunk to F    (1)
-// * F is external strong, G is external weak:
-//   turn G into a thunk to F    (1)
-// * F is external weak, G is external weak:
-//   unfoldable
-// * F is external strong, G is internal:
-//   address of G taken:
-//     turn G into a thunk to F  (1)
-//   address of G not taken:
-//     make G an alias to F      (2)
-// * F is internal, G is external weak
-//   address of F is taken:
-//     turn G into a thunk to F  (1)
-//   address of F is not taken:
-//     make G an alias of F      (2)
-// * F is internal, G is internal:
-//   address of F and G are taken:
-//     turn G into a thunk to F  (1)
-//   address of G is not taken:
-//     make G an alias to F      (2)
-//
-// alias requires linkage == (external,local,weak) fallback to creating a thunk
-// external means 'externally visible' linkage != (internal,private)
-// internal means linkage == (internal,private)
-// weak means linkage mayBeOverridable
-// being external implies that the address is taken
-//
-// 1. turn G into a thunk to F
-// 2. make G an alias to F
+  F1BBs.push_back(&F1->getEntryBlock());
+  F2BBs.push_back(&F2->getEntryBlock());
 
-enum LinkageCategory {
-  ExternalStrong,
-  ExternalWeak,
-  Internal
-};
+  VisitedBBs.insert(F1BBs[0]);
+  while (!F1BBs.empty()) {
+    const BasicBlock *F1BB = F1BBs.pop_back_val();
+    const BasicBlock *F2BB = F2BBs.pop_back_val();
 
-static LinkageCategory categorize(const Function *F) {
-  switch (F->getLinkage()) {
-  case GlobalValue::InternalLinkage:
-  case GlobalValue::PrivateLinkage:
-  case GlobalValue::LinkerPrivateLinkage:
-    return Internal;
-
-  case GlobalValue::WeakAnyLinkage:
-  case GlobalValue::WeakODRLinkage:
-  case GlobalValue::ExternalWeakLinkage:
-  case GlobalValue::LinkerPrivateWeakLinkage:
-    return ExternalWeak;
-
-  case GlobalValue::ExternalLinkage:
-  case GlobalValue::AvailableExternallyLinkage:
-  case GlobalValue::LinkOnceAnyLinkage:
-  case GlobalValue::LinkOnceODRLinkage:
-  case GlobalValue::AppendingLinkage:
-  case GlobalValue::DLLImportLinkage:
-  case GlobalValue::DLLExportLinkage:
-  case GlobalValue::CommonLinkage:
-    return ExternalStrong;
-  }
+    if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+      return false;
 
-  llvm_unreachable("Unknown LinkageType.");
-  return ExternalWeak;
+    const TerminatorInst *F1TI = F1BB->getTerminator();
+    const TerminatorInst *F2TI = F2BB->getTerminator();
+
+    assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
+    for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
+      if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+        continue;
+
+      F1BBs.push_back(F1TI->getSuccessor(i));
+      F2BBs.push_back(F2TI->getSuccessor(i));
+    }
+  }
+  return true;
 }
 
-static void ThunkGToF(Function *F, Function *G) {
+/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
+/// direct uses of G with bitcast(F). Deletes G.
+void MergeFunctions::WriteThunk(Function *F, Function *G) const {
   if (!G->mayBeOverridden()) {
     // Redirect direct callers of G to F.
     Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
@@ -573,188 +574,212 @@
     }
   }
 
+  // If G was internal then we may have replaced all uses of G with F. If so,
+  // stop here and delete G. There's no need for a thunk.
+  if (G->hasLocalLinkage() && G->use_empty()) {
+    G->eraseFromParent();
+    return;
+  }
+
   Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
                                     G->getParent());
   BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+  IRBuilder<false> Builder(BB);
 
   SmallVector<Value *, 16> Args;
   unsigned i = 0;
   const FunctionType *FFTy = F->getFunctionType();
   for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
        AI != AE; ++AI) {
-    if (FFTy->getParamType(i) == AI->getType()) {
-      Args.push_back(AI);
-    } else {
-      Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB));
-    }
+    Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
     ++i;
   }
 
-  CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
+  CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
   CI->setTailCall();
   CI->setCallingConv(F->getCallingConv());
   if (NewG->getReturnType()->isVoidTy()) {
-    ReturnInst::Create(F->getContext(), BB);
-  } else if (CI->getType() != NewG->getReturnType()) {
-    Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
-    ReturnInst::Create(F->getContext(), BCI, BB);
+    Builder.CreateRetVoid();
   } else {
-    ReturnInst::Create(F->getContext(), CI, BB);
+    Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
   }
 
   NewG->copyAttributesFrom(G);
   NewG->takeName(G);
   G->replaceAllUsesWith(NewG);
   G->eraseFromParent();
-}
 
-static void AliasGToF(Function *F, Function *G) {
-  // Darwin will trigger llvm_unreachable if asked to codegen an alias.
-  return ThunkGToF(F, G);
-
-#if 0
-  if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
-    return ThunkGToF(F, G);
-
-  GlobalAlias *GA = new GlobalAlias(
-    G->getType(), G->getLinkage(), "",
-    ConstantExpr::getBitCast(F, G->getType()), G->getParent());
-  F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
-  GA->takeName(G);
-  GA->setVisibility(G->getVisibility());
-  G->replaceAllUsesWith(GA);
-  G->eraseFromParent();
-#endif
+  DEBUG(dbgs() << "WriteThunk: " << NewG->getName() << '\n');
+  ++NumThunksWritten;
 }
 
-static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
-  Function *F = FnVec[i];
-  Function *G = FnVec[j];
-
-  LinkageCategory catF = categorize(F);
-  LinkageCategory catG = categorize(G);
-
-  if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) {
-    std::swap(FnVec[i], FnVec[j]);
-    std::swap(F, G);
-    std::swap(catF, catG);
-  }
-
-  switch (catF) {
-  case ExternalStrong:
-    switch (catG) {
-    case ExternalStrong:
-    case ExternalWeak:
-      ThunkGToF(F, G);
-      break;
-    case Internal:
-      if (G->hasAddressTaken())
-        ThunkGToF(F, G);
-      else
-        AliasGToF(F, G);
-      break;
-    }
-    break;
-
-  case ExternalWeak: {
-    assert(catG == ExternalWeak);
+/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
+/// Function G is deleted.
+void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const {
+  if (F->mayBeOverridden()) {
+    assert(G->mayBeOverridden());
 
     // Make them both thunks to the same internal function.
-    F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
     Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
                                    F->getParent());
     H->copyAttributesFrom(F);
     H->takeName(F);
     F->replaceAllUsesWith(H);
 
-    ThunkGToF(F, G);
-    ThunkGToF(F, H);
+    unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
 
-    F->setLinkage(GlobalValue::InternalLinkage);
-  } break;
+    WriteThunk(F, G);
+    WriteThunk(F, H);
 
-  case Internal:
-    switch (catG) {
-    case ExternalStrong:
-      llvm_unreachable(0);
-      // fall-through
-    case ExternalWeak:
-      if (F->hasAddressTaken())
-        ThunkGToF(F, G);
-      else
-        AliasGToF(F, G);
-      break;
-    case Internal: {
-      bool addrTakenF = F->hasAddressTaken();
-      bool addrTakenG = G->hasAddressTaken();
-      if (!addrTakenF && addrTakenG) {
-        std::swap(FnVec[i], FnVec[j]);
-        std::swap(F, G);
-        std::swap(addrTakenF, addrTakenG);
-      }
+    F->setAlignment(MaxAlignment);
+    F->setLinkage(GlobalValue::InternalLinkage);
 
-      if (addrTakenF && addrTakenG) {
-        ThunkGToF(F, G);
-      } else {
-        assert(!addrTakenG);
-        AliasGToF(F, G);
-      }
-    } break;
-  } break;
+    ++NumDoubleWeak;
+  } else {
+    WriteThunk(F, G);
   }
 
   ++NumFunctionsMerged;
-  return true;
 }
 
-// ===----------------------------------------------------------------------===
-// Pass definition
-// ===----------------------------------------------------------------------===
+// Insert - Insert a ComparableFunction into the FnSet, or merge it away if
+// equal to one that's already inserted.
+bool MergeFunctions::Insert(FnSetType &FnSet, ComparableFunction &NewF) {
+  std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+  if (Result.second)
+    return false;
 
-bool MergeFunctions::runOnModule(Module &M) {
-  bool Changed = false;
+  const ComparableFunction &OldF = *Result.first;
 
-  std::map<unsigned long, std::vector<Function *> > FnMap;
+  // Never thunk a strong function to a weak function.
+  assert(!OldF.getFunc()->mayBeOverridden() ||
+         NewF.getFunc()->mayBeOverridden());
 
-  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
-    if (F->isDeclaration())
-      continue;
+  DEBUG(dbgs() << "  " << OldF.getFunc()->getName() << " == "
+               << NewF.getFunc()->getName() << '\n');
 
-    FnMap[hash(F)].push_back(F);
-  }
+  Function *DeleteF = NewF.getFunc();
+  NewF.release();
+  MergeTwoFunctions(OldF.getFunc(), DeleteF);
+  return true;
+}
+
+// IsThunk - This method determines whether or not a given Function is a thunk
+// like the ones emitted by this pass and therefore not subject to further
+// merging.
+static bool IsThunk(const Function *F) {
+  // The safe direction to fail is to return true. In that case, the function
+  // will be removed from merging analysis. If we fail to include functions,
+  // then we may try to merge unmergeable things (i.e., identical weak
+  // functions), which will push us into an infinite loop.
+
+  assert(!F->isDeclaration() && "Expected a function definition.");
+
+  const BasicBlock *BB = &F->front();
+  // A thunk is:
+  //   bitcast-inst*
+  //   optional-reg tail call @thunkee(args...*)
+  //   ret void|optional-reg
+  // where the args are in the same order as the arguments.
+
+  // Put this at the top since it triggers most often.
+  const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+  if (!RI) return false;
+
+  // Verify that the sequence of bitcast-inst's are all casts of arguments and
+  // that there aren't any extras (ie. no repeated casts).
+  int LastArgNo = -1;
+  BasicBlock::const_iterator I = BB->begin();
+  while (const BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+    const Argument *A = dyn_cast<Argument>(BCI->getOperand(0));
+    if (!A) return false;
+    if ((int)A->getArgNo() <= LastArgNo) return false;
+    LastArgNo = A->getArgNo();
+    ++I;
+  }
+
+  // Verify that we have a direct tail call and that the calling conventions
+  // and number of arguments match.
+  const CallInst *CI = dyn_cast<CallInst>(I++);
+  if (!CI || !CI->isTailCall() || !CI->getCalledFunction() || 
+      CI->getCallingConv() != CI->getCalledFunction()->getCallingConv() ||
+      CI->getNumArgOperands() != F->arg_size())
+    return false;
+
+  // Verify that the call instruction has the same arguments as this function
+  // and that they're all either the incoming argument or a cast of the right
+  // argument.
+  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+    const Value *V = CI->getArgOperand(i);
+    const Argument *A = dyn_cast<Argument>(V);
+    if (!A) {
+      const BitCastInst *BCI = dyn_cast<BitCastInst>(V);
+      if (!BCI) return false;
+      A = cast<Argument>(BCI->getOperand(0));
+    }
+    if (A->getArgNo() != i) return false;
+  }
+
+  // Verify that the terminator is a ret void (if we're void) or a ret of the
+  // call's return, or a ret of a bitcast of the call's return.
+  if (const BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+    ++I;
+    if (BCI->getOperand(0) != CI) return false;
+  }
+  if (RI != I) return false;
+  if (RI->getNumOperands() == 0)
+    return CI->getType()->isVoidTy();
+  return RI->getReturnValue() == CI;
+}
 
+bool MergeFunctions::runOnModule(Module &M) {
+  bool Changed = false;
   TD = getAnalysisIfAvailable<TargetData>();
 
   bool LocalChanged;
   do {
+    DEBUG(dbgs() << "size of module: " << M.size() << '\n');
     LocalChanged = false;
-    DEBUG(dbgs() << "size: " << FnMap.size() << "\n");
-    for (std::map<unsigned long, std::vector<Function *> >::iterator
-           I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
-      std::vector<Function *> &FnVec = I->second;
-      DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
-
-      for (int i = 0, e = FnVec.size(); i != e; ++i) {
-        for (int j = i + 1; j != e; ++j) {
-          bool isEqual = equals(FnVec[i], FnVec[j]);
-
-          DEBUG(dbgs() << "  " << FnVec[i]->getName()
-                << (isEqual ? " == " : " != ")
-                << FnVec[j]->getName() << "\n");
-
-          if (isEqual) {
-            if (fold(FnVec, i, j)) {
-              LocalChanged = true;
-              FnVec.erase(FnVec.begin() + j);
-              --j, --e;
-            }
-          }
-        }
+    FnSetType FnSet;
+
+    // Insert only strong functions and merge them. Strong function merging
+    // always deletes one of them.
+    for (Module::iterator I = M.begin(), E = M.end(); I != E;) {
+      Function *F = I++;
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          !F->mayBeOverridden() && !IsThunk(F)) {
+        ComparableFunction CF = ComparableFunction(F, TD);
+        LocalChanged |= Insert(FnSet, CF);
       }
+    }
 
+    // Insert only weak functions and merge them. By doing these second we
+    // create thunks to the strong function when possible. When two weak
+    // functions are identical, we create a new strong function with two weak
+    // thunks to it which are identical but not mergable.
+    for (Module::iterator I = M.begin(), E = M.end(); I != E;) {
+      Function *F = I++;
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          F->mayBeOverridden() && !IsThunk(F)) {
+        ComparableFunction CF = ComparableFunction(F, TD);
+        LocalChanged |= Insert(FnSet, CF);
+      }
     }
+    DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
     Changed |= LocalChanged;
   } while (LocalChanged);
 
   return Changed;
 }
+
+bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
+                                               const ComparableFunction &RHS) {
+  if (LHS.getFunc() == RHS.getFunc() &&
+      LHS.getHash() == RHS.getHash())
+    return true;
+  if (!LHS.getFunc() || !RHS.getFunc())
+    return false;
+  assert(LHS.getTD() == RHS.getTD() &&
+         "Comparing functions for different targets");
+  return FunctionComparator(LHS.getTD(),
+                            LHS.getFunc(), RHS.getFunc()).Compare();
+}

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp Tue Oct 26 19:48:03 2010
@@ -30,7 +30,9 @@
   struct PartialInliner : public ModulePass {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
     static char ID; // Pass identification, replacement for typeid
-    PartialInliner() : ModulePass(&ID) {}
+    PartialInliner() : ModulePass(ID) {
+      initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+    }
     
     bool runOnModule(Module& M);
     
@@ -41,7 +43,7 @@
 
 char PartialInliner::ID = 0;
 INITIALIZE_PASS(PartialInliner, "partial-inliner",
-                "Partial Inliner", false, false);
+                "Partial Inliner", false, false)
 
 ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
 
@@ -67,8 +69,9 @@
     return 0;
   
   // Clone the function, so that we can hack away on it.
-  ValueMap<const Value*, Value*> VMap;
-  Function* duplicateFunction = CloneFunction(F, VMap);
+  ValueToValueMapTy VMap;
+  Function* duplicateFunction = CloneFunction(F, VMap,
+                                              /*ModuleLevelChanges=*/false);
   duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
   F->getParent()->getFunctionList().push_back(duplicateFunction);
   BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp Tue Oct 26 19:48:03 2010
@@ -25,6 +25,7 @@
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/ADT/DenseSet.h"
@@ -37,47 +38,49 @@
 // Maximum number of arguments markable interested
 static const int MaxInterests = 6;
 
-// Call must be used at least occasionally
-static const int CallsMin = 5;
-
-// Must have 10% of calls having the same constant to specialize on
-static const double ConstValPercent = .1;
-
 namespace {
   typedef SmallVector<int, MaxInterests> InterestingArgVector;
   class PartSpec : public ModulePass {
     void scanForInterest(Function&, InterestingArgVector&);
     int scanDistribution(Function&, int, std::map<Constant*, int>&);
+    InlineCostAnalyzer CA;
   public :
     static char ID; // Pass identification, replacement for typeid
-    PartSpec() : ModulePass(&ID) {}
+    PartSpec() : ModulePass(ID) {
+      initializePartSpecPass(*PassRegistry::getPassRegistry());
+    }
     bool runOnModule(Module &M);
   };
 }
 
 char PartSpec::ID = 0;
 INITIALIZE_PASS(PartSpec, "partialspecialization",
-                "Partial Specialization", false, false);
+                "Partial Specialization", false, false)
 
 // Specialize F by replacing the arguments (keys) in replacements with the 
 // constants (values).  Replace all calls to F with those constants with
 // a call to the specialized function.  Returns the specialized function
 static Function* 
 SpecializeFunction(Function* F, 
-                   ValueMap<const Value*, Value*>& replacements) {
+                   ValueToValueMapTy& replacements) {
   // arg numbers of deleted arguments
   DenseMap<unsigned, const Argument*> deleted;
-  for (ValueMap<const Value*, Value*>::iterator 
+  for (ValueToValueMapTy::iterator 
          repb = replacements.begin(), repe = replacements.end();
        repb != repe; ++repb) {
     Argument const *arg = cast<const Argument>(repb->first);
     deleted[arg->getArgNo()] = arg;
   }
 
-  Function* NF = CloneFunction(F, replacements);
+  Function* NF = CloneFunction(F, replacements,
+                               /*ModuleLevelChanges=*/false);
   NF->setLinkage(GlobalValue::InternalLinkage);
   F->getParent()->getFunctionList().push_back(NF);
 
+  // FIXME: Specialized versions getting the same constants should also get
+  // the same name.  That way, specializations for public functions can be
+  // marked linkonce_odr and reused across modules.
+
   for (Value::use_iterator ii = F->use_begin(), ee = F->use_end(); 
        ii != ee; ) {
     Value::use_iterator i = ii;
@@ -148,22 +151,37 @@
     bool breakOuter = false;
     for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
       std::map<Constant*, int> distribution;
-      int total = scanDistribution(F, interestingArgs[x], distribution);
-      if (total > CallsMin) 
-        for (std::map<Constant*, int>::iterator ii = distribution.begin(),
-               ee = distribution.end(); ii != ee; ++ii)
-          if (total > ii->second && ii->first &&
-               ii->second > total * ConstValPercent) {
-            ValueMap<const Value*, Value*> m;
-            Function::arg_iterator arg = F.arg_begin();
-            for (int y = 0; y < interestingArgs[x]; ++y)
-              ++arg;
-            m[&*arg] = ii->first;
-            SpecializeFunction(&F, m);
-            ++numSpecialized;
-            breakOuter = true;
-            Changed = true;
-          }
+      scanDistribution(F, interestingArgs[x], distribution);
+      for (std::map<Constant*, int>::iterator ii = distribution.begin(),
+             ee = distribution.end(); ii != ee; ++ii) {
+        // The distribution map might have an entry for NULL (i.e., one or more
+        // callsites were passing a non-constant there).  We allow that to 
+        // happen so that we can see whether any callsites pass a non-constant; 
+        // if none do and the function is internal, we might have an opportunity
+        // to kill the original function.
+        if (!ii->first) continue;
+        int bonus = ii->second;
+        SmallVector<unsigned, 1> argnos;
+        argnos.push_back(interestingArgs[x]);
+        InlineCost cost = CA.getSpecializationCost(&F, argnos);
+        // FIXME: If this is the last constant entry, and no non-constant
+        // entries exist, and the target function is internal, the cost should
+        // be reduced by the original size of the target function, almost
+        // certainly making it negative and causing a specialization that will
+        // leave the original function dead and removable.
+        if (cost.isAlways() || 
+           (cost.isVariable() && cost.getValue() < bonus)) {
+          ValueToValueMapTy m;
+          Function::arg_iterator arg = F.arg_begin();
+          for (int y = 0; y < interestingArgs[x]; ++y)
+            ++arg;
+          m[&*arg] = ii->first;
+          SpecializeFunction(&F, m);
+          ++numSpecialized;
+          breakOuter = true;
+          Changed = true;
+        }
+      }
     }
   }
   return Changed;
@@ -174,28 +192,20 @@
 void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
   for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
       ii != ee; ++ii) {
-    for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
-        ui != ue; ++ui) {
-
-      bool interesting = false;
-      User *U = *ui;
-      if (isa<CmpInst>(U)) interesting = true;
-      else if (isa<CallInst>(U))
-        interesting = ui->getOperand(0) == ii;
-      else if (isa<InvokeInst>(U))
-        interesting = ui->getOperand(0) == ii;
-      else if (isa<SwitchInst>(U)) interesting = true;
-      else if (isa<BranchInst>(U)) interesting = true;
-
-      if (interesting) {
-        args.push_back(std::distance(F.arg_begin(), ii));
-        break;
-      }
+    int argno = std::distance(F.arg_begin(), ii);
+    SmallVector<unsigned, 1> argnos;
+    argnos.push_back(argno);
+    int bonus = CA.getSpecializationBonus(&F, argnos);
+    if (bonus > 0) {
+      args.push_back(argno);
     }
   }
 }
 
 /// scanDistribution - Construct a histogram of constants for arg of F at arg.
+/// For each distinct constant, we'll compute the total of the specialization
+/// bonus across all callsites passing that constant; if that total exceeds
+/// the specialization cost, we will create the specialization.
 int PartSpec::scanDistribution(Function& F, int arg, 
                                std::map<Constant*, int>& dist) {
   bool hasIndirect = false;
@@ -205,7 +215,10 @@
     User *U = *ii;
     CallSite CS(U);
     if (CS && CS.getCalledFunction() == &F) {
-      ++dist[dyn_cast<Constant>(CS.getArgument(arg))];
+      SmallVector<unsigned, 1> argnos;
+      argnos.push_back(arg);
+      dist[dyn_cast<Constant>(CS.getArgument(arg))] += 
+           CA.getSpecializationBonus(&F, argnos);
       ++total;
     } else
       hasIndirect = true;

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PruneEH.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PruneEH.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PruneEH.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PruneEH.cpp Tue Oct 26 19:48:03 2010
@@ -37,7 +37,9 @@
 namespace {
   struct PruneEH : public CallGraphSCCPass {
     static char ID; // Pass identification, replacement for typeid
-    PruneEH() : CallGraphSCCPass(&ID) {}
+    PruneEH() : CallGraphSCCPass(ID) {
+      initializePruneEHPass(*PassRegistry::getPassRegistry());
+    }
 
     // runOnSCC - Analyze the SCC, performing the transformation if possible.
     bool runOnSCC(CallGraphSCC &SCC);
@@ -48,8 +50,11 @@
 }
 
 char PruneEH::ID = 0;
-INITIALIZE_PASS(PruneEH, "prune-eh",
-                "Remove unused exception handling info", false, false);
+INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
+                "Remove unused exception handling info", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(PruneEH, "prune-eh",
+                "Remove unused exception handling info", false, false)
 
 Pass *llvm::createPruneEHPass() { return new PruneEH(); }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StripDeadPrototypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StripDeadPrototypes.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StripDeadPrototypes.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StripDeadPrototypes.cpp Tue Oct 26 19:48:03 2010
@@ -29,7 +29,9 @@
 class StripDeadPrototypesPass : public ModulePass {
 public:
   static char ID; // Pass identification, replacement for typeid
-  StripDeadPrototypesPass() : ModulePass(&ID) { }
+  StripDeadPrototypesPass() : ModulePass(ID) {
+    initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
+  }
   virtual bool runOnModule(Module &M);
 };
 
@@ -37,7 +39,7 @@
 
 char StripDeadPrototypesPass::ID = 0;
 INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
-                "Strip Unused Function Prototypes", false, false);
+                "Strip Unused Function Prototypes", false, false)
 
 bool StripDeadPrototypesPass::runOnModule(Module &M) {
   bool MadeChange = false;

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp Tue Oct 26 19:48:03 2010
@@ -39,7 +39,9 @@
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripSymbols(bool ODI = false) 
-      : ModulePass(&ID), OnlyDebugInfo(ODI) {}
+      : ModulePass(ID), OnlyDebugInfo(ODI) {
+        initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnModule(Module &M);
 
@@ -52,7 +54,9 @@
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripNonDebugSymbols()
-      : ModulePass(&ID) {}
+      : ModulePass(ID) {
+        initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnModule(Module &M);
 
@@ -65,7 +69,9 @@
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripDebugDeclare()
-      : ModulePass(&ID) {}
+      : ModulePass(ID) {
+        initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnModule(Module &M);
 
@@ -78,7 +84,9 @@
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripDeadDebugInfo()
-      : ModulePass(&ID) {}
+      : ModulePass(ID) {
+        initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnModule(Module &M);
 
@@ -90,7 +98,7 @@
 
 char StripSymbols::ID = 0;
 INITIALIZE_PASS(StripSymbols, "strip",
-                "Strip all symbols from a module", false, false);
+                "Strip all symbols from a module", false, false)
 
 ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
   return new StripSymbols(OnlyDebugInfo);
@@ -99,7 +107,7 @@
 char StripNonDebugSymbols::ID = 0;
 INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
                 "Strip all symbols, except dbg symbols, from a module",
-                false, false);
+                false, false)
 
 ModulePass *llvm::createStripNonDebugSymbolsPass() {
   return new StripNonDebugSymbols();
@@ -107,7 +115,7 @@
 
 char StripDebugDeclare::ID = 0;
 INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
-                "Strip all llvm.dbg.declare intrinsics", false, false);
+                "Strip all llvm.dbg.declare intrinsics", false, false)
 
 ModulePass *llvm::createStripDebugDeclarePass() {
   return new StripDebugDeclare();
@@ -115,7 +123,7 @@
 
 char StripDeadDebugInfo::ID = 0;
 INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
-                "Strip debug info for unused symbols", false, false);
+                "Strip debug info for unused symbols", false, false)
 
 ModulePass *llvm::createStripDeadDebugInfoPass() {
   return new StripDeadDebugInfo();
@@ -350,8 +358,8 @@
 
     for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
            E = MDs.end(); I != E; ++I) {
-      if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(), 
-                              true)) {
+      GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
+      if (GV && M.getGlobalVariable(GV->getName(), true)) {
         if (!NMD)
           NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
         NMD->addOperand(*I);

Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===//
+//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -50,20 +50,24 @@
 
     virtual bool runOnSCC(CallGraphSCC &SCC);
     static char ID; // Pass identification, replacement for typeid
-    SRETPromotion() : CallGraphSCCPass(&ID) {}
+    SRETPromotion() : CallGraphSCCPass(ID) {
+      initializeSRETPromotionPass(*PassRegistry::getPassRegistry());
+    }
 
   private:
     CallGraphNode *PromoteReturn(CallGraphNode *CGN);
     bool isSafeToUpdateAllCallers(Function *F);
     Function *cloneFunctionBody(Function *F, const StructType *STy);
     CallGraphNode *updateCallSites(Function *F, Function *NF);
-    bool nestedStructType(const StructType *STy);
   };
 }
 
 char SRETPromotion::ID = 0;
-INITIALIZE_PASS(SRETPromotion, "sretpromotion",
-                "Promote sret arguments to multiple ret values", false, false);
+INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion",
+                "Promote sret arguments to multiple ret values", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SRETPromotion, "sretpromotion",
+                "Promote sret arguments to multiple ret values", false, false)
 
 Pass *llvm::createStructRetPromotionPass() {
   return new SRETPromotion();
@@ -354,14 +358,3 @@
   return NF_CGN;
 }
 
-/// nestedStructType - Return true if STy includes any
-/// other aggregate types
-bool SRETPromotion::nestedStructType(const StructType *STy) {
-  unsigned Num = STy->getNumElements();
-  for (unsigned i = 0; i < Num; i++) {
-    const Type *Ty = STy->getElementType(i);
-    if (!Ty->isSingleValueType() && !Ty->isVoidTy())
-      return true;
-  }
-  return false;
-}

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -13,5 +13,3 @@
   InstCombineSimplifyDemanded.cpp
   InstCombineVectorOps.cpp
   )
-
-target_link_libraries (LLVMInstCombine LLVMTransformUtils)

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h Tue Oct 26 19:48:03 2010
@@ -81,7 +81,9 @@
   BuilderTy *Builder;
       
   static char ID; // Pass identification, replacement for typeid
-  InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {}
+  InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+    initializeInstCombinerPass(*PassRegistry::getPassRegistry());
+  }
 
 public:
   virtual bool runOnFunction(Function &F);

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Tue Oct 26 19:48:03 2010
@@ -207,15 +207,26 @@
     }
     break;
   case Instruction::Or:
-    if (Together == AndRHS) // (X | C) & C --> C
-      return ReplaceInstUsesWith(TheAnd, AndRHS);
-
-    if (Op->hasOneUse() && Together != OpRHS) {
-      // (X | C1) & C2 --> (X | (C1&C2)) & C2
-      Value *Or = Builder->CreateOr(X, Together);
-      Or->takeName(Op);
-      return BinaryOperator::CreateAnd(Or, AndRHS);
+    if (Op->hasOneUse()){
+      if (Together != OpRHS) {
+        // (X | C1) & C2 --> (X | (C1&C2)) & C2
+        Value *Or = Builder->CreateOr(X, Together);
+        Or->takeName(Op);
+        return BinaryOperator::CreateAnd(Or, AndRHS);
+      }
+      
+      ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+      if (TogetherCI && !TogetherCI->isZero()){
+        // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+        // NOTE: This reduces the number of bits set in the & mask, which
+        // can expose opportunities for store narrowing.
+        Together = ConstantExpr::getXor(AndRHS, Together);
+        Value *And = Builder->CreateAnd(X, Together);
+        And->takeName(Op);
+        return BinaryOperator::CreateOr(And, OpRHS);
+      }
     }
+    
     break;
   case Instruction::Add:
     if (Op->hasOneUse()) {
@@ -434,6 +445,270 @@
   return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
 }
 
+/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
+/// One of A and B is considered the mask, the other the value. This is 
+/// described as the "AMask" or "BMask" part of the enum. If the enum 
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it was proven, that (A & C) == C. This
+/// is trivial if C == A, or C == 0. If both A and C are constants, this
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The part "AllOnes" declares, that the comparison is true only 
+/// if (A & B) == A, or all bits of A are set in B.
+///   Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The part "AllZeroes" declares, that the comparison is true only 
+/// if (A & B) == 0, or all bits of A are cleared in B.
+///   Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The part "Mixed" declares, that (A & B) == C and C might or might not 
+/// contain any number of one bits and zero bits.
+///   Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The Part "Not" means, that in above descriptions "==" should be replaced
+/// by "!=".
+///   Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following is equivalent:
+///    (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+///    (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+  FoldMskICmp_AMask_AllOnes           =     1,
+  FoldMskICmp_AMask_NotAllOnes        =     2,
+  FoldMskICmp_BMask_AllOnes           =     4,
+  FoldMskICmp_BMask_NotAllOnes        =     8,
+  FoldMskICmp_Mask_AllZeroes          =    16,
+  FoldMskICmp_Mask_NotAllZeroes       =    32,
+  FoldMskICmp_AMask_Mixed             =    64,
+  FoldMskICmp_AMask_NotMixed          =   128,
+  FoldMskICmp_BMask_Mixed             =   256,
+  FoldMskICmp_BMask_NotMixed          =   512
+};
+
+/// return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, 
+                                    ICmpInst::Predicate SCC)
+{
+  ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+  ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+  bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+  bool icmp_abit = (ACst != 0 && !ACst->isZero() && 
+                    ACst->getValue().isPowerOf2());
+  bool icmp_bbit = (BCst != 0 && !BCst->isZero() && 
+                    BCst->getValue().isPowerOf2());
+  unsigned result = 0;
+  if (CCst != 0 && CCst->isZero()) {
+    // if C is zero, then both A and B qualify as mask
+    result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+                          FoldMskICmp_Mask_AllZeroes |
+                          FoldMskICmp_AMask_Mixed |
+                          FoldMskICmp_BMask_Mixed)
+                       : (FoldMskICmp_Mask_NotAllZeroes |
+                          FoldMskICmp_Mask_NotAllZeroes |
+                          FoldMskICmp_AMask_NotMixed |
+                          FoldMskICmp_BMask_NotMixed));
+    if (icmp_abit)
+      result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+                            FoldMskICmp_AMask_NotMixed) 
+                         : (FoldMskICmp_AMask_AllOnes |
+                            FoldMskICmp_AMask_Mixed));
+    if (icmp_bbit)
+      result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+                            FoldMskICmp_BMask_NotMixed) 
+                         : (FoldMskICmp_BMask_AllOnes |
+                            FoldMskICmp_BMask_Mixed));
+    return result;
+  }
+  if (A == C) {
+    result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+                          FoldMskICmp_AMask_Mixed)
+                       : (FoldMskICmp_AMask_NotAllOnes |
+                          FoldMskICmp_AMask_NotMixed));
+    if (icmp_abit)
+      result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+                            FoldMskICmp_AMask_NotMixed)
+                         : (FoldMskICmp_Mask_AllZeroes |
+                            FoldMskICmp_AMask_Mixed));
+  }
+  else if (ACst != 0 && CCst != 0 &&
+        ConstantExpr::getAnd(ACst, CCst) == CCst) {
+    result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+                       : FoldMskICmp_AMask_NotMixed);
+  }
+  if (B == C) 
+  {
+    result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+                          FoldMskICmp_BMask_Mixed)
+                       : (FoldMskICmp_BMask_NotAllOnes |
+                          FoldMskICmp_BMask_NotMixed));
+    if (icmp_bbit)
+      result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+                            FoldMskICmp_BMask_NotMixed) 
+                         : (FoldMskICmp_Mask_AllZeroes |
+                            FoldMskICmp_BMask_Mixed));
+  }
+  else if (BCst != 0 && CCst != 0 &&
+        ConstantExpr::getAnd(BCst, CCst) == CCst) {
+    result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
+                       : FoldMskICmp_BMask_NotMixed);
+  }
+  return result;
+}
+
+/// foldLogOpOfMaskedICmpsHelper:
+/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, 
+                                             Value*& B, Value*& C,
+                                             Value*& D, Value*& E,
+                                             ICmpInst *LHS, ICmpInst *RHS) {
+  ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+  if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0;
+  if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0;
+  if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+  // vectors are not (yet?) supported
+  if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+
+  // Here comes the tricky part:
+  // LHS might be of the form L11 & L12 == X, X == L21 & L22, 
+  // and L11 & L12 == L21 & L22. The same goes for RHS.
+  // Now we must find those components L** and R**, that are equal, so
+  // that we can extract the parameters A, B, C, D, and E for the canonical 
+  // above.
+  Value *L1 = LHS->getOperand(0);
+  Value *L2 = LHS->getOperand(1);
+  Value *L11,*L12,*L21,*L22;
+  if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+    if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+      L21 = L22 = 0;
+  }
+  else {
+    if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+      return 0;
+    std::swap(L1, L2);
+    L21 = L22 = 0;
+  }
+
+  Value *R1 = RHS->getOperand(0);
+  Value *R2 = RHS->getOperand(1);
+  Value *R11,*R12;
+  bool ok = false;
+  if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+    if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+      A = R11; D = R12; E = R2; ok = true;
+    }
+    else 
+    if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+      A = R12; D = R11; E = R2; ok = true;
+    }
+  }
+  if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+    if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+       A = R11; D = R12; E = R1; ok = true;
+    }
+    else 
+    if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+      A = R12; D = R11; E = R1; ok = true;
+    }
+    else
+      return 0;
+  }
+  if (!ok)
+    return 0;
+
+  if (L11 == A) {
+    B = L12; C = L2;
+  }
+  else if (L12 == A) {
+    B = L11; C = L2;
+  }
+  else if (L21 == A) {
+    B = L22; C = L1;
+  }
+  else if (L22 == A) {
+    B = L21; C = L1;
+  }
+
+  unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
+  unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
+  return left_type & right_type;
+}
+/// foldLogOpOfMaskedICmps:
+/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y)
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
+                                     ICmpInst::Predicate NEWCC,
+                                     llvm::InstCombiner::BuilderTy* Builder) {
+  Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+  unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS);
+  if (mask == 0) return 0;
+
+  if (NEWCC == ICmpInst::ICMP_NE)
+    mask >>= 1; // treat "Not"-states as normal states
+
+  if (mask & FoldMskICmp_Mask_AllZeroes) {
+    // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) 
+    // -> (icmp eq (A & (B|D)), 0)
+    Value* newOr = Builder->CreateOr(B, D);
+    Value* newAnd = Builder->CreateAnd(A, newOr);
+    // we can't use C as zero, because we might actually handle
+    //   (icmp ne (A & B), B) & (icmp ne (A & D), D) 
+    // with B and D, having a single bit set
+    Value* zero = Constant::getNullValue(A->getType());
+    return Builder->CreateICmp(NEWCC, newAnd, zero);
+  }
+  else if (mask & FoldMskICmp_BMask_AllOnes) {
+    // (icmp eq (A & B), B) & (icmp eq (A & D), D) 
+    // -> (icmp eq (A & (B|D)), (B|D))
+    Value* newOr = Builder->CreateOr(B, D);
+    Value* newAnd = Builder->CreateAnd(A, newOr);
+    return Builder->CreateICmp(NEWCC, newAnd, newOr);
+  }     
+  else if (mask & FoldMskICmp_AMask_AllOnes) {
+    // (icmp eq (A & B), A) & (icmp eq (A & D), A) 
+    // -> (icmp eq (A & (B&D)), A)
+    Value* newAnd1 = Builder->CreateAnd(B, D);
+    Value* newAnd = Builder->CreateAnd(A, newAnd1);
+    return Builder->CreateICmp(NEWCC, newAnd, A);
+  }
+  else if (mask & FoldMskICmp_BMask_Mixed) {
+    // (icmp eq (A & B), C) & (icmp eq (A & D), E) 
+    // We already know that B & C == C && D & E == E.
+    // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
+    // C and E, which are shared by both the mask B and the mask D, don't
+    // contradict, then we can transform to
+    // -> (icmp eq (A & (B|D)), (C|E))
+    // Currently, we only handle the case of B, C, D, and E being constant.
+    ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+    if (BCst == 0) return 0;
+    ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+    if (DCst == 0) return 0;
+    // we can't simply use C and E, because we might actually handle
+    //   (icmp ne (A & B), B) & (icmp eq (A & D), D) 
+    // with B and D, having a single bit set
+
+    ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+    if (CCst == 0) return 0;
+    if (LHS->getPredicate() != NEWCC)
+      CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
+    ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+    if (ECst == 0) return 0;
+    if (RHS->getPredicate() != NEWCC)
+      ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
+    ConstantInt* MCst = dyn_cast<ConstantInt>(
+      ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
+                           ConstantExpr::getXor(CCst, ECst)) );
+    // if there is a conflict we should actually return a false for the
+    // whole construct
+    if (!MCst->isZero())
+      return 0;
+    Value* newOr1 = Builder->CreateOr(B, D);
+    Value* newOr2 = ConstantExpr::getOr(CCst, ECst);
+    Value* newAnd = Builder->CreateAnd(A, newOr1);
+    return Builder->CreateICmp(NEWCC, newAnd, newOr2);
+  }
+  return 0;
+}
+
 /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
 Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -451,6 +726,13 @@
       return getICmpValue(isSigned, Code, Op0, Op1, Builder);
     }
   }
+
+  {
+    // handle (roughly):
+    // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+    Value* fold = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder);
+    if (fold) return fold;
+  }
   
   // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
   Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
@@ -1145,17 +1427,25 @@
     }
   }
   
+  {
+    // handle (roughly):
+    // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+    Value* fold = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder);
+    if (fold) return fold;
+  }
+
   // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
   Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
   ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
   ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
   if (LHSCst == 0 || RHSCst == 0) return 0;
 
-  // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
-  if (LHSCst == RHSCst && LHSCC == RHSCC &&
-      LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
-    Value *NewOr = Builder->CreateOr(Val, Val2);
-    return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+  if (LHSCst == RHSCst && LHSCC == RHSCC) {
+    // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+    if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+      Value *NewOr = Builder->CreateOr(Val, Val2);
+      return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+    }
   }
   
   // From here on, we only handle:
@@ -1667,6 +1957,18 @@
       }
   }
   
+  // Note: If we've gotten to the point of visiting the outer OR, then the
+  // inner one couldn't be simplified.  If it was a constant, then it won't
+  // be simplified by a later pass either, so we try swapping the inner/outer
+  // ORs in the hopes that we'll be able to simplify it this way.
+  // (X|C) | V --> (X|V) | C
+  if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+      match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+    Value *Inner = Builder->CreateOr(A, Op1);
+    Inner->takeName(Op0);
+    return BinaryOperator::CreateOr(Inner, C1);
+  }
+  
   return Changed ? &I : 0;
 }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp Tue Oct 26 19:48:03 2010
@@ -109,10 +109,9 @@
   TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
 
   unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
-  unsigned MaxAlign = Value::MaximumAlignment;
 
   // LLVM doesn't support alignments larger than this currently.
-  Align = std::min(Align, MaxAlign);
+  Align = std::min(Align, +Value::MaximumAlignment);
 
   if (PrefAlign > Align)
     Align = EnforceKnownAlignment(V, Align, PrefAlign);
@@ -281,7 +280,8 @@
 
     // memmove/cpy/set of zero bytes is a noop.
     if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
-      if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
+      if (NumBytes->isNullValue())
+        return EraseInstFromFunction(CI);
 
       if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
         if (CI->getZExtValue() == 1) {
@@ -290,6 +290,10 @@
           // alignment is sufficient.
         }
     }
+    
+    // No other transformations apply to volatile transfers.
+    if (MI->isVolatile())
+      return 0;
 
     // If we have a memmove and the source operation is a constant global,
     // then the source and dest pointers can't alias, so we can change this
@@ -539,7 +543,7 @@
       // X + 0 -> {X, false}
       if (RHS->isZero()) {
         Constant *V[] = {
-          UndefValue::get(II->getCalledValue()->getType()),
+          UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
         Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
@@ -698,6 +702,32 @@
     }
     break;
 
+  case Intrinsic::arm_neon_vld1:
+  case Intrinsic::arm_neon_vld2:
+  case Intrinsic::arm_neon_vld3:
+  case Intrinsic::arm_neon_vld4:
+  case Intrinsic::arm_neon_vld2lane:
+  case Intrinsic::arm_neon_vld3lane:
+  case Intrinsic::arm_neon_vld4lane:
+  case Intrinsic::arm_neon_vst1:
+  case Intrinsic::arm_neon_vst2:
+  case Intrinsic::arm_neon_vst3:
+  case Intrinsic::arm_neon_vst4:
+  case Intrinsic::arm_neon_vst2lane:
+  case Intrinsic::arm_neon_vst3lane:
+  case Intrinsic::arm_neon_vst4lane: {
+    unsigned MemAlign = GetOrEnforceKnownAlignment(II->getArgOperand(0));
+    unsigned AlignArg = II->getNumArgOperands() - 1;
+    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
+    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
+      II->setArgOperand(AlignArg,
+                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                         MemAlign, false));
+      return II;
+    }
+    break;
+  }
+
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore.  This can
     // happen when variable allocas are DCE'd.

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCasts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCasts.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCasts.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCasts.cpp Tue Oct 26 19:48:03 2010
@@ -396,6 +396,11 @@
   case Instruction::Trunc:
     // trunc(trunc(x)) -> trunc(x)
     return true;
+  case Instruction::ZExt:
+  case Instruction::SExt:
+    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+    return true;
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);
     return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
@@ -454,6 +459,29 @@
     Value *Zero = Constant::getNullValue(Src->getType());
     return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
   }
+  
+  // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+  Value *A = 0; ConstantInt *Cst = 0;
+  if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
+      Src->hasOneUse()) {
+    // We have three types to worry about here, the type of A, the source of
+    // the truncate (MidSize), and the destination of the truncate. We know that
+    // ASize < MidSize   and MidSize > ResultSize, but don't know the relation
+    // between ASize and ResultSize.
+    unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+    
+    // If the shift amount is larger than the size of A, then the result is
+    // known to be zero because all the input bits got shifted out.
+    if (Cst->getZExtValue() >= ASize)
+      return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+    // Since we're doing an lshr and a zero extend, and know that the shift
+    // amount is smaller than ASize, it is always safe to do the shift in A's
+    // type, then zero extend or truncate to the result.
+    Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+    Shift->takeName(Src);
+    return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+  }
 
   return 0;
 }
@@ -538,8 +566,7 @@
           
         if (CI.getType() == In->getType())
           return ReplaceInstUsesWith(CI, In);
-        else
-          return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+        return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
       }
     }
   }
@@ -1112,7 +1139,7 @@
         Arg->getOperand(0)->getType()->isFloatTy()) {
       Function *Callee = Call->getCalledFunction();
       Module *M = CI.getParent()->getParent()->getParent();
-      Constant* SqrtfFunc = M->getOrInsertFunction("sqrtf", 
+      Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", 
                                                    Callee->getAttributes(),
                                                    Builder->getFloatTy(),
                                                    Builder->getFloatTy(),
@@ -1120,6 +1147,11 @@
       CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
                                        "sqrtfcall");
       ret->setAttributes(Callee->getAttributes());
+      
+      
+      // Remove the old Call.  With -fmath-errno, it won't get marked readnone.
+      Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+      EraseInstFromFunction(*Call);
       return ret;
     }
   }
@@ -1335,6 +1367,199 @@
   return new ShuffleVectorInst(InVal, V2, Mask);
 }
 
+static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+  return Value % Ty->getPrimitiveSizeInBits() == 0;
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+  return Value / Ty->getPrimitiveSizeInBits();
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy.  Look through the value to see if we can decompose it into
+/// insertions into the vector.  See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+                                     SmallVectorImpl<Value*> &Elements,
+                                     const Type *VecEltTy) {
+  // Undef values never contribute useful bits to the result.
+  if (isa<UndefValue>(V)) return true;
+  
+  // If we got down to a value of the right type, we win, try inserting into the
+  // right element.
+  if (V->getType() == VecEltTy) {
+    // Inserting null doesn't actually insert any elements.
+    if (Constant *C = dyn_cast<Constant>(V))
+      if (C->isNullValue())
+        return true;
+    
+    // Fail if multiple elements are inserted into this slot.
+    if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+      return false;
+    
+    Elements[ElementIndex] = V;
+    return true;
+  }
+  
+  if (Constant *C = dyn_cast<Constant>(V)) {
+    // Figure out the # elements this provides, and bitcast it or slice it up
+    // as required.
+    unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+                                        VecEltTy);
+    // If the constant is the size of a vector element, we just need to bitcast
+    // it to the right type so it gets properly inserted.
+    if (NumElts == 1)
+      return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+                                      ElementIndex, Elements, VecEltTy);
+    
+    // Okay, this is a constant that covers multiple elements.  Slice it up into
+    // pieces and insert each element-sized piece into the vector.
+    if (!isa<IntegerType>(C->getType()))
+      C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+                                       C->getType()->getPrimitiveSizeInBits()));
+    unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+    const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+    
+    for (unsigned i = 0; i != NumElts; ++i) {
+      Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+                                                               i*ElementSize));
+      Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+      if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+        return false;
+    }
+    return true;
+  }
+  
+  if (!V->hasOneUse()) return false;
+  
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0) return false;
+  switch (I->getOpcode()) {
+  default: return false; // Unhandled case.
+  case Instruction::BitCast:
+    return CollectInsertionElements(I->getOperand(0), ElementIndex,
+                                    Elements, VecEltTy);  
+  case Instruction::ZExt:
+    if (!isMultipleOfTypeSize(
+                          I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+                              VecEltTy))
+      return false;
+    return CollectInsertionElements(I->getOperand(0), ElementIndex,
+                                    Elements, VecEltTy);  
+  case Instruction::Or:
+    return CollectInsertionElements(I->getOperand(0), ElementIndex,
+                                    Elements, VecEltTy) &&
+           CollectInsertionElements(I->getOperand(1), ElementIndex,
+                                    Elements, VecEltTy);
+  case Instruction::Shl: {
+    // Must be shifting by a constant that is a multiple of the element size.
+    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+    if (CI == 0) return false;
+    if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
+    unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
+    
+    return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
+                                    Elements, VecEltTy);
+  }
+      
+  }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
+/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements.  This is to
+/// optimize code like this:
+///
+///    %tmp37 = bitcast float %inc to i32
+///    %tmp38 = zext i32 %tmp37 to i64
+///    %tmp31 = bitcast float %inc5 to i32
+///    %tmp32 = zext i32 %tmp31 to i64
+///    %tmp33 = shl i64 %tmp32, 32
+///    %ins35 = or i64 %tmp33, %tmp38
+///    %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// Into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+                                                InstCombiner &IC) {
+  const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+  Value *IntInput = CI.getOperand(0);
+
+  SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+  if (!CollectInsertionElements(IntInput, 0, Elements,
+                                DestVecTy->getElementType()))
+    return 0;
+
+  // If we succeeded, we know that all of the element are specified by Elements
+  // or are zero if Elements has a null entry.  Recast this as a set of
+  // insertions.
+  Value *Result = Constant::getNullValue(CI.getType());
+  for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+    if (Elements[i] == 0) continue;  // Unset element.
+    
+    Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+                                             IC.Builder->getInt32(i));
+  }
+  
+  return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast.  The various long double bitcasts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+  Value *Src = CI.getOperand(0);
+  const Type *DestTy = CI.getType();
+
+  // If this is a bitcast from int to float, check to see if the int is an
+  // extraction from a vector.
+  Value *VecInput = 0;
+  // bitcast(trunc(bitcast(somevector)))
+  if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+      isa<VectorType>(VecInput->getType())) {
+    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+      // If the element type of the vector doesn't match the result type,
+      // bitcast it to be a vector type we can extract from.
+      if (VecTy->getElementType() != DestTy) {
+        VecTy = VectorType::get(DestTy,
+                                VecTy->getPrimitiveSizeInBits() / DestWidth);
+        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+      }
+    
+      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+    }
+  }
+  
+  // bitcast(trunc(lshr(bitcast(somevector), cst))
+  ConstantInt *ShAmt = 0;
+  if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+                                m_ConstantInt(ShAmt)))) &&
+      isa<VectorType>(VecInput->getType())) {
+    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+        ShAmt->getZExtValue() % DestWidth == 0) {
+      // If the element type of the vector doesn't match the result type,
+      // bitcast it to be a vector type we can extract from.
+      if (VecTy->getElementType() != DestTy) {
+        VecTy = VectorType::get(DestTy,
+                                VecTy->getPrimitiveSizeInBits() / DestWidth);
+        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+      }
+      
+      unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+    }
+  }
+  return 0;
+}
 
 Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
@@ -1386,6 +1611,11 @@
                                                ((Instruction*)NULL));
     }
   }
+  
+  // Try to optimize int -> float bitcasts.
+  if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+    if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+      return I;
 
   if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
     if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
@@ -1395,16 +1625,24 @@
       // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
     }
     
-    // If this is a cast from an integer to vector, check to see if the input
-    // is a trunc or zext of a bitcast from vector.  If so, we can replace all
-    // the casts with a shuffle and (potentially) a bitcast.
-    if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
-      CastInst *SrcCast = cast<CastInst>(Src);
-      if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
-        if (isa<VectorType>(BCIn->getOperand(0)->getType()))
-          if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+    if (isa<IntegerType>(SrcTy)) {
+      // If this is a cast from an integer to vector, check to see if the input
+      // is a trunc or zext of a bitcast from vector.  If so, we can replace all
+      // the casts with a shuffle and (potentially) a bitcast.
+      if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+        CastInst *SrcCast = cast<CastInst>(Src);
+        if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+          if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+            if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
                                                cast<VectorType>(DestTy), *this))
-            return I;
+              return I;
+      }
+      
+      // If the input is an 'or' instruction, we may be doing shifts and ors to
+      // assemble the elements of the vector manually.  Try to rip the code out
+      // and replace it with insertelements.
+      if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+        return ReplaceInstUsesWith(CI, V);
     }
   }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp Tue Oct 26 19:48:03 2010
@@ -146,10 +146,14 @@
   if (TD) {
     unsigned KnownAlign =
       GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
-    if (KnownAlign >
-        (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
-                                  LI.getAlignment()))
+    unsigned LoadAlign = LI.getAlignment();
+    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
+      TD->getABITypeAlignment(LI.getType());
+
+    if (KnownAlign > EffectiveLoadAlign)
       LI.setAlignment(KnownAlign);
+    else if (LoadAlign == 0)
+      LI.setAlignment(EffectiveLoadAlign);
   }
 
   // load (cast X) --> cast (load X) iff safe.
@@ -326,7 +330,9 @@
   
   NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                    SIOp0->getName()+".c");
-  return new StoreInst(NewCast, CastOp);
+  SI.setOperand(0, NewCast);
+  SI.setOperand(1, CastOp);
+  return &SI;
 }
 
 /// equivalentAddressValues - Test if A and B will obviously have the same
@@ -411,10 +417,14 @@
   if (TD) {
     unsigned KnownAlign =
       GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
-    if (KnownAlign >
-        (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
-                                  SI.getAlignment()))
+    unsigned StoreAlign = SI.getAlignment();
+    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
+      TD->getABITypeAlignment(Val->getType());
+
+    if (KnownAlign > EffectiveStoreAlign)
       SI.setAlignment(KnownAlign);
+    else if (StoreAlign == 0)
+      SI.setAlignment(EffectiveStoreAlign);
   }
 
   // Do really simple DSE, to catch cases where there are several consecutive

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp Tue Oct 26 19:48:03 2010
@@ -56,10 +56,270 @@
   return 0;
 }
 
+/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// logically to the left or right by some number of bits.  This should return
+/// true if the expression can be computed for the same cost as the current
+/// expression tree.  This is used to eliminate extraneous shifting from things
+/// like:
+///      %C = shl i128 %A, 64
+///      %D = shl i128 %B, 96
+///      %E = or i128 %C, %D
+///      %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64-bits.  If
+/// this succeeds, the GetShiftedValue function will be called to produce the
+/// value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
+                               InstCombiner &IC) {
+  // We can always evaluate constants shifted.
+  if (isa<Constant>(V))
+    return true;
+  
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return false;
+  
+  // If this is the opposite shift, we can directly reuse the input of the shift
+  // if the needed bits are already zero in the input.  This allows us to reuse
+  // the value which means that we don't care if the shift has multiple uses.
+  //  TODO:  Handle opposite shift by exact value.
+  ConstantInt *CI;
+  if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+      (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+    if (CI->getZExtValue() == NumBits) {
+      // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+      // If this is a truncate of a logical shr, we can truncate it to a smaller
+      // lshr iff we know that the bits we would otherwise be shifting in are
+      // already zeros.
+      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+      uint32_t BitWidth = Ty->getScalarSizeInBits();
+      if (MaskedValueIsZero(I->getOperand(0),
+            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+          CI->getLimitedValue(BitWidth) < BitWidth) {
+        return CanEvaluateTruncated(I->getOperand(0), Ty);
+      }
+#endif
+      
+    }
+  }
+  
+  // We can't mutate something that has multiple uses: doing so would
+  // require duplicating the instruction in general, which isn't profitable.
+  if (!I->hasOneUse()) return false;
+  
+  switch (I->getOpcode()) {
+  default: return false;
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    // Bitwise operators can all be arbitrarily evaluated shifted.
+    return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
+           CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+      
+  case Instruction::Shl: {
+    // We can often fold the shift into shifts-by-a-constant.
+    CI = dyn_cast<ConstantInt>(I->getOperand(1));
+    if (CI == 0) return false;
+
+    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+    if (isLeftShift) return true;
+    
+    // We can always turn shl(c)+shr(c) -> and(c2).
+    if (CI->getValue() == NumBits) return true;
+      
+    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+    // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
+    // profitable unless we know the and'd out bits are already zero.
+    if (CI->getZExtValue() > NumBits) {
+      unsigned HighBits = CI->getZExtValue() - NumBits;
+      if (MaskedValueIsZero(I->getOperand(0),
+                            APInt::getHighBitsSet(TypeWidth, HighBits)))
+        return true;
+    }
+      
+    return false;
+  }
+  case Instruction::LShr: {
+    // We can often fold the shift into shifts-by-a-constant.
+    CI = dyn_cast<ConstantInt>(I->getOperand(1));
+    if (CI == 0) return false;
+    
+    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+    if (!isLeftShift) return true;
+    
+    // We can always turn lshr(c)+shl(c) -> and(c2).
+    if (CI->getValue() == NumBits) return true;
+      
+    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+    // We can turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
+    // profitable unless we know the and'd out bits are already zero.
+    if (CI->getZExtValue() > NumBits) {
+      unsigned LowBits = CI->getZExtValue() - NumBits;
+      if (MaskedValueIsZero(I->getOperand(0),
+                            APInt::getLowBitsSet(TypeWidth, LowBits)))
+        return true;
+    }
+      
+    return false;
+  }
+  case Instruction::Select: {
+    SelectInst *SI = cast<SelectInst>(I);
+    return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
+           CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+  }
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.  Note that we never
+    // get into trouble with cyclic PHIs here because we only consider
+    // instructions with a single use.
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+        return false;
+    return true;
+  }
+  }      
+}
+
+/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// this function inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+                              InstCombiner &IC) {
+  // We can always evaluate constants shifted.
+  if (Constant *C = dyn_cast<Constant>(V)) {
+    if (isLeftShift)
+      V = IC.Builder->CreateShl(C, NumBits);
+    else
+      V = IC.Builder->CreateLShr(C, NumBits);
+    // If we got a constantexpr back, try to simplify it with TD info.
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+      V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+    return V;
+  }
+  
+  Instruction *I = cast<Instruction>(V);
+  IC.Worklist.Add(I);
+
+  switch (I->getOpcode()) {
+  default: assert(0 && "Inconsistency with CanEvaluateShifted");
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    // Bitwise operators can all be arbitrarily evaluated shifted.
+    I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
+    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+    return I;
+    
+  case Instruction::Shl: {
+    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+    // We only accept shifts-by-a-constant in CanEvaluateShifted.
+    ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+    
+    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+    if (isLeftShift) {
+      // If this is oversized composite shift, then unsigned shifts get 0.
+      unsigned NewShAmt = NumBits+CI->getZExtValue();
+      if (NewShAmt >= TypeWidth)
+        return Constant::getNullValue(I->getType());
+
+      I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+      return I;
+    }
+    
+    // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
+    // zeros.
+    if (CI->getValue() == NumBits) {
+      APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+      V = IC.Builder->CreateAnd(I->getOperand(0),
+                                ConstantInt::get(I->getContext(), Mask));
+      if (Instruction *VI = dyn_cast<Instruction>(V)) {
+        VI->moveBefore(I);
+        VI->takeName(I);
+      }
+      return V;
+    }
+    
+    // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+    // the and won't be needed.
+    assert(CI->getZExtValue() > NumBits);
+    I->setOperand(1, ConstantInt::get(I->getType(),
+                                      CI->getZExtValue() - NumBits));
+    return I;
+  }
+  case Instruction::LShr: {
+    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+    // We only accept shifts-by-a-constant in CanEvaluateShifted.
+    ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+    
+    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+    if (!isLeftShift) {
+      // If this is oversized composite shift, then unsigned shifts get 0.
+      unsigned NewShAmt = NumBits+CI->getZExtValue();
+      if (NewShAmt >= TypeWidth)
+        return Constant::getNullValue(I->getType());
+      
+      I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+      return I;
+    }
+    
+    // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
+    // zeros.
+    if (CI->getValue() == NumBits) {
+      APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+      V = IC.Builder->CreateAnd(I->getOperand(0),
+                                ConstantInt::get(I->getContext(), Mask));
+      if (Instruction *VI = dyn_cast<Instruction>(V)) {
+        VI->moveBefore(I);
+        VI->takeName(I);
+      }
+      return V;
+    }
+    
+    // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
+    // the and won't be needed.
+    assert(CI->getZExtValue() > NumBits);
+    I->setOperand(1, ConstantInt::get(I->getType(),
+                                      CI->getZExtValue() - NumBits));
+    return I;
+  }
+    
+  case Instruction::Select:
+    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+    I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+    return I;
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.  Note that we never
+    // get into trouble with cyclic PHIs here because we only consider
+    // instructions with a single use.
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
+                                              NumBits, isLeftShift, IC));
+    return PN;
+  }
+  }      
+}
+
+
+
 Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                                BinaryOperator &I) {
   bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
+  
+  
+  // See if we can propagate this shift into the input, this covers the trivial
+  // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
+  if (I.getOpcode() != Instruction::AShr &&
+      CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+    DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+              " to eliminate shift:\n  IN: " << *Op0 << "\n  SH: " << I <<"\n");
+    
+    return ReplaceInstUsesWith(I, 
+                 GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+  }
+  
+  
   // See if we can simplify any instructions used by the instruction whose sole 
   // purpose is to compute bits we don't care about.
   uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
@@ -288,39 +548,17 @@
                                     ConstantInt::get(Ty, AmtSum));
     }
     
-    if (ShiftOp->getOpcode() == Instruction::LShr &&
-        I.getOpcode() == Instruction::AShr) {
-      if (AmtSum >= TypeBits)
-        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-      
-      // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
-      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
-    }
-    
-    if (ShiftOp->getOpcode() == Instruction::AShr &&
-        I.getOpcode() == Instruction::LShr) {
-      // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
-      if (AmtSum >= TypeBits)
-        AmtSum = TypeBits-1;
-      
-      Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
-
-      APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-      return BinaryOperator::CreateAnd(Shift,
-                                       ConstantInt::get(I.getContext(), Mask));
-    }
-    
-    // Okay, if we get here, one shift must be left, and the other shift must be
-    // right.  See if the amounts are equal.
     if (ShiftAmt1 == ShiftAmt2) {
       // If we have ((X >>? C) << C), turn this into X & (-1 << C).
-      if (I.getOpcode() == Instruction::Shl) {
+      if (I.getOpcode() == Instruction::Shl &&
+          ShiftOp->getOpcode() != Instruction::Shl) {
         APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
         return BinaryOperator::CreateAnd(X,
                                          ConstantInt::get(I.getContext(),Mask));
       }
       // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
-      if (I.getOpcode() == Instruction::LShr) {
+      if (I.getOpcode() == Instruction::LShr &&
+          ShiftOp->getOpcode() == Instruction::Shl) {
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
         return BinaryOperator::CreateAnd(X,
                                         ConstantInt::get(I.getContext(), Mask));
@@ -329,7 +567,8 @@
       uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
       
       // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
-      if (I.getOpcode() == Instruction::Shl) {
+      if (I.getOpcode() == Instruction::Shl &&
+          ShiftOp->getOpcode() != Instruction::Shl) {
         assert(ShiftOp->getOpcode() == Instruction::LShr ||
                ShiftOp->getOpcode() == Instruction::AShr);
         Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
@@ -340,7 +579,8 @@
       }
       
       // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
-      if (I.getOpcode() == Instruction::LShr) {
+      if (I.getOpcode() == Instruction::LShr &&
+          ShiftOp->getOpcode() == Instruction::Shl) {
         assert(ShiftOp->getOpcode() == Instruction::Shl);
         Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
         
@@ -355,9 +595,8 @@
       uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
 
       // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
-      if (I.getOpcode() == Instruction::Shl) {
-        assert(ShiftOp->getOpcode() == Instruction::LShr ||
-               ShiftOp->getOpcode() == Instruction::AShr);
+      if (I.getOpcode() == Instruction::Shl &&
+          ShiftOp->getOpcode() != Instruction::Shl) {
         Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
                                             ConstantInt::get(Ty, ShiftDiff));
         
@@ -367,8 +606,8 @@
       }
       
       // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
-      if (I.getOpcode() == Instruction::LShr) {
-        assert(ShiftOp->getOpcode() == Instruction::Shl);
+      if (I.getOpcode() == Instruction::LShr &&
+          ShiftOp->getOpcode() == Instruction::Shl) {
         Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
         
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));

Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp Tue Oct 26 19:48:03 2010
@@ -48,6 +48,7 @@
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm-c/Initialization.h"
 #include <algorithm>
 #include <climits>
 using namespace llvm;
@@ -58,10 +59,18 @@
 STATISTIC(NumDeadInst , "Number of dead inst eliminated");
 STATISTIC(NumSunkInst , "Number of instructions sunk");
 
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+  initializeInstCombinerPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+  initializeInstCombine(*unwrap(R));
+}
 
 char InstCombiner::ID = 0;
 INITIALIZE_PASS(InstCombiner, "instcombine",
-                "Combine redundant instructions", false, false);
+                "Combine redundant instructions", false, false)
 
 void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreservedID(LCSSAID);
@@ -1023,10 +1032,8 @@
   bool MadeIRChange = false;
   SmallVector<BasicBlock*, 256> Worklist;
   Worklist.push_back(BB);
-  
-  std::vector<Instruction*> InstrsForInstCombineWorklist;
-  InstrsForInstCombineWorklist.reserve(128);
 
+  SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
   SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
   
   do {

Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMInstrumentation
   EdgeProfiling.cpp
+  Instrumentation.cpp
   OptimalEdgeProfiling.cpp
   ProfilingUtils.cpp
   )

Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp Tue Oct 26 19:48:03 2010
@@ -34,7 +34,9 @@
     bool runOnModule(Module &M);
   public:
     static char ID; // Pass identification, replacement for typeid
-    EdgeProfiler() : ModulePass(&ID) {}
+    EdgeProfiler() : ModulePass(ID) {
+      initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual const char *getPassName() const {
       return "Edge Profiler";
@@ -44,7 +46,7 @@
 
 char EdgeProfiler::ID = 0;
 INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
-                "Insert instrumentation for edge profiling", false, false);
+                "Insert instrumentation for edge profiling", false, false)
 
 ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp Tue Oct 26 19:48:03 2010
@@ -36,7 +36,9 @@
     bool runOnModule(Module &M);
   public:
     static char ID; // Pass identification, replacement for typeid
-    OptimalEdgeProfiler() : ModulePass(&ID) {}
+    OptimalEdgeProfiler() : ModulePass(ID) {
+      initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
+    }
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequiredID(ProfileEstimatorPassID);
@@ -50,9 +52,14 @@
 }
 
 char OptimalEdgeProfiler::ID = 0;
-INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling", 
+INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling", 
+                "Insert optimal instrumentation for edge profiling",
+                false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling", 
                 "Insert optimal instrumentation for edge profiling",
-                false, false);
+                false, false)
 
 ModulePass *llvm::createOptimalEdgeProfilerPass() {
   return new OptimalEdgeProfiler();

Removed: llvm/branches/wendling/eh/lib/Transforms/Scalar/ABCD.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ABCD.cpp?rev=117424&view=auto
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ABCD.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ABCD.cpp (removed)
@@ -1,1113 +0,0 @@
-//===------- ABCD.cpp - Removes redundant conditional branches ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass removes redundant branch instructions. This algorithm was
-// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper
-// "ABCD: Eliminating Array Bounds Checks on Demand (2000)". The original
-// Algorithm was created to remove array bound checks for strongly typed
-// languages. This implementation expands the idea and removes any conditional
-// branches that can be proved redundant, not only those used in array bound
-// checks. With the SSI representation, each variable has a
-// constraint. By analyzing these constraints we can prove that a branch is
-// redundant. When a branch is proved redundant it means that
-// one direction will always be taken; thus, we can change this branch into an
-// unconditional jump.
-// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination
-// after ABCD to clean up the code.
-// This implementation was created based on the implementation of the ABCD
-// algorithm implemented for the compiler Jitrino.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "abcd"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-
-using namespace llvm;
-
-STATISTIC(NumBranchTested, "Number of conditional branches analyzed");
-STATISTIC(NumBranchRemoved, "Number of conditional branches removed");
-
-namespace {
-
-class ABCD : public FunctionPass {
- public:
-  static char ID;  // Pass identification, replacement for typeid.
-  ABCD() : FunctionPass(&ID) {}
-
-  void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<SSI>();
-  }
-
-  bool runOnFunction(Function &F);
-
- private:
-  /// Keep track of whether we've modified the program yet.
-  bool modified;
-
-  enum ProveResult {
-    False = 0,
-    Reduced = 1,
-    True = 2
-  };
-
-  typedef ProveResult (*meet_function)(ProveResult, ProveResult);
-  static ProveResult max(ProveResult res1, ProveResult res2) {
-    return (ProveResult) std::max(res1, res2);
-  }
-  static ProveResult min(ProveResult res1, ProveResult res2) {
-    return (ProveResult) std::min(res1, res2);
-  }
-
-  class Bound {
-   public:
-    Bound(APInt v, bool upper) : value(v), upper_bound(upper) {}
-    Bound(const Bound &b, int cnst)
-      : value(b.value - cnst), upper_bound(b.upper_bound) {}
-    Bound(const Bound &b, const APInt &cnst)
-      : value(b.value - cnst), upper_bound(b.upper_bound) {}
-
-    /// Test if Bound is an upper bound
-    bool isUpperBound() const { return upper_bound; }
-
-    /// Get the bitwidth of this bound
-    int32_t getBitWidth() const { return value.getBitWidth(); }
-
-    /// Creates a Bound incrementing the one received
-    static Bound createIncrement(const Bound &b) {
-      return Bound(b.isUpperBound() ? b.value+1 : b.value-1,
-                   b.upper_bound);
-    }
-
-    /// Creates a Bound decrementing the one received
-    static Bound createDecrement(const Bound &b) {
-      return Bound(b.isUpperBound() ? b.value-1 : b.value+1,
-                   b.upper_bound);
-    }
-
-    /// Test if two bounds are equal
-    static bool eq(const Bound *a, const Bound *b) {
-      if (!a || !b) return false;
-
-      assert(a->isUpperBound() == b->isUpperBound());
-      return a->value == b->value;
-    }
-
-    /// Test if val is less than or equal to Bound b
-    static bool leq(APInt val, const Bound &b) {
-      return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value);
-    }
-
-    /// Test if Bound a is less then or equal to Bound
-    static bool leq(const Bound &a, const Bound &b) {
-      assert(a.isUpperBound() == b.isUpperBound());
-      return a.isUpperBound() ? a.value.sle(b.value) :
-                                a.value.sge(b.value);
-    }
-
-    /// Test if Bound a is less then Bound b
-    static bool lt(const Bound &a, const Bound &b) {
-      assert(a.isUpperBound() == b.isUpperBound());
-      return a.isUpperBound() ? a.value.slt(b.value) :
-                                a.value.sgt(b.value);
-    }
-
-    /// Test if Bound b is greater then or equal val
-    static bool geq(const Bound &b, APInt val) {
-      return leq(val, b);
-    }
-
-    /// Test if Bound a is greater then or equal Bound b
-    static bool geq(const Bound &a, const Bound &b) {
-      return leq(b, a);
-    }
-
-   private:
-    APInt value;
-    bool upper_bound;
-  };
-
-  /// This class is used to store results some parts of the graph,
-  /// so information does not need to be recalculated. The maximum false,
-  /// minimum true and minimum reduced results are stored
-  class MemoizedResultChart {
-   public:
-     MemoizedResultChart() {}
-     MemoizedResultChart(const MemoizedResultChart &other) {
-       if (other.max_false)
-         max_false.reset(new Bound(*other.max_false));
-       if (other.min_true)
-         min_true.reset(new Bound(*other.min_true));
-       if (other.min_reduced)
-         min_reduced.reset(new Bound(*other.min_reduced));
-     }
-
-    /// Returns the max false
-    const Bound *getFalse() const { return max_false.get(); }
-
-    /// Returns the min true
-    const Bound *getTrue() const { return min_true.get(); }
-
-    /// Returns the min reduced
-    const Bound *getReduced() const { return min_reduced.get(); }
-
-    /// Return the stored result for this bound
-    ProveResult getResult(const Bound &bound) const;
-
-    /// Stores a false found
-    void addFalse(const Bound &bound);
-
-    /// Stores a true found
-    void addTrue(const Bound &bound);
-
-    /// Stores a Reduced found
-    void addReduced(const Bound &bound);
-
-    /// Clears redundant reduced
-    /// If a min_true is smaller than a min_reduced then the min_reduced
-    /// is unnecessary and then removed. It also works for min_reduced
-    /// begin smaller than max_false.
-    void clearRedundantReduced();
-
-    void clear() {
-      max_false.reset();
-      min_true.reset();
-      min_reduced.reset();
-    }
-
-  private:
-    OwningPtr<Bound> max_false, min_true, min_reduced;
-  };
-
-  /// This class stores the result found for a node of the graph,
-  /// so these results do not need to be recalculated, only searched for.
-  class MemoizedResult {
-  public:
-    /// Test if there is true result stored from b to a
-    /// that is less then the bound
-    bool hasTrue(Value *b, const Bound &bound) const {
-      const Bound *trueBound = map.lookup(b).getTrue();
-      return trueBound && Bound::leq(*trueBound, bound);
-    }
-
-    /// Test if there is false result stored from b to a
-    /// that is less then the bound
-    bool hasFalse(Value *b, const Bound &bound) const {
-      const Bound *falseBound = map.lookup(b).getFalse();
-      return falseBound && Bound::leq(*falseBound, bound);
-    }
-
-    /// Test if there is reduced result stored from b to a
-    /// that is less then the bound
-    bool hasReduced(Value *b, const Bound &bound) const {
-      const Bound *reducedBound = map.lookup(b).getReduced();
-      return reducedBound && Bound::leq(*reducedBound, bound);
-    }
-
-    /// Returns the stored bound for b
-    ProveResult getBoundResult(Value *b, const Bound &bound) {
-      return map[b].getResult(bound);
-    }
-
-    /// Clears the map
-    void clear() {
-      DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin();
-      DenseMapIterator<Value*, MemoizedResultChart> end = map.end();
-      for (; begin != end; ++begin) {
-        begin->second.clear();
-      }
-      map.clear();
-    }
-
-    /// Stores the bound found
-    void updateBound(Value *b, const Bound &bound, const ProveResult res);
-
-  private:
-    // Maps a nod in the graph with its results found.
-    DenseMap<Value*, MemoizedResultChart> map;
-  };
-
-  /// This class represents an edge in the inequality graph used by the
-  /// ABCD algorithm. An edge connects node v to node u with a value c if
-  /// we could infer a constraint v <= u + c in the source program.
-  class Edge {
-  public:
-    Edge(Value *V, APInt val, bool upper)
-      : vertex(V), value(val), upper_bound(upper) {}
-
-    Value *getVertex() const { return vertex; }
-    const APInt &getValue() const { return value; }
-    bool isUpperBound() const { return upper_bound; }
-
-  private:
-    Value *vertex;
-    APInt value;
-    bool upper_bound;
-  };
-
-  /// Weighted and Directed graph to represent constraints.
-  /// There is one type of constraint, a <= b + X, which will generate an
-  /// edge from b to a with weight X.
-  class InequalityGraph {
-  public:
-
-    /// Adds an edge from V_from to V_to with weight value
-    void addEdge(Value *V_from, Value *V_to, APInt value, bool upper);
-
-    /// Test if there is a node V
-    bool hasNode(Value *V) const { return graph.count(V); }
-
-    /// Test if there is any edge from V in the upper direction
-    bool hasEdge(Value *V, bool upper) const;
-
-    /// Returns all edges pointed by vertex V
-    SmallVector<Edge, 16> getEdges(Value *V) const {
-      return graph.lookup(V);
-    }
-
-    /// Prints the graph in dot format.
-    /// Blue edges represent upper bound and Red lower bound.
-    void printGraph(raw_ostream &OS, Function &F) const {
-      printHeader(OS, F);
-      printBody(OS);
-      printFooter(OS);
-    }
-
-    /// Clear the graph
-    void clear() {
-      graph.clear();
-    }
-
-  private:
-    DenseMap<Value *, SmallVector<Edge, 16> > graph;
-
-    /// Prints the header of the dot file
-    void printHeader(raw_ostream &OS, Function &F) const;
-
-    /// Prints the footer of the dot file
-    void printFooter(raw_ostream &OS) const {
-      OS << "}\n";
-    }
-
-    /// Prints the body of the dot file
-    void printBody(raw_ostream &OS) const;
-
-    /// Prints vertex source to the dot file
-    void printVertex(raw_ostream &OS, Value *source) const;
-
-    /// Prints the edge to the dot file
-    void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const;
-
-    void printName(raw_ostream &OS, Value *info) const;
-  };
-
-  /// Iterates through all BasicBlocks, if the Terminator Instruction
-  /// uses an Comparator Instruction, all operands of this comparator
-  /// are sent to be transformed to SSI. Only Instruction operands are
-  /// transformed.
-  void createSSI(Function &F);
-
-  /// Creates the graphs for this function.
-  /// It will look for all comparators used in branches, and create them.
-  /// These comparators will create constraints for any instruction as an
-  /// operand.
-  void executeABCD(Function &F);
-
-  /// Seeks redundancies in the comparator instruction CI.
-  /// If the ABCD algorithm can prove that the comparator CI always
-  /// takes one way, then the Terminator Instruction TI is substituted from
-  /// a conditional branch to a unconditional one.
-  /// This code basically receives a comparator, and verifies which kind of
-  /// instruction it is. Depending on the kind of instruction, we use different
-  /// strategies to prove its redundancy.
-  void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI);
-
-  /// Substitutes Terminator Instruction TI, that is a conditional branch,
-  /// with one unconditional branch. Succ_edge determines if the new
-  /// unconditional edge will be the first or second edge of the former TI
-  /// instruction.
-  void removeRedundancy(TerminatorInst *TI, bool Succ_edge);
-
-  /// When an conditional branch is removed, the BasicBlock that is no longer
-  /// reachable will have problems in phi functions. This method fixes these
-  /// phis removing the former BasicBlock from the list of incoming BasicBlocks
-  /// of all phis. In case the phi remains with no predecessor it will be
-  /// marked to be removed later.
-  void fixPhi(BasicBlock *BB, BasicBlock *Succ);
-
-  /// Removes phis that have no predecessor
-  void removePhis();
-
-  /// Creates constraints for Instructions.
-  /// If the constraint for this instruction has already been created
-  /// nothing is done.
-  void createConstraintInstruction(Instruction *I);
-
-  /// Creates constraints for Binary Operators.
-  /// It will create constraints only for addition and subtraction,
-  /// the other binary operations are not treated by ABCD.
-  /// For additions in the form a = b + X and a = X + b, where X is a constant,
-  /// the constraint a <= b + X can be obtained. For this constraint, an edge
-  /// a->b with weight X is added to the lower bound graph, and an edge
-  /// b->a with weight -X is added to the upper bound graph.
-  /// Only subtractions in the format a = b - X is used by ABCD.
-  /// Edges are created using the same semantic as addition.
-  void createConstraintBinaryOperator(BinaryOperator *BO);
-
-  /// Creates constraints for Comparator Instructions.
-  /// Only comparators that have any of the following operators
-  /// are used to create constraints: >=, >, <=, <. And only if
-  /// at least one operand is an Instruction. In a Comparator Instruction
-  /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where
-  /// t and f represent sigma for operands in true and false branches. The
-  /// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and
-  /// b_f <= b. There are two more constraints that depend on the operator.
-  /// For the operator <= : a_t <= b_t   and b_f <= a_f-1
-  /// For the operator <  : a_t <= b_t-1 and b_f <= a_f
-  /// For the operator >= : b_t <= a_t   and a_f <= b_f-1
-  /// For the operator >  : b_t <= a_t-1 and a_f <= b_f
-  void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI);
-
-  /// Creates constraints for PHI nodes.
-  /// In a PHI node a = phi(b,c) we can create the constraint
-  /// a<= max(b,c). With this constraint there will be the edges,
-  /// b->a and c->a with weight 0 in the lower bound graph, and the edges
-  /// a->b and a->c with weight 0 in the upper bound graph.
-  void createConstraintPHINode(PHINode *PN);
-
-  /// Given a binary operator, we are only interest in the case
-  /// that one operand is an Instruction and the other is a ConstantInt. In
-  /// this case the method returns true, otherwise false. It also obtains the
-  /// Instruction and ConstantInt from the BinaryOperator and returns it.
-  bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
-                                Instruction **I2, ConstantInt **C1,
-                                ConstantInt **C2);
-
-  /// This method creates a constraint between a Sigma and an Instruction.
-  /// These constraints are created as soon as we find a comparator that uses a
-  /// SSI variable.
-  void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
-                               BasicBlock *BB_succ_f, PHINode **SIG_op_t,
-                               PHINode **SIG_op_f);
-
-  /// If PN_op1 and PN_o2 are different from NULL, create a constraint
-  /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
-  /// with the respective V_op#, if V_op# is a ConstantInt.
-  void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, 
-                              ConstantInt *V_op1, ConstantInt *V_op2,
-                              APInt value);
-
-  /// Returns the sigma representing the Instruction I in BasicBlock BB.
-  /// Returns NULL in case there is no sigma for this Instruction in this
-  /// Basic Block. This methods assume that sigmas are the first instructions
-  /// in a block, and that there can be only two sigmas in a block. So it will
-  /// only look on the first two instructions of BasicBlock BB.
-  PHINode *findSigma(BasicBlock *BB, Instruction *I);
-
-  /// Original ABCD algorithm to prove redundant checks.
-  /// This implementation works on any kind of inequality branch.
-  bool demandProve(Value *a, Value *b, int c, bool upper_bound);
-
-  /// Prove that distance between b and a is <= bound
-  ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level);
-
-  /// Updates the distance value for a and b
-  void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level,
-                         meet_function meet);
-
-  InequalityGraph inequality_graph;
-  MemoizedResult mem_result;
-  DenseMap<Value*, const Bound*> active;
-  SmallPtrSet<Value*, 16> created;
-  SmallVector<PHINode *, 16> phis_to_remove;
-};
-
-}  // end anonymous namespace.
-
-char ABCD::ID = 0;
-INITIALIZE_PASS(ABCD, "abcd",
-                "ABCD: Eliminating Array Bounds Checks on Demand",
-                false, false);
-
-bool ABCD::runOnFunction(Function &F) {
-  modified = false;
-  createSSI(F);
-  executeABCD(F);
-  DEBUG(inequality_graph.printGraph(dbgs(), F));
-  removePhis();
-
-  inequality_graph.clear();
-  mem_result.clear();
-  active.clear();
-  created.clear();
-  phis_to_remove.clear();
-  return modified;
-}
-
-/// Iterates through all BasicBlocks, if the Terminator Instruction
-/// uses an Comparator Instruction, all operands of this comparator
-/// are sent to be transformed to SSI. Only Instruction operands are
-/// transformed.
-void ABCD::createSSI(Function &F) {
-  SSI *ssi = &getAnalysis<SSI>();
-
-  SmallVector<Instruction *, 16> Insts;
-
-  for (Function::iterator begin = F.begin(), end = F.end();
-       begin != end; ++begin) {
-    BasicBlock *BB = begin;
-    TerminatorInst *TI = BB->getTerminator();
-    if (TI->getNumOperands() == 0)
-      continue;
-
-    if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) {
-      if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) {
-        modified = true;  // XXX: but yet createSSI might do nothing
-        Insts.push_back(I);
-      }
-      if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) {
-        modified = true;
-        Insts.push_back(I);
-      }
-    }
-  }
-  ssi->createSSI(Insts);
-}
-
-/// Creates the graphs for this function.
-/// It will look for all comparators used in branches, and create them.
-/// These comparators will create constraints for any instruction as an
-/// operand.
-void ABCD::executeABCD(Function &F) {
-  for (Function::iterator begin = F.begin(), end = F.end();
-       begin != end; ++begin) {
-    BasicBlock *BB = begin;
-    TerminatorInst *TI = BB->getTerminator();
-    if (TI->getNumOperands() == 0)
-      continue;
-
-    ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0));
-    if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy())
-      continue;
-
-    createConstraintCmpInst(ICI, TI);
-    seekRedundancy(ICI, TI);
-  }
-}
-
-/// Seeks redundancies in the comparator instruction CI.
-/// If the ABCD algorithm can prove that the comparator CI always
-/// takes one way, then the Terminator Instruction TI is substituted from
-/// a conditional branch to a unconditional one.
-/// This code basically receives a comparator, and verifies which kind of
-/// instruction it is. Depending on the kind of instruction, we use different
-/// strategies to prove its redundancy.
-void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) {
-  CmpInst::Predicate Pred = ICI->getPredicate();
-
-  Value *source, *dest;
-  int distance1, distance2;
-  bool upper;
-
-  switch(Pred) {
-    case CmpInst::ICMP_SGT: // signed greater than
-      upper = false;
-      distance1 = 1;
-      distance2 = 0;
-      break;
-
-    case CmpInst::ICMP_SGE: // signed greater or equal
-      upper = false;
-      distance1 = 0;
-      distance2 = -1;
-      break;
-
-    case CmpInst::ICMP_SLT: // signed less than
-      upper = true;
-      distance1 = -1;
-      distance2 = 0;
-      break;
-
-    case CmpInst::ICMP_SLE: // signed less or equal
-      upper = true;
-      distance1 = 0;
-      distance2 = 1;
-      break;
-
-    default:
-      return;
-  }
-
-  ++NumBranchTested;
-  source = ICI->getOperand(0);
-  dest = ICI->getOperand(1);
-  if (demandProve(dest, source, distance1, upper)) {
-    removeRedundancy(TI, true);
-  } else if (demandProve(dest, source, distance2, !upper)) {
-    removeRedundancy(TI, false);
-  }
-}
-
-/// Substitutes Terminator Instruction TI, that is a conditional branch,
-/// with one unconditional branch. Succ_edge determines if the new
-/// unconditional edge will be the first or second edge of the former TI
-/// instruction.
-void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) {
-  BasicBlock *Succ;
-  if (Succ_edge) {
-    Succ = TI->getSuccessor(0);
-    fixPhi(TI->getParent(), TI->getSuccessor(1));
-  } else {
-    Succ = TI->getSuccessor(1);
-    fixPhi(TI->getParent(), TI->getSuccessor(0));
-  }
-
-  BranchInst::Create(Succ, TI);
-  TI->eraseFromParent();  // XXX: invoke
-  ++NumBranchRemoved;
-  modified = true;
-}
-
-/// When an conditional branch is removed, the BasicBlock that is no longer
-/// reachable will have problems in phi functions. This method fixes these
-/// phis removing the former BasicBlock from the list of incoming BasicBlocks
-/// of all phis. In case the phi remains with no predecessor it will be
-/// marked to be removed later.
-void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) {
-  BasicBlock::iterator begin = Succ->begin();
-  while (PHINode *PN = dyn_cast<PHINode>(begin++)) {
-    PN->removeIncomingValue(BB, false);
-    if (PN->getNumIncomingValues() == 0)
-      phis_to_remove.push_back(PN);
-  }
-}
-
-/// Removes phis that have no predecessor
-void ABCD::removePhis() {
-  for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) {
-    PHINode *PN = phis_to_remove[i];
-    PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
-    PN->eraseFromParent();
-  }
-}
-
-/// Creates constraints for Instructions.
-/// If the constraint for this instruction has already been created
-/// nothing is done.
-void ABCD::createConstraintInstruction(Instruction *I) {
-  // Test if this instruction has not been created before
-  if (created.insert(I)) {
-    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-      createConstraintBinaryOperator(BO);
-    } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
-      createConstraintPHINode(PN);
-    }
-  }
-}
-
-/// Creates constraints for Binary Operators.
-/// It will create constraints only for addition and subtraction,
-/// the other binary operations are not treated by ABCD.
-/// For additions in the form a = b + X and a = X + b, where X is a constant,
-/// the constraint a <= b + X can be obtained. For this constraint, an edge
-/// a->b with weight X is added to the lower bound graph, and an edge
-/// b->a with weight -X is added to the upper bound graph.
-/// Only subtractions in the format a = b - X is used by ABCD.
-/// Edges are created using the same semantic as addition.
-void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) {
-  Instruction *I1 = NULL, *I2 = NULL;
-  ConstantInt *CI1 = NULL, *CI2 = NULL;
-
-  // Test if an operand is an Instruction and the other is a Constant
-  if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2))
-    return;
-
-  Instruction *I = 0;
-  APInt value;
-
-  switch (BO->getOpcode()) {
-    case Instruction::Add:
-      if (I1) {
-        I = I1;
-        value = CI2->getValue();
-      } else if (I2) {
-        I = I2;
-        value = CI1->getValue();
-      }
-      break;
-
-    case Instruction::Sub:
-      // Instructions like a = X-b, where X is a constant are not represented
-      // in the graph.
-      if (!I1)
-        return;
-
-      I = I1;
-      value = -CI2->getValue();
-      break;
-
-    default:
-      return;
-  }
-
-  inequality_graph.addEdge(I, BO, value, true);
-  inequality_graph.addEdge(BO, I, -value, false);
-  createConstraintInstruction(I);
-}
-
-/// Given a binary operator, we are only interest in the case
-/// that one operand is an Instruction and the other is a ConstantInt. In
-/// this case the method returns true, otherwise false. It also obtains the
-/// Instruction and ConstantInt from the BinaryOperator and returns it.
-bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
-                                    Instruction **I2, ConstantInt **C1,
-                                    ConstantInt **C2) {
-  Value *op1 = BO->getOperand(0);
-  Value *op2 = BO->getOperand(1);
-
-  if ((*I1 = dyn_cast<Instruction>(op1))) {
-    if ((*C2 = dyn_cast<ConstantInt>(op2)))
-      return true; // First is Instruction and second ConstantInt
-
-    return false; // Both are Instruction
-  } else {
-    if ((*C1 = dyn_cast<ConstantInt>(op1)) &&
-        (*I2 = dyn_cast<Instruction>(op2)))
-      return true; // First is ConstantInt and second Instruction
-
-    return false; // Both are not Instruction
-  }
-}
-
-/// Creates constraints for Comparator Instructions.
-/// Only comparators that have any of the following operators
-/// are used to create constraints: >=, >, <=, <. And only if
-/// at least one operand is an Instruction. In a Comparator Instruction
-/// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where
-/// t and f represent sigma for operands in true and false branches. The
-/// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and
-/// b_f <= b. There are two more constraints that depend on the operator.
-/// For the operator <= : a_t <= b_t   and b_f <= a_f-1
-/// For the operator <  : a_t <= b_t-1 and b_f <= a_f
-/// For the operator >= : b_t <= a_t   and a_f <= b_f-1
-/// For the operator >  : b_t <= a_t-1 and a_f <= b_f
-void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
-  Value *V_op1 = ICI->getOperand(0);
-  Value *V_op2 = ICI->getOperand(1);
-
-  if (!V_op1->getType()->isIntegerTy())
-    return;
-
-  Instruction *I_op1 = dyn_cast<Instruction>(V_op1);
-  Instruction *I_op2 = dyn_cast<Instruction>(V_op2);
-
-  // Test if at least one operand is an Instruction
-  if (!I_op1 && !I_op2)
-    return;
-
-  BasicBlock *BB_succ_t = TI->getSuccessor(0);
-  BasicBlock *BB_succ_f = TI->getSuccessor(1);
-
-  PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL,
-          *SIG_op2_t = NULL, *SIG_op2_f = NULL;
-
-  createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f);
-  createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f);
-
-  int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth();
-  APInt MinusOne = APInt::getAllOnesValue(width);
-  APInt Zero = APInt::getNullValue(width);
-
-  CmpInst::Predicate Pred = ICI->getPredicate();
-  ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);
-  ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);
-  switch (Pred) {
-  case CmpInst::ICMP_SGT:  // signed greater than
-    createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne);
-    createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero);
-    break;
-
-  case CmpInst::ICMP_SGE:  // signed greater or equal
-    createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero);
-    createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne);
-    break;
-
-  case CmpInst::ICMP_SLT:  // signed less than
-    createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne);
-    createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero);
-    break;
-
-  case CmpInst::ICMP_SLE:  // signed less or equal
-    createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);
-    createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne);
-    break;
-
-  default:
-    break;
-  }
-
-  if (I_op1)
-    createConstraintInstruction(I_op1);
-  if (I_op2)
-    createConstraintInstruction(I_op2);
-}
-
-/// Creates constraints for PHI nodes.
-/// In a PHI node a = phi(b,c) we can create the constraint
-/// a<= max(b,c). With this constraint there will be the edges,
-/// b->a and c->a with weight 0 in the lower bound graph, and the edges
-/// a->b and a->c with weight 0 in the upper bound graph.
-void ABCD::createConstraintPHINode(PHINode *PN) {
-  // FIXME: We really want to disallow sigma nodes, but I don't know the best
-  // way to detect the other than this.
-  if (PN->getNumOperands() == 2) return;
-  
-  int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
-  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-    Value *V = PN->getIncomingValue(i);
-    if (Instruction *I = dyn_cast<Instruction>(V)) {
-      createConstraintInstruction(I);
-    }
-    inequality_graph.addEdge(V, PN, APInt(width, 0), true);
-    inequality_graph.addEdge(V, PN, APInt(width, 0), false);
-  }
-}
-
-/// This method creates a constraint between a Sigma and an Instruction.
-/// These constraints are created as soon as we find a comparator that uses a
-/// SSI variable.
-void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
-                                   BasicBlock *BB_succ_f, PHINode **SIG_op_t,
-                                   PHINode **SIG_op_f) {
-  *SIG_op_t = findSigma(BB_succ_t, I_op);
-  *SIG_op_f = findSigma(BB_succ_f, I_op);
-
-  if (*SIG_op_t) {
-    int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
-    inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
-    inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
-  }
-  if (*SIG_op_f) {
-    int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
-    inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
-    inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
-  }
-}
-
-/// If PN_op1 and PN_o2 are different from NULL, create a constraint
-/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
-/// with the respective V_op#, if V_op# is a ConstantInt.
-void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
-                                  ConstantInt *V_op1, ConstantInt *V_op2,
-                                  APInt value) {
-  if (SIG_op1 && SIG_op2) {
-    inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
-    inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
-  } else if (SIG_op1 && V_op2) {
-    inequality_graph.addEdge(V_op2, SIG_op1, value, true);
-    inequality_graph.addEdge(SIG_op1, V_op2, -value, false);
-  } else if (SIG_op2 && V_op1) {
-    inequality_graph.addEdge(SIG_op2, V_op1, value, true);
-    inequality_graph.addEdge(V_op1, SIG_op2, -value, false);
-  }
-}
-
-/// Returns the sigma representing the Instruction I in BasicBlock BB.
-/// Returns NULL in case there is no sigma for this Instruction in this
-/// Basic Block. This methods assume that sigmas are the first instructions
-/// in a block, and that there can be only two sigmas in a block. So it will
-/// only look on the first two instructions of BasicBlock BB.
-PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) {
-  // BB has more than one predecessor, BB cannot have sigmas.
-  if (I == NULL || BB->getSinglePredecessor() == NULL)
-    return NULL;
-
-  BasicBlock::iterator begin = BB->begin();
-  BasicBlock::iterator end = BB->end();
-
-  for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) {
-    Instruction *I_succ = begin;
-    if (PHINode *PN = dyn_cast<PHINode>(I_succ))
-      if (PN->getIncomingValue(0) == I)
-        return PN;
-  }
-
-  return NULL;
-}
-
-/// Original ABCD algorithm to prove redundant checks.
-/// This implementation works on any kind of inequality branch.
-bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) {
-  int32_t width = cast<IntegerType>(a->getType())->getBitWidth();
-  Bound bound(APInt(width, c), upper_bound);
-
-  mem_result.clear();
-  active.clear();
-
-  ProveResult res = prove(a, b, bound, 0);
-  return res != False;
-}
-
-/// Prove that distance between b and a is <= bound
-ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound,
-                              unsigned level) {
-  // if (C[b-a<=e] == True for some e <= bound
-  // Same or stronger difference was already proven
-  if (mem_result.hasTrue(b, bound))
-    return True;
-
-  // if (C[b-a<=e] == False for some e >= bound
-  // Same or weaker difference was already disproved
-  if (mem_result.hasFalse(b, bound))
-    return False;
-
-  // if (C[b-a<=e] == Reduced for some e <= bound
-  // b is on a cycle that was reduced for same or stronger difference
-  if (mem_result.hasReduced(b, bound))
-    return Reduced;
-
-  // traversal reached the source vertex
-  if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true)))
-    return True;
-
-  // if b has no predecessor then fail
-  if (!inequality_graph.hasEdge(b, bound.isUpperBound()))
-    return False;
-
-  // a cycle was encountered
-  if (active.count(b)) {
-    if (Bound::leq(*active.lookup(b), bound))
-      return Reduced; // a "harmless" cycle
-
-    return False; // an amplifying cycle
-  }
-
-  active[b] = &bound;
-  PHINode *PN = dyn_cast<PHINode>(b);
-
-  // Test if a Value is a Phi. If it is a PHINode with more than 1 incoming
-  // value, then it is a phi, if it has 1 incoming value it is a sigma.
-  if (PN && PN->getNumIncomingValues() > 1)
-    updateMemDistance(a, b, bound, level, min);
-  else
-    updateMemDistance(a, b, bound, level, max);
-
-  active.erase(b);
-
-  ABCD::ProveResult res = mem_result.getBoundResult(b, bound);
-  return res;
-}
-
-/// Updates the distance value for a and b
-void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound,
-                             unsigned level, meet_function meet) {
-  ABCD::ProveResult res = (meet == max) ? False : True;
-
-  SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b);
-  SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end();
-
-  for (; begin != end ; ++begin) {
-    if (((res >= Reduced) && (meet == max)) ||
-       ((res == False) && (meet == min))) {
-      break;
-    }
-    const Edge &in = *begin;
-    if (in.isUpperBound() == bound.isUpperBound()) {
-      Value *succ = in.getVertex();
-      res = meet(res, prove(a, succ, Bound(bound, in.getValue()),
-                            level+1));
-    }
-  }
-
-  mem_result.updateBound(b, bound, res);
-}
-
-/// Return the stored result for this bound
-ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{
-  if (max_false && Bound::leq(bound, *max_false))
-    return False;
-  if (min_true && Bound::leq(*min_true, bound))
-    return True;
-  if (min_reduced && Bound::leq(*min_reduced, bound))
-    return Reduced;
-  return False;
-}
-
-/// Stores a false found
-void ABCD::MemoizedResultChart::addFalse(const Bound &bound) {
-  if (!max_false || Bound::leq(*max_false, bound))
-    max_false.reset(new Bound(bound));
-
-  if (Bound::eq(max_false.get(), min_reduced.get()))
-    min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced)));
-  if (Bound::eq(max_false.get(), min_true.get()))
-    min_true.reset(new Bound(Bound::createIncrement(*min_true)));
-  if (Bound::eq(min_reduced.get(), min_true.get()))
-    min_reduced.reset();
-  clearRedundantReduced();
-}
-
-/// Stores a true found
-void ABCD::MemoizedResultChart::addTrue(const Bound &bound) {
-  if (!min_true || Bound::leq(bound, *min_true))
-    min_true.reset(new Bound(bound));
-
-  if (Bound::eq(min_true.get(), min_reduced.get()))
-    min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced)));
-  if (Bound::eq(min_true.get(), max_false.get()))
-    max_false.reset(new Bound(Bound::createDecrement(*max_false)));
-  if (Bound::eq(max_false.get(), min_reduced.get()))
-    min_reduced.reset();
-  clearRedundantReduced();
-}
-
-/// Stores a Reduced found
-void ABCD::MemoizedResultChart::addReduced(const Bound &bound) {
-  if (!min_reduced || Bound::leq(bound, *min_reduced))
-    min_reduced.reset(new Bound(bound));
-
-  if (Bound::eq(min_reduced.get(), min_true.get()))
-    min_true.reset(new Bound(Bound::createIncrement(*min_true)));
-  if (Bound::eq(min_reduced.get(), max_false.get()))
-    max_false.reset(new Bound(Bound::createDecrement(*max_false)));
-}
-
-/// Clears redundant reduced
-/// If a min_true is smaller than a min_reduced then the min_reduced
-/// is unnecessary and then removed. It also works for min_reduced
-/// begin smaller than max_false.
-void ABCD::MemoizedResultChart::clearRedundantReduced() {
-  if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced))
-    min_reduced.reset();
-  if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false))
-    min_reduced.reset();
-}
-
-/// Stores the bound found
-void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound,
-                                       const ProveResult res) {
-  if (res == False) {
-    map[b].addFalse(bound);
-  } else if (res == True) {
-    map[b].addTrue(bound);
-  } else {
-    map[b].addReduced(bound);
-  }
-}
-
-/// Adds an edge from V_from to V_to with weight value
-void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from,
-                                    APInt value, bool upper) {
-  assert(V_from->getType() == V_to->getType());
-  assert(cast<IntegerType>(V_from->getType())->getBitWidth() ==
-         value.getBitWidth());
-
-  graph[V_from].push_back(Edge(V_to, value, upper));
-}
-
-/// Test if there is any edge from V in the upper direction
-bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const {
-  SmallVector<Edge, 16> it = graph.lookup(V);
-
-  SmallVector<Edge, 16>::iterator begin = it.begin();
-  SmallVector<Edge, 16>::iterator end = it.end();
-  for (; begin != end; ++begin) {
-    if (begin->isUpperBound() == upper) {
-      return true;
-    }
-  }
-  return false;
-}
-
-/// Prints the header of the dot file
-void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
-  OS << "digraph dotgraph {\n";
-  OS << "label=\"Inequality Graph for \'";
-  OS << F.getNameStr() << "\' function\";\n";
-  OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n";
-}
-
-/// Prints the body of the dot file
-void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
-  DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin =
-      graph.begin(), end = graph.end();
-
-  for (; begin != end ; ++begin) {
-    SmallVector<Edge, 16>::const_iterator begin_par =
-        begin->second.begin(), end_par = begin->second.end();
-    Value *source = begin->first;
-
-    printVertex(OS, source);
-
-    for (; begin_par != end_par ; ++begin_par) {
-      const Edge &edge = *begin_par;
-      printEdge(OS, source, edge);
-    }
-  }
-}
-
-/// Prints vertex source to the dot file
-///
-void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const {
-  OS << "\"";
-  printName(OS, source);
-  OS << "\"";
-  OS << " [label=\"{";
-  printName(OS, source);
-  OS << "}\"];\n";
-}
-
-/// Prints the edge to the dot file
-void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source,
-                                      const Edge &edge) const {
-  Value *dest = edge.getVertex();
-  APInt value = edge.getValue();
-  bool upper = edge.isUpperBound();
-
-  OS << "\"";
-  printName(OS, source);
-  OS << "\"";
-  OS << " -> ";
-  OS << "\"";
-  printName(OS, dest);
-  OS << "\"";
-  OS << " [label=\"" << value << "\"";
-  if (upper) {
-    OS << "color=\"blue\"";
-  } else {
-    OS << "color=\"red\"";
-  }
-  OS << "];\n";
-}
-
-void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) {
-    OS << *CI;
-  } else {
-    if (!info->hasName()) {
-      info->setName("V");
-    }
-    OS << info->getNameStr();
-  }
-}
-
-/// createABCDPass - The public interface to this file...
-FunctionPass *llvm::createABCDPass() {
-  return new ABCD();
-}

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp Tue Oct 26 19:48:03 2010
@@ -33,7 +33,9 @@
 namespace {
   struct ADCE : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    ADCE() : FunctionPass(&ID) {}
+    ADCE() : FunctionPass(ID) {
+      initializeADCEPass(*PassRegistry::getPassRegistry());
+    }
     
     virtual bool runOnFunction(Function& F);
     
@@ -45,7 +47,7 @@
 }
 
 char ADCE::ID = 0;
-INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false);
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
 
 bool ADCE::runOnFunction(Function& F) {
   SmallPtrSet<Instruction*, 128> alive;

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp Tue Oct 26 19:48:03 2010
@@ -41,7 +41,9 @@
 namespace {
   struct BlockPlacement : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    BlockPlacement() : FunctionPass(&ID) {}
+    BlockPlacement() : FunctionPass(ID) {
+      initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
@@ -74,8 +76,11 @@
 }
 
 char BlockPlacement::ID = 0;
-INITIALIZE_PASS(BlockPlacement, "block-placement",
-                "Profile Guided Basic Block Placement", false, false);
+INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
+                "Profile Guided Basic Block Placement", false, false)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(BlockPlacement, "block-placement",
+                "Profile Guided Basic Block Placement", false, false)
 
 FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -1,9 +1,9 @@
 add_llvm_library(LLVMScalarOpts
-  ABCD.cpp
   ADCE.cpp
   BasicBlockPlacement.cpp
   CodeGenPrepare.cpp
   ConstantProp.cpp
+  CorrelatedValuePropagation.cpp
   DCE.cpp
   DeadStoreElimination.cpp
   GEPSplitter.cpp
@@ -12,11 +12,11 @@
   JumpThreading.cpp
   LICM.cpp
   LoopDeletion.cpp
-  LoopIndexSplit.cpp
   LoopRotation.cpp
   LoopStrengthReduce.cpp
   LoopUnrollPass.cpp
   LoopUnswitch.cpp
+  LowerAtomic.cpp
   MemCpyOptimizer.cpp
   Reassociate.cpp
   Reg2Mem.cpp
@@ -30,5 +30,3 @@
   TailDuplication.cpp
   TailRecursionElimination.cpp
   )
-
-target_link_libraries (LLVMScalarOpts LLVMTransformUtils)

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp Tue Oct 26 19:48:03 2010
@@ -31,8 +31,10 @@
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
@@ -41,6 +43,13 @@
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+STATISTIC(NumElim,  "Number of blocks eliminated");
+
+static cl::opt<bool>
+CriticalEdgeSplit("cgp-critical-edge-splitting",
+                  cl::desc("Split critical edges during codegen prepare"),
+                  cl::init(false), cl::Hidden);
+
 namespace {
   class CodeGenPrepare : public FunctionPass {
     /// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -54,7 +63,9 @@
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit CodeGenPrepare(const TargetLowering *tli = 0)
-      : FunctionPass(&ID), TLI(tli) {}
+      : FunctionPass(ID), TLI(tli) {
+        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+      }
     bool runOnFunction(Function &F);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -83,7 +94,7 @@
 
 char CodeGenPrepare::ID = 0;
 INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
-                "Optimize for code generation", false, false);
+                "Optimize for code generation", false, false)
 
 FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
   return new CodeGenPrepare(TLI);
@@ -296,6 +307,7 @@
     PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
   }
   BB->eraseFromParent();
+  ++NumElim;
 
   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
@@ -427,9 +439,9 @@
   // If these values will be promoted, find out what they will be promoted
   // to.  This helps us consider truncates on PPC as noop copies when they
   // are.
-  if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
+  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
     SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
-  if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
+  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
     DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
 
   // If, after promotion, these are the same types, this is a noop copy.
@@ -730,43 +742,21 @@
 bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                            DenseMap<Value*,Value*> &SunkAddrs) {
   bool MadeChange = false;
-  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
-
-  // Do a prepass over the constraints, canonicalizing them, and building up the
-  // ConstraintOperands list.
-  std::vector<InlineAsm::ConstraintInfo>
-    ConstraintInfos = IA->ParseConstraints();
-
-  /// ConstraintOperands - Information about all of the constraints.
-  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
-  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
-  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
-    ConstraintOperands.
-      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
-    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
-
-    // Compute the value type for each operand.
-    switch (OpInfo.Type) {
-    case InlineAsm::isOutput:
-      if (OpInfo.isIndirect)
-        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
-      break;
-    case InlineAsm::isInput:
-      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
-      break;
-    case InlineAsm::isClobber:
-      // Nothing to do.
-      break;
-    }
 
+  std::vector<TargetLowering::AsmOperandInfo> TargetConstraints = TLI->ParseConstraints(CS);
+  unsigned ArgNo = 0;
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+    
     // Compute the constraint code and ConstraintType to use.
     TLI->ComputeConstraintToUse(OpInfo, SDValue());
 
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.isIndirect) {
-      Value *OpVal = OpInfo.CallOperandVal;
+      Value *OpVal = const_cast<Value *>(CS.getArgument(ArgNo++));
       MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
-    }
+    } else if (OpInfo.Type == InlineAsm::isInput)
+      ArgNo++;
   }
 
   return MadeChange;
@@ -788,7 +778,9 @@
   // If the load has other users and the truncate is not free, this probably
   // isn't worthwhile.
   if (!LI->hasOneUse() &&
-      TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
+      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+      !TLI->isTruncateFree(I->getType(), LI->getType()))
     return false;
 
   // Check whether the target supports casts folded into loads.
@@ -812,7 +804,7 @@
 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
   BasicBlock *DefBB = I->getParent();
 
-  // If both result of the {s|z}xt and its source are live out, rewrite all
+  // If the result of a {s|z}ext and its source are both live out, rewrite all
   // other uses of the source with result of extension.
   Value *Src = I->getOperand(0);
   if (Src->hasOneUse())
@@ -891,12 +883,14 @@
   bool MadeChange = false;
 
   // Split all critical edges where the dest block has a PHI.
-  TerminatorInst *BBTI = BB.getTerminator();
-  if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
-    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
-      BasicBlock *SuccBB = BBTI->getSuccessor(i);
-      if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
-        SplitEdgeNicely(BBTI, i, BackEdges, this);
+  if (CriticalEdgeSplit) {
+    TerminatorInst *BBTI = BB.getTerminator();
+    if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
+      for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+        BasicBlock *SuccBB = BBTI->getSuccessor(i);
+        if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+          SplitEdgeNicely(BBTI, i, BackEdges, this);
+      }
     }
   }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ConstantProp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ConstantProp.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ConstantProp.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ConstantProp.cpp Tue Oct 26 19:48:03 2010
@@ -34,7 +34,9 @@
 namespace {
   struct ConstantPropagation : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    ConstantPropagation() : FunctionPass(&ID) {}
+    ConstantPropagation() : FunctionPass(ID) {
+      initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnFunction(Function &F);
 
@@ -46,7 +48,7 @@
 
 char ConstantPropagation::ID = 0;
 INITIALIZE_PASS(ConstantPropagation, "constprop",
-                "Simple constant propagation", false, false);
+                "Simple constant propagation", false, false)
 
 FunctionPass *llvm::createConstantPropagationPass() {
   return new ConstantPropagation();

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/DCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/DCE.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/DCE.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/DCE.cpp Tue Oct 26 19:48:03 2010
@@ -35,7 +35,9 @@
   //
   struct DeadInstElimination : public BasicBlockPass {
     static char ID; // Pass identification, replacement for typeid
-    DeadInstElimination() : BasicBlockPass(&ID) {}
+    DeadInstElimination() : BasicBlockPass(ID) {
+      initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+    }
     virtual bool runOnBasicBlock(BasicBlock &BB) {
       bool Changed = false;
       for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -57,7 +59,7 @@
 
 char DeadInstElimination::ID = 0;
 INITIALIZE_PASS(DeadInstElimination, "die",
-                "Dead Instruction Elimination", false, false);
+                "Dead Instruction Elimination", false, false)
 
 Pass *llvm::createDeadInstEliminationPass() {
   return new DeadInstElimination();
@@ -70,7 +72,9 @@
   //
   struct DCE : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    DCE() : FunctionPass(&ID) {}
+    DCE() : FunctionPass(ID) {
+      initializeDCEPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
@@ -81,7 +85,7 @@
 }
 
 char DCE::ID = 0;
-INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false);
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
 
 bool DCE::runOnFunction(Function &F) {
   // Start out with all of the instructions in the worklist...

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp Tue Oct 26 19:48:03 2010
@@ -40,7 +40,9 @@
     TargetData *TD;
 
     static char ID; // Pass identification, replacement for typeid
-    DSE() : FunctionPass(&ID) {}
+    DSE() : FunctionPass(ID) {
+      initializeDSEPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F) {
       bool Changed = false;
@@ -77,12 +79,16 @@
       AU.addPreserved<MemoryDependenceAnalysis>();
     }
 
-    unsigned getPointerSize(Value *V) const;
+    uint64_t getPointerSize(Value *V) const;
   };
 }
 
 char DSE::ID = 0;
-INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
 
 FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
 
@@ -136,11 +142,11 @@
 }
 
 /// getStoreSize - Return the length in bytes of the write by the clobbering
-/// instruction. If variable or unknown, returns -1.
-static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
+/// instruction. If variable or unknown, returns AliasAnalysis::UnknownSize.
+static uint64_t getStoreSize(Instruction *I, const TargetData *TD) {
   assert(doesClobberMemory(I));
   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-    if (!TD) return -1u;
+    if (!TD) return AliasAnalysis::UnknownSize;
     return TD->getTypeStoreSize(SI->getOperand(0)->getType());
   }
 
@@ -152,7 +158,7 @@
     switch (II->getIntrinsicID()) {
     default: assert(false && "Unexpected intrinsic!");
     case Intrinsic::init_trampoline:
-      return -1u;
+      return AliasAnalysis::UnknownSize;
     case Intrinsic::lifetime_end:
       Len = II->getArgOperand(0);
       break;
@@ -161,7 +167,7 @@
   if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
     if (!LenCI->isAllOnesValue())
       return LenCI->getZExtValue();
-  return -1u;
+  return AliasAnalysis::UnknownSize;
 }
 
 /// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
@@ -176,10 +182,12 @@
   // Exactly the same type, must have exactly the same size.
   if (I1Ty == I2Ty) return true;
   
-  int I1Size = getStoreSize(I1, TD);
-  int I2Size = getStoreSize(I2, TD);
+  uint64_t I1Size = getStoreSize(I1, TD);
+  uint64_t I2Size = getStoreSize(I2, TD);
   
-  return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
+  return I1Size != AliasAnalysis::UnknownSize &&
+         I2Size != AliasAnalysis::UnknownSize &&
+         I1Size >= I2Size;
 }
 
 bool DSE::runOnBasicBlock(BasicBlock &BB) {
@@ -367,7 +375,7 @@
     }
     
     Value *killPointer = 0;
-    uint64_t killPointerSize = ~0UL;
+    uint64_t killPointerSize = AliasAnalysis::UnknownSize;
     
     // If we encounter a use of the pointer, it is no longer considered dead
     if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
@@ -559,7 +567,7 @@
   } while (!NowDeadInsts.empty());
 }
 
-unsigned DSE::getPointerSize(Value *V) const {
+uint64_t DSE::getPointerSize(Value *V) const {
   if (TD) {
     if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
       // Get size information for the alloca
@@ -571,5 +579,5 @@
       return TD->getTypeAllocSize(PT->getElementType());
     }
   }
-  return ~0U;
+  return AliasAnalysis::UnknownSize;
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/GEPSplitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/GEPSplitter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/GEPSplitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/GEPSplitter.cpp Tue Oct 26 19:48:03 2010
@@ -27,13 +27,15 @@
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit GEPSplitter() : FunctionPass(&ID) {}
+    explicit GEPSplitter() : FunctionPass(ID) {
+      initializeGEPSplitterPass(*PassRegistry::getPassRegistry());
+    }
   };
 }
 
 char GEPSplitter::ID = 0;
 INITIALIZE_PASS(GEPSplitter, "split-geps",
-                "split complex GEPs into simple GEPs", false, false);
+                "split complex GEPs into simple GEPs", false, false)
 
 FunctionPass *llvm::createGEPSplitterPass() {
   return new GEPSplitter();

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp Tue Oct 26 19:48:03 2010
@@ -61,7 +61,6 @@
 static cl::opt<bool> EnablePRE("enable-pre",
                                cl::init(true), cl::Hidden);
 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
-static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
 
 //===----------------------------------------------------------------------===//
 //                         ValueTable Class
@@ -140,9 +139,9 @@
       }
     }
 
-    bool operator!=(const Expression &other) const {
+    /*bool operator!=(const Expression &other) const {
       return !(*this == other);
-    }
+    }*/
   };
 
   class ValueTable {
@@ -165,7 +164,6 @@
       Expression create_expression(CastInst* C);
       Expression create_expression(GetElementPtrInst* G);
       Expression create_expression(CallInst* C);
-      Expression create_expression(Constant* C);
       Expression create_expression(ExtractValueInst* C);
       Expression create_expression(InsertValueInst* C);
       
@@ -177,7 +175,6 @@
       void add(Value *V, uint32_t num);
       void clear();
       void erase(Value *v);
-      unsigned size();
       void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
       AliasAnalysis *getAliasAnalysis() const { return AA; }
       void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
@@ -665,7 +662,9 @@
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit GVN(bool noloads = false)
-      : FunctionPass(&ID), NoLoads(noloads), MD(0) { }
+        : FunctionPass(ID), NoLoads(noloads), MD(0) {
+      initializeGVNPass(*PassRegistry::getPassRegistry());
+    }
 
   private:
     bool NoLoads;
@@ -716,7 +715,11 @@
   return new GVN(NoLoads);
 }
 
-INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false);
+INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
 
 void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   errs() << "{\n";
@@ -1311,7 +1314,7 @@
   // Otherwise, we have to construct SSA form.
   SmallVector<PHINode*, 8> NewPHIs;
   SSAUpdater SSAUpdate(&NewPHIs);
-  SSAUpdate.Initialize(LI);
+  SSAUpdate.Initialize(LI->getType(), LI->getName());
   
   const Type *LoadTy = LI->getType();
   
@@ -1534,26 +1537,19 @@
       return false;
     if (Blockers.count(TmpBB))
       return false;
+    
+    // If any of these blocks has more than one successor (i.e. if the edge we
+    // just traversed was critical), then there are other paths through this 
+    // block along which the load may not be anticipated.  Hoisting the load 
+    // above this block would be adding the load to execution paths along
+    // which it was not previously executed.
     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
-      allSingleSucc = false;
+      return false;
   }
 
   assert(TmpBB);
   LoadBB = TmpBB;
 
-  // If we have a repl set with LI itself in it, this means we have a loop where
-  // at least one of the values is LI.  Since this means that we won't be able
-  // to eliminate LI even if we insert uses in the other predecessors, we will
-  // end up increasing code size.  Reject this by scanning for LI.
-  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
-    if (ValuesPerBlock[i].isSimpleValue() &&
-        ValuesPerBlock[i].getSimpleValue() == LI) {
-      // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
-      if (!EnableFullLoadPRE || e == 1)
-        return false;
-    }
-  }
-
   // FIXME: It is extremely unclear what this loop is doing, other than
   // artificially restricting loadpre.
   if (isSinglePred) {
@@ -1613,14 +1609,13 @@
   unsigned NumUnavailablePreds = PredLoads.size();
   assert(NumUnavailablePreds != 0 &&
          "Fully available value should be eliminated above!");
-  if (!EnableFullLoadPRE) {
-    // If this load is unavailable in multiple predecessors, reject it.
-    // FIXME: If we could restructure the CFG, we could make a common pred with
-    // all the preds that don't have an available LI and insert a new load into
-    // that one block.
-    if (NumUnavailablePreds != 1)
+  
+  // If this load is unavailable in multiple predecessors, reject it.
+  // FIXME: If we could restructure the CFG, we could make a common pred with
+  // all the preds that don't have an available LI and insert a new load into
+  // that one block.
+  if (NumUnavailablePreds != 1)
       return false;
-  }
 
   // Check if the load can safely be moved to all the unavailable predecessors.
   bool CanDoPRE = true;
@@ -2111,6 +2106,11 @@
           CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
           isa<DbgInfoIntrinsic>(CurInst))
         continue;
+      
+      // We don't currently value number ANY inline asm calls.
+      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+        if (CallI->isInlineAsm())
+          continue;
 
       uint32_t ValNo = VN.lookup(CurInst);
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/IndVarSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/IndVarSimplify.cpp Tue Oct 26 19:48:03 2010
@@ -77,7 +77,9 @@
   public:
 
     static char ID; // Pass identification, replacement for typeid
-    IndVarSimplify() : LoopPass(&ID) {}
+    IndVarSimplify() : LoopPass(ID) {
+      initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -117,8 +119,16 @@
 }
 
 char IndVarSimplify::ID = 0;
-INITIALIZE_PASS(IndVarSimplify, "indvars",
-                "Canonicalize Induction Variables", false, false);
+INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
+                "Canonicalize Induction Variables", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_END(IndVarSimplify, "indvars",
+                "Canonicalize Induction Variables", false, false)
 
 Pass *llvm::createIndVarSimplifyPass() {
   return new IndVarSimplify();
@@ -346,7 +356,7 @@
     PHIs.push_back(PN);
 
   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
-    if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i]))
+    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
       HandleFloatingPointIV(L, PN);
 
   // If the loop previously had floating-point IV, ScalarEvolution
@@ -395,7 +405,7 @@
   // which are now dead.
   while (!DeadInsts.empty())
     if (Instruction *Inst =
-          dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+        dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
       RecursivelyDeleteTriviallyDeadInstructions(Inst);
 }
 
@@ -462,7 +472,7 @@
   // which are now dead.
   while (!DeadInsts.empty())
     if (Instruction *Inst =
-          dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
       RecursivelyDeleteTriviallyDeadInstructions(Inst);
 }
 
@@ -725,7 +735,7 @@
   // which are now dead.
   while (!DeadInsts.empty())
     if (Instruction *Inst =
-          dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
       RecursivelyDeleteTriviallyDeadInstructions(Inst);
 }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/JumpThreading.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/JumpThreading.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/JumpThreading.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/JumpThreading.cpp Tue Oct 26 19:48:03 2010
@@ -24,6 +24,7 @@
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -43,12 +44,6 @@
           cl::desc("Max block size to duplicate for jump threading"),
           cl::init(6), cl::Hidden);
 
-// Turn on use of LazyValueInfo.
-static cl::opt<bool>
-EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
-
-
-
 namespace {
   /// This pass performs 'jump threading', which looks at blocks that have
   /// multiple predecessors and multiple successors.  If one or more of the
@@ -74,15 +69,32 @@
 #else
     SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
 #endif
+    DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+    
+    // RAII helper for updating the recursion stack.
+    struct RecursionSetRemover {
+      DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
+      std::pair<Value*, BasicBlock*> ThePair;
+      
+      RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
+                          std::pair<Value*, BasicBlock*> P)
+        : TheSet(S), ThePair(P) { }
+      
+      ~RecursionSetRemover() {
+        TheSet.erase(ThePair);
+      }
+    };
   public:
     static char ID; // Pass identification
-    JumpThreading() : FunctionPass(&ID) {}
+    JumpThreading() : FunctionPass(ID) {
+      initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnFunction(Function &F);
     
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      if (EnableLVI)
-        AU.addRequired<LazyValueInfo>();
+      AU.addRequired<LazyValueInfo>();
+      AU.addPreserved<LazyValueInfo>();
     }
     
     void FindLoopHeaders(Function &F);
@@ -111,8 +123,11 @@
 }
 
 char JumpThreading::ID = 0;
-INITIALIZE_PASS(JumpThreading, "jump-threading",
-                "Jump Threading", false, false);
+INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
+                "Jump Threading", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(JumpThreading, "jump-threading",
+                "Jump Threading", false, false)
 
 // Public interface to the Jump Threading pass
 FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
@@ -122,7 +137,7 @@
 bool JumpThreading::runOnFunction(Function &F) {
   DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
   TD = getAnalysisIfAvailable<TargetData>();
-  LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
+  LVI = &getAnalysis<LazyValueInfo>();
   
   FindLoopHeaders(F);
   
@@ -144,6 +159,7 @@
         DEBUG(dbgs() << "  JT: Deleting dead block '" << BB->getName()
               << "' with terminator: " << *BB->getTerminator() << '\n');
         LoopHeaders.erase(BB);
+        LVI->eraseBlock(BB);
         DeleteDeadBlock(BB);
         Changed = true;
       } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -164,6 +180,11 @@
             bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
             BasicBlock *Succ = BI->getSuccessor(0);
             
+            // FIXME: It is always conservatively correct to drop the info
+            // for a block even if it doesn't get erased.  This isn't totally
+            // awesome, but it allows us to use AssertingVH to prevent nasty
+            // dangling pointer issues within LazyValueInfo.
+            LVI->eraseBlock(BB);
             if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
               Changed = true;
               // If we deleted BB and BB was the header of a loop, then the
@@ -251,6 +272,17 @@
     LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
 }
 
+// Helper method for ComputeValueKnownInPredecessors.  If Value is a
+// ConstantInt, push it.  If it's an undef, push 0.  Otherwise, do nothing.
+static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
+                                                        BasicBlock*> > &Result,
+                              Constant *Value, BasicBlock* BB){
+  if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
+    Result.push_back(std::make_pair(FoldedCInt, BB));
+  else if (isa<UndefValue>(Value))
+    Result.push_back(std::make_pair((ConstantInt*)0, BB));
+}
+
 /// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
 /// if we can infer that the value is a known ConstantInt in any of our
 /// predecessors.  If so, return the known list of value and pred BB in the
@@ -260,12 +292,24 @@
 ///
 bool JumpThreading::
 ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+  // This method walks up use-def chains recursively.  Because of this, we could
+  // get into an infinite loop going around loops in the use-def chain.  To
+  // prevent this, keep track of what (value, block) pairs we've already visited
+  // and terminate the search if we loop back to them
+  if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+    return false;
+  
+  // An RAII help to remove this pair from the recursion set once the recursion
+  // stack pops back out again.
+  RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+  
   // If V is a constantint, then it is known in all predecessors.
   if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
     ConstantInt *CI = dyn_cast<ConstantInt>(V);
     
     for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
       Result.push_back(std::make_pair(CI, *PI));
+    
     return true;
   }
   
@@ -281,29 +325,25 @@
     /// TODO: Per PR2563, we could infer value range information about a
     /// predecessor based on its terminator.
     //
-    if (LVI) {
-      // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
-      // "I" is a non-local compare-with-a-constant instruction.  This would be
-      // able to handle value inequalities better, for example if the compare is
-      // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
-      // Perhaps getConstantOnEdge should be smart enough to do this?
-      
-      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        // If the value is known by LazyValueInfo to be a constant in a
-        // predecessor, use that information to try to thread this block.
-        Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
-        if (PredCst == 0 ||
-            (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
-          continue;
+    // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+    // "I" is a non-local compare-with-a-constant instruction.  This would be
+    // able to handle value inequalities better, for example if the compare is
+    // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+    // Perhaps getConstantOnEdge should be smart enough to do this?
+    
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *P = *PI;
+      // If the value is known by LazyValueInfo to be a constant in a
+      // predecessor, use that information to try to thread this block.
+      Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+      if (PredCst == 0 ||
+          (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
+        continue;
         
-        Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
-      }
-      
-      return !Result.empty();
+      Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
     }
-    
-    return false;
+      
+    return !Result.empty();
   }
   
   /// If I is a PHI node, then we know the incoming values for any constants.
@@ -313,8 +353,15 @@
       if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
         ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
         Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+      } else {
+        Constant *CI = LVI->getConstantOnEdge(InVal,
+                                              PN->getIncomingBlock(i), BB);
+        // LVI returns null if no value could be determined.
+        if (!CI) continue;
+        PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
       }
     }
+    
     return !Result.empty();
   }
   
@@ -338,29 +385,26 @@
       else
         InterestingVal = ConstantInt::getFalse(I->getContext());
       
+      SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+      
       // Scan for the sentinel.  If we find an undef, force it to the
       // interesting value: x|undef -> true and x&undef -> false.
       for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
         if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
           Result.push_back(LHSVals[i]);
           Result.back().first = InterestingVal;
+          LHSKnownBBs.insert(LHSVals[i].second);
         }
       for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
         if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
           // If we already inferred a value for this block on the LHS, don't
           // re-add it.
-          bool HasValue = false;
-          for (unsigned r = 0, e = Result.size(); r != e; ++r)
-            if (Result[r].second == RHSVals[i].second) {
-              HasValue = true;
-              break;
-            }
-          
-          if (!HasValue) {
+          if (!LHSKnownBBs.count(RHSVals[i].second)) {
             Result.push_back(RHSVals[i]);
             Result.back().first = InterestingVal;
           }
         }
+      
       return !Result.empty();
     }
     
@@ -377,8 +421,27 @@
         if (Result[i].first)
           Result[i].first =
             cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+      
       return true;
     }
+  
+  // Try to simplify some other binary operator values.
+  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+      SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+      ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
+    
+      // Try to use constant folding to simplify the binary operator.
+      for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+        Constant *V = LHSVals[i].first;
+        if (V == 0) V = UndefValue::get(BO->getType());
+        Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+        
+        PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+      }
+    }
+      
+    return !Result.empty();
   }
   
   // Handle compare with phi operand, where the PHI is defined in this block.
@@ -394,7 +457,7 @@
         
         Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
         if (Res == 0) {
-          if (!LVI || !isa<Constant>(RHS))
+          if (!isa<Constant>(RHS))
             continue;
           
           LazyValueInfo::Tristate 
@@ -405,10 +468,8 @@
           Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
         }
         
-        if (isa<UndefValue>(Res))
-          Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
-        else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res))
-          Result.push_back(std::make_pair(CI, PredBB));
+        if (Constant *ConstRes = dyn_cast<Constant>(Res))
+          PushConstantIntOrUndef(Result, ConstRes, PredBB);
       }
       
       return !Result.empty();
@@ -417,30 +478,56 @@
     
     // If comparing a live-in value against a constant, see if we know the
     // live-in value on any predecessors.
-    if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
-        Cmp->getType()->isIntegerTy() && // Not vector compare.
-        (!isa<Instruction>(Cmp->getOperand(0)) ||
-         cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
-      Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
-
-      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        // If the value is known by LazyValueInfo to be a constant in a
-        // predecessor, use that information to try to thread this block.
-        LazyValueInfo::Tristate
-          Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
-                                        RHSCst, P, BB);
-        if (Res == LazyValueInfo::Unknown)
-          continue;
+    if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
+      if (!isa<Instruction>(Cmp->getOperand(0)) ||
+          cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+        Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
+          BasicBlock *P = *PI;
+          // If the value is known by LazyValueInfo to be a constant in a
+          // predecessor, use that information to try to thread this block.
+          LazyValueInfo::Tristate Res =
+            LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+                                    RHSCst, P, BB);
+          if (Res == LazyValueInfo::Unknown)
+            continue;
 
-        Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
-        Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
-      }
+          Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+          Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+        }
 
-      return !Result.empty();
+        return !Result.empty();
+      }
+      
+      // Try to find a constant value for the LHS of a comparison,
+      // and evaluate it statically if we can.
+      if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+        SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+        ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+        
+        for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+          Constant *V = LHSVals[i].first;
+          if (V == 0) V = UndefValue::get(CmpConst->getType());
+          Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+                                                      V, CmpConst);
+          PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+        }
+        
+        return !Result.empty();
+      }
     }
   }
-  return false;
+  
+  // If all else fails, see if LVI can figure out a constant value for us.
+  Constant *CI = LVI->getConstant(V, BB);
+  ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
+  if (CInt) {
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+      Result.push_back(std::make_pair(CInt, *PI));
+  }
+    
+  return !Result.empty();
 }
 
 
@@ -490,6 +577,7 @@
       // Remember if SinglePred was the entry block of the function.  If so, we
       // will need to move BB back to the entry position.
       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+      LVI->eraseBlock(SinglePred);
       MergeBasicBlockIntoOnlyPred(BB);
       
       if (isEntry && BB != &BB->getParent()->getEntryBlock())
@@ -530,7 +618,7 @@
     TerminatorInst *BBTerm = BB->getTerminator();
     for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
       if (i == BestSucc) continue;
-      RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
+      BBTerm->getSuccessor(i)->removePredecessor(BB, true);
     }
     
     DEBUG(dbgs() << "  In block '" << BB->getName()
@@ -542,65 +630,50 @@
   
   Instruction *CondInst = dyn_cast<Instruction>(Condition);
 
-  // If the condition is an instruction defined in another block, see if a
-  // predecessor has the same condition:
-  //     br COND, BBX, BBY
-  //  BBX:
-  //     br COND, BBZ, BBW
-  if (!LVI &&
-      !Condition->hasOneUse() && // Multiple uses.
-      (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
-    pred_iterator PI = pred_begin(BB), E = pred_end(BB);
-    if (isa<BranchInst>(BB->getTerminator())) {
-      for (; PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
-          if (PBI->isConditional() && PBI->getCondition() == Condition &&
-              ProcessBranchOnDuplicateCond(P, BB))
-            return true;
-      }
-    } else {
-      assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
-      for (; PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
-          if (PSI->getCondition() == Condition &&
-              ProcessSwitchOnDuplicateCond(P, BB))
-            return true;
-      }
-    }
-  }
-
   // All the rest of our checks depend on the condition being an instruction.
   if (CondInst == 0) {
     // FIXME: Unify this with code below.
-    if (LVI && ProcessThreadableEdges(Condition, BB))
+    if (ProcessThreadableEdges(Condition, BB))
       return true;
     return false;
   }  
     
   
   if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
-    if (!LVI &&
-        (!isa<PHINode>(CondCmp->getOperand(0)) ||
-         cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
-      // If we have a comparison, loop over the predecessors to see if there is
-      // a condition with a lexically identical value.
-      pred_iterator PI = pred_begin(BB), E = pred_end(BB);
-      for (; PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
-          if (PBI->isConditional() && P != BB) {
-            if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
-              if (CI->getOperand(0) == CondCmp->getOperand(0) &&
-                  CI->getOperand(1) == CondCmp->getOperand(1) &&
-                  CI->getPredicate() == CondCmp->getPredicate()) {
-                // TODO: Could handle things like (x != 4) --> (x == 17)
-                if (ProcessBranchOnDuplicateCond(P, BB))
-                  return true;
-              }
-            }
-          }
+    // For a comparison where the LHS is outside this block, it's possible
+    // that we've branched on it before.  Use LVI to see if we can simplify
+    // the branch based on that.
+    BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+    Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+    pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+    if (CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+        (!isa<Instruction>(CondCmp->getOperand(0)) ||
+         cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+      // For each predecessor edge, determine if the comparison is true or
+      // false on that edge.  If they're all true or all false, we can simplify
+      // branch.
+      // FIXME: We could handle mixed true/false by duplicating code.
+      LazyValueInfo::Tristate Baseline =      
+        LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
+                                CondConst, *PI, BB);
+      if (Baseline != LazyValueInfo::Unknown) {
+        // Check that all remaining incoming values match the first one.
+        while (++PI != PE) {
+          LazyValueInfo::Tristate Ret =
+            LVI->getPredicateOnEdge(CondCmp->getPredicate(),
+                                    CondCmp->getOperand(0), CondConst, *PI, BB);
+          if (Ret != Baseline) break;
+        }
+        
+        // If we terminated early, then one of the values didn't match.
+        if (PI == PE) {
+          unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
+          unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
+          CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
+          BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+          CondBr->eraseFromParent();
+          return true;
+        }
       }
     }
   }
@@ -1020,6 +1093,7 @@
   SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
   if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
     return false;
+  
   assert(!PredValues.empty() &&
          "ComputeValueKnownInPredecessors returned true with no values");
 
@@ -1314,8 +1388,7 @@
         << ", across block:\n    "
         << *BB << "\n");
   
-  if (LVI)
-    LVI->threadEdge(PredBB, BB, SuccBB);
+  LVI->threadEdge(PredBB, BB, SuccBB);
   
   // We are going to have to map operands from the original BB block to the new
   // copy of the block 'NewBB'.  If there are PHI nodes in BB, evaluate them to
@@ -1386,7 +1459,7 @@
     // We found a use of I outside of BB.  Rename all uses of I that are outside
     // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
     // with the two values we know.
-    SSAUpdate.Initialize(I);
+    SSAUpdate.Initialize(I->getType(), I->getName());
     SSAUpdate.AddAvailableValue(BB, I);
     SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
     
@@ -1402,7 +1475,7 @@
   TerminatorInst *PredTerm = PredBB->getTerminator();
   for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
     if (PredTerm->getSuccessor(i) == BB) {
-      RemovePredecessorAndSimplify(BB, PredBB, TD);
+      BB->removePredecessor(PredBB, true);
       PredTerm->setSuccessor(i, NewBB);
     }
   
@@ -1541,7 +1614,7 @@
     // We found a use of I outside of BB.  Rename all uses of I that are outside
     // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
     // with the two values we know.
-    SSAUpdate.Initialize(I);
+    SSAUpdate.Initialize(I->getType(), I->getName());
     SSAUpdate.AddAvailableValue(BB, I);
     SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
     
@@ -1552,7 +1625,7 @@
   
   // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
   // that we nuked.
-  RemovePredecessorAndSimplify(BB, PredBB, TD);
+  BB->removePredecessor(PredBB, true);
   
   // Remove the unconditional branch at the end of the PredBB block.
   OldPredBranch->eraseFromParent();

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LICM.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LICM.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LICM.cpp Tue Oct 26 19:48:03 2010
@@ -26,8 +26,7 @@
 //          pointer.  There are no calls in the loop which mod/ref the pointer.
 //     If these conditions are true, we can promote the loads and stores in the
 //     loop of the pointer to use a temporary alloca'd variable.  We then use
-//     the mem2reg functionality to construct the appropriate SSA form for the
-//     variable.
+//     the SSAUpdater to construct the appropriate SSA form for the value.
 //
 //===----------------------------------------------------------------------===//
 
@@ -37,14 +36,16 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Instructions.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
@@ -66,7 +67,9 @@
 namespace {
   struct LICM : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LICM() : LoopPass(&ID) {}
+    LICM() : LoopPass(ID) {
+      initializeLICMPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -76,38 +79,30 @@
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
       AU.addRequired<DominatorTree>();
-      AU.addRequired<DominanceFrontier>();  // For scalar promotion (mem2reg)
       AU.addRequired<LoopInfo>();
       AU.addRequiredID(LoopSimplifyID);
       AU.addRequired<AliasAnalysis>();
+      AU.addPreserved<AliasAnalysis>();
       AU.addPreserved<ScalarEvolution>();
-      AU.addPreserved<DominanceFrontier>();
       AU.addPreservedID(LoopSimplifyID);
     }
 
     bool doFinalization() {
-      // Free the values stored in the map
-      for (std::map<Loop *, AliasSetTracker *>::iterator
-             I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I)
-        delete I->second;
-
-      LoopToAliasMap.clear();
+      assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
       return false;
     }
 
   private:
-    // Various analyses that we use...
     AliasAnalysis *AA;       // Current AliasAnalysis information
     LoopInfo      *LI;       // Current LoopInfo
-    DominatorTree *DT;       // Dominator Tree for the current Loop...
-    DominanceFrontier *DF;   // Current Dominance Frontier
+    DominatorTree *DT;       // Dominator Tree for the current Loop.
 
-    // State that is updated as we process loops
+    // State that is updated as we process loops.
     bool Changed;            // Set to true when we change anything.
     BasicBlock *Preheader;   // The preheader block of the current loop...
     Loop *CurLoop;           // The current loop we are working on...
     AliasSetTracker *CurAST; // AliasSet information for the current loop...
-    std::map<Loop *, AliasSetTracker *> LoopToAliasMap;
+    DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
 
     /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
     void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
@@ -195,34 +190,26 @@
     /// pointerInvalidatedByLoop - Return true if the body of this loop may
     /// store into the memory location pointed to by V.
     ///
-    bool pointerInvalidatedByLoop(Value *V, unsigned Size) {
+    bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+                                  const MDNode *TBAAInfo) {
       // Check to see if any of the basic blocks in CurLoop invalidate *V.
-      return CurAST->getAliasSetForPointer(V, Size).isMod();
+      return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod();
     }
 
     bool canSinkOrHoistInst(Instruction &I);
-    bool isLoopInvariantInst(Instruction &I);
     bool isNotUsedInLoop(Instruction &I);
 
-    /// PromoteValuesInLoop - Look at the stores in the loop and promote as many
-    /// to scalars as we can.
-    ///
-    void PromoteValuesInLoop();
-
-    /// FindPromotableValuesInLoop - Check the current loop for stores to
-    /// definite pointers, which are not loaded and stored through may aliases.
-    /// If these are found, create an alloca for the value, add it to the
-    /// PromotedValues list, and keep track of the mapping from value to
-    /// alloca...
-    ///
-    void FindPromotableValuesInLoop(
-                   std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
-                                    std::map<Value*, AllocaInst*> &Val2AlMap);
+    void PromoteAliasSet(AliasSet &AS);
   };
 }
 
 char LICM::ID = 0;
-INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false);
+INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
 
 Pass *llvm::createLICMPass() { return new LICM(); }
 
@@ -236,19 +223,23 @@
   // Get our Loop and Alias Analysis information...
   LI = &getAnalysis<LoopInfo>();
   AA = &getAnalysis<AliasAnalysis>();
-  DF = &getAnalysis<DominanceFrontier>();
   DT = &getAnalysis<DominatorTree>();
 
   CurAST = new AliasSetTracker(*AA);
-  // Collect Alias info from subloops
+  // Collect Alias info from subloops.
   for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
        LoopItr != LoopItrE; ++LoopItr) {
     Loop *InnerL = *LoopItr;
-    AliasSetTracker *InnerAST = LoopToAliasMap[InnerL];
-    assert (InnerAST && "Where is my AST?");
+    AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
+    assert(InnerAST && "Where is my AST?");
 
     // What if InnerLoop was modified by other passes ?
     CurAST->add(*InnerAST);
+    
+    // Once we've incorporated the inner loop's AST into ours, we don't need the
+    // subloop's anymore.
+    delete InnerAST;
+    LoopToAliasSetMap.erase(InnerL);
   }
   
   CurLoop = L;
@@ -263,7 +254,7 @@
   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
        I != E; ++I) {
     BasicBlock *BB = *I;
-    if (LI->getLoopFor(BB) == L)        // Ignore blocks in subloops...
+    if (LI->getLoopFor(BB) == L)        // Ignore blocks in subloops.
       CurAST->add(*BB);                 // Incorporate the specified basic block
   }
 
@@ -283,15 +274,24 @@
     HoistRegion(DT->getNode(L->getHeader()));
 
   // Now that all loop invariants have been removed from the loop, promote any
-  // memory references to scalars that we can...
-  if (!DisablePromotion && Preheader && L->hasDedicatedExits())
-    PromoteValuesInLoop();
-
+  // memory references to scalars that we can.
+  if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+    // Loop over all of the alias sets in the tracker object.
+    for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+         I != E; ++I)
+      PromoteAliasSet(*I);
+  }
+  
   // Clear out loops state information for the next iteration
   CurLoop = 0;
   Preheader = 0;
 
-  LoopToAliasMap[L] = CurAST;
+  // If this loop is nested inside of another one, save the alias information
+  // for when we process the outer loop.
+  if (L->getParentLoop())
+    LoopToAliasSetMap[L] = CurAST;
+  else
+    delete CurAST;
   return Changed;
 }
 
@@ -308,7 +308,7 @@
   // If this subregion is not in the top level loop at all, exit.
   if (!CurLoop->contains(BB)) return;
 
-  // We are processing blocks in reverse dfo, so process children first...
+  // We are processing blocks in reverse dfo, so process children first.
   const std::vector<DomTreeNode*> &Children = N->getChildren();
   for (unsigned i = 0, e = Children.size(); i != e; ++i)
     SinkRegion(Children[i]);
@@ -319,6 +319,17 @@
 
   for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
     Instruction &I = *--II;
+    
+    // If the instruction is dead, we would try to sink it because it isn't used
+    // in the loop, instead, just delete it.
+    if (isInstructionTriviallyDead(&I)) {
+      DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
+      ++II;
+      CurAST->deleteValue(&I);
+      I.eraseFromParent();
+      Changed = true;
+      continue;
+    }
 
     // Check to see if we can sink this instruction to the exit blocks
     // of the loop.  We can do this if the all users of the instruction are
@@ -350,14 +361,26 @@
     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
       Instruction &I = *II++;
 
+      // Try constant folding this instruction.  If all the operands are
+      // constants, it is technically hoistable, but it would be better to just
+      // fold it.
+      if (Constant *C = ConstantFoldInstruction(&I)) {
+        DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C << '\n');
+        CurAST->copyValue(&I, C);
+        CurAST->deleteValue(&I);
+        I.replaceAllUsesWith(C);
+        I.eraseFromParent();
+        continue;
+      }
+      
       // Try hoisting the instruction out to the preheader.  We can only do this
       // if all of the operands of the instruction are loop invariant and if it
       // is safe to hoist the instruction.
       //
-      if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
+      if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
           isSafeToExecuteUnconditionally(I))
         hoist(I);
-      }
+    }
 
   const std::vector<DomTreeNode*> &Children = N->getChildren();
   for (unsigned i = 0, e = Children.size(); i != e; ++i)
@@ -379,10 +402,11 @@
       return true;
     
     // Don't hoist loads which have may-aliased stores in loop.
-    unsigned Size = 0;
+    uint64_t Size = 0;
     if (LI->getType()->isSized())
       Size = AA->getTypeStoreSize(LI->getType());
-    return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
+    return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
+                                     LI->getMetadata(LLVMContext::MD_tbaa));
   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
     // Handle obvious cases efficiently.
     AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
@@ -437,20 +461,6 @@
 }
 
 
-/// isLoopInvariantInst - Return true if all operands of this instruction are
-/// loop invariant.  We also filter out non-hoistable instructions here just for
-/// efficiency.
-///
-bool LICM::isLoopInvariantInst(Instruction &I) {
-  // The instruction is loop invariant if all of its operands are loop-invariant
-  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
-    if (!CurLoop->isLoopInvariant(I.getOperand(i)))
-      return false;
-
-  // If we got this far, the instruction is loop invariant!
-  return true;
-}
-
 /// sink - When an instruction is found to only be used outside of the loop,
 /// this function moves it to the exit blocks and patches up SSA form as needed.
 /// This method is guaranteed to remove the original instruction from its
@@ -460,7 +470,7 @@
   DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
 
   SmallVector<BasicBlock*, 8> ExitBlocks;
-  CurLoop->getExitBlocks(ExitBlocks);
+  CurLoop->getUniqueExitBlocks(ExitBlocks);
 
   if (isa<LoadInst>(I)) ++NumMovedLoads;
   else if (isa<CallInst>(I)) ++NumMovedCalls;
@@ -477,122 +487,101 @@
       // If I has users in unreachable blocks, eliminate.
       // If I is not void type then replaceAllUsesWith undef.
       // This allows ValueHandlers and custom metadata to adjust itself.
-      if (!I.getType()->isVoidTy())
+      if (!I.use_empty())
         I.replaceAllUsesWith(UndefValue::get(I.getType()));
       I.eraseFromParent();
     } else {
       // Move the instruction to the start of the exit block, after any PHI
       // nodes in it.
-      I.removeFromParent();
-      BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();
-      ExitBlocks[0]->getInstList().insert(InsertPt, &I);
+      I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+
+      // This instruction is no longer in the AST for the current loop, because
+      // we just sunk it out of the loop.  If we just sunk it into an outer
+      // loop, we will rediscover the operation when we process it.
+      CurAST->deleteValue(&I);
     }
-  } else if (ExitBlocks.empty()) {
+    return;
+  }
+  
+  if (ExitBlocks.empty()) {
     // The instruction is actually dead if there ARE NO exit blocks.
     CurAST->deleteValue(&I);
     // If I has users in unreachable blocks, eliminate.
     // If I is not void type then replaceAllUsesWith undef.
     // This allows ValueHandlers and custom metadata to adjust itself.
-    if (!I.getType()->isVoidTy())
+    if (!I.use_empty())
       I.replaceAllUsesWith(UndefValue::get(I.getType()));
     I.eraseFromParent();
-  } else {
-    // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to
-    // do all of the hard work of inserting PHI nodes as necessary.  We convert
-    // the value into a stack object to get it to do this.
-
-    // Firstly, we create a stack object to hold the value...
-    AllocaInst *AI = 0;
-
-    if (!I.getType()->isVoidTy()) {
-      AI = new AllocaInst(I.getType(), 0, I.getName(),
-                          I.getParent()->getParent()->getEntryBlock().begin());
-      CurAST->add(AI);
-    }
-
-    // Secondly, insert load instructions for each use of the instruction
-    // outside of the loop.
-    while (!I.use_empty()) {
-      Instruction *U = cast<Instruction>(I.use_back());
-
-      // If the user is a PHI Node, we actually have to insert load instructions
-      // in all predecessor blocks, not in the PHI block itself!
-      if (PHINode *UPN = dyn_cast<PHINode>(U)) {
-        // Only insert into each predecessor once, so that we don't have
-        // different incoming values from the same block!
-        std::map<BasicBlock*, Value*> InsertedBlocks;
-        for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i)
-          if (UPN->getIncomingValue(i) == &I) {
-            BasicBlock *Pred = UPN->getIncomingBlock(i);
-            Value *&PredVal = InsertedBlocks[Pred];
-            if (!PredVal) {
-              // Insert a new load instruction right before the terminator in
-              // the predecessor block.
-              PredVal = new LoadInst(AI, "", Pred->getTerminator());
-              CurAST->add(cast<LoadInst>(PredVal));
-            }
-
-            UPN->setIncomingValue(i, PredVal);
-          }
-
-      } else {
-        LoadInst *L = new LoadInst(AI, "", U);
-        U->replaceUsesOfWith(&I, L);
-        CurAST->add(L);
-      }
-    }
-
-    // Thirdly, insert a copy of the instruction in each exit block of the loop
-    // that is dominated by the instruction, storing the result into the memory
-    // location.  Be careful not to insert the instruction into any particular
-    // basic block more than once.
-    std::set<BasicBlock*> InsertedBlocks;
-    BasicBlock *InstOrigBB = I.getParent();
-
-    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
-      BasicBlock *ExitBlock = ExitBlocks[i];
-
-      if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) {
-        // If we haven't already processed this exit block, do so now.
-        if (InsertedBlocks.insert(ExitBlock).second) {
-          // Insert the code after the last PHI node...
-          BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
-
-          // If this is the first exit block processed, just move the original
-          // instruction, otherwise clone the original instruction and insert
-          // the copy.
-          Instruction *New;
-          if (InsertedBlocks.size() == 1) {
-            I.removeFromParent();
-            ExitBlock->getInstList().insert(InsertPt, &I);
-            New = &I;
-          } else {
-            New = I.clone();
-            CurAST->copyValue(&I, New);
-            if (!I.getName().empty())
-              New->setName(I.getName()+".le");
-            ExitBlock->getInstList().insert(InsertPt, New);
-          }
-
-          // Now that we have inserted the instruction, store it into the alloca
-          if (AI) new StoreInst(New, AI, InsertPt);
-        }
-      }
-    }
-
-    // If the instruction doesn't dominate any exit blocks, it must be dead.
-    if (InsertedBlocks.empty()) {
-      CurAST->deleteValue(&I);
-      I.eraseFromParent();
-    }
-
-    // Finally, promote the fine value to SSA form.
-    if (AI) {
-      std::vector<AllocaInst*> Allocas;
-      Allocas.push_back(AI);
-      PromoteMemToReg(Allocas, *DT, *DF, CurAST);
+    return;
+  }
+  
+  // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
+  // hard work of inserting PHI nodes as necessary.
+  SmallVector<PHINode*, 8> NewPHIs;
+  SSAUpdater SSA(&NewPHIs);
+  
+  if (!I.use_empty())
+    SSA.Initialize(I.getType(), I.getName());
+  
+  // Insert a copy of the instruction in each exit block of the loop that is
+  // dominated by the instruction.  Each exit block is known to only be in the
+  // ExitBlocks list once.
+  BasicBlock *InstOrigBB = I.getParent();
+  unsigned NumInserted = 0;
+  
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+    BasicBlock *ExitBlock = ExitBlocks[i];
+    
+    if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+      continue;
+    
+    // Insert the code after the last PHI node.
+    BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+    
+    // If this is the first exit block processed, just move the original
+    // instruction, otherwise clone the original instruction and insert
+    // the copy.
+    Instruction *New;
+    if (NumInserted++ == 0) {
+      I.moveBefore(InsertPt);
+      New = &I;
+    } else {
+      New = I.clone();
+      if (!I.getName().empty())
+        New->setName(I.getName()+".le");
+      ExitBlock->getInstList().insert(InsertPt, New);
     }
+    
+    // Now that we have inserted the instruction, inform SSAUpdater.
+    if (!I.use_empty())
+      SSA.AddAvailableValue(ExitBlock, New);
+  }
+  
+  // If the instruction doesn't dominate any exit blocks, it must be dead.
+  if (NumInserted == 0) {
+    CurAST->deleteValue(&I);
+    if (!I.use_empty())
+      I.replaceAllUsesWith(UndefValue::get(I.getType()));
+    I.eraseFromParent();
+    return;
   }
+  
+  // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
+  for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
+    // Grab the use before incrementing the iterator.
+    Use &U = UI.getUse();
+    // Increment the iterator before removing the use from the list.
+    ++UI;
+    SSA.RewriteUseAfterInsertions(U);
+  }
+  
+  // Update CurAST for NewPHIs if I had pointer type.
+  if (I.getType()->isPointerTy())
+    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+      CurAST->copyValue(&I, NewPHIs[i]);
+  
+  // Finally, remove the instruction from CurAST.  It is no longer in the loop.
+  CurAST->deleteValue(&I);
 }
 
 /// hoist - When an instruction is found to only use loop invariant operands
@@ -602,12 +591,8 @@
   DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
         << I << "\n");
 
-  // Remove the instruction from its current basic block... but don't delete the
-  // instruction.
-  I.removeFromParent();
-
-  // Insert the new node in Preheader, before the terminator.
-  Preheader->getInstList().insert(Preheader->getTerminator(), &I);
+  // Move the new node to the Preheader, before its terminator.
+  I.moveBefore(Preheader->getTerminator());
 
   if (isa<LoadInst>(I)) ++NumMovedLoads;
   else if (isa<CallInst>(I)) ++NumMovedCalls;
@@ -647,223 +632,269 @@
   return true;
 }
 
-
-/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking
+/// PromoteAliasSet - Try to promote memory values to scalars by sinking
 /// stores out of the loop and moving loads to before the loop.  We do this by
 /// looping over the stores in the loop, looking for stores to Must pointers
-/// which are loop invariant.  We promote these memory locations to use allocas
-/// instead.  These allocas can easily be raised to register values by the
-/// PromoteMem2Reg functionality.
+/// which are loop invariant.
 ///
-void LICM::PromoteValuesInLoop() {
-  // PromotedValues - List of values that are promoted out of the loop.  Each
-  // value has an alloca instruction for it, and a canonical version of the
-  // pointer.
-  std::vector<std::pair<AllocaInst*, Value*> > PromotedValues;
-  std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca
-
-  FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap);
-  if (ValueToAllocaMap.empty()) return;   // If there are values to promote.
-
-  Changed = true;
-  NumPromoted += PromotedValues.size();
-
-  std::vector<Value*> PointerValueNumbers;
-
-  // Emit a copy from the value into the alloca'd value in the loop preheader
-  TerminatorInst *LoopPredInst = Preheader->getTerminator();
-  for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
-    Value *Ptr = PromotedValues[i].second;
-
-    // If we are promoting a pointer value, update alias information for the
-    // inserted load.
-    Value *LoadValue = 0;
-    if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) {
-      // Locate a load or store through the pointer, and assign the same value
-      // to LI as we are loading or storing.  Since we know that the value is
-      // stored in this loop, this will always succeed.
-      for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end();
-           UI != E; ++UI) {
-        User *U = *UI;
-        if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
-          LoadValue = LI;
-          break;
-        } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
-          if (SI->getOperand(1) == Ptr) {
-            LoadValue = SI->getOperand(0);
-            break;
-          }
-        }
-      }
-      assert(LoadValue && "No store through the pointer found!");
-      PointerValueNumbers.push_back(LoadValue);  // Remember this for later.
-    }
-
-    // Load from the memory we are promoting.
-    LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst);
-
-    if (LoadValue) CurAST->copyValue(LoadValue, LI);
-
-    // Store into the temporary alloca.
-    new StoreInst(LI, PromotedValues[i].first, LoopPredInst);
-  }
+void LICM::PromoteAliasSet(AliasSet &AS) {
+  // We can promote this alias set if it has a store, if it is a "Must" alias
+  // set, if the pointer is loop invariant, and if we are not eliminating any
+  // volatile loads or stores.
+  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+      AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+    return;
+  
+  assert(!AS.empty() &&
+         "Must alias set should have at least one pointer element in it!");
+  Value *SomePtr = AS.begin()->getValue();
 
-  // Scan the basic blocks in the loop, replacing uses of our pointers with
-  // uses of the allocas in question.
+  // It isn't safe to promote a load/store from the loop if the load/store is
+  // conditional.  For example, turning:
   //
-  for (Loop::block_iterator I = CurLoop->block_begin(),
-         E = CurLoop->block_end(); I != E; ++I) {
-    BasicBlock *BB = *I;
-    // Rewrite all loads and stores in the block of the pointer...
-    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
-      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
-        std::map<Value*, AllocaInst*>::iterator
-          I = ValueToAllocaMap.find(L->getOperand(0));
-        if (I != ValueToAllocaMap.end())
-          L->setOperand(0, I->second);    // Rewrite load instruction...
-      } else if (StoreInst *S = dyn_cast<StoreInst>(II)) {
-        std::map<Value*, AllocaInst*>::iterator
-          I = ValueToAllocaMap.find(S->getOperand(1));
-        if (I != ValueToAllocaMap.end())
-          S->setOperand(1, I->second);    // Rewrite store instruction...
-      }
-    }
-  }
-
-  // Now that the body of the loop uses the allocas instead of the original
-  // memory locations, insert code to copy the alloca value back into the
-  // original memory location on all exits from the loop.  Note that we only
-  // want to insert one copy of the code in each exit block, though the loop may
-  // exit to the same block more than once.
+  //    for () { if (c) *P += 1; }
   //
-  SmallPtrSet<BasicBlock*, 16> ProcessedBlocks;
-
-  SmallVector<BasicBlock*, 8> ExitBlocks;
-  CurLoop->getExitBlocks(ExitBlocks);
-  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
-    if (!ProcessedBlocks.insert(ExitBlocks[i]))
-      continue;
-  
-    // Copy all of the allocas into their memory locations.
-    BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI();
-    Instruction *InsertPos = BI;
-    unsigned PVN = 0;
-    for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
-      // Load from the alloca.
-      LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
-
-      // If this is a pointer type, update alias info appropriately.
-      if (LI->getType()->isPointerTy())
-        CurAST->copyValue(PointerValueNumbers[PVN++], LI);
-
-      // Store into the memory we promoted.
-      new StoreInst(LI, PromotedValues[i].second, InsertPos);
-    }
-  }
-
-  // Now that we have done the deed, use the mem2reg functionality to promote
-  // all of the new allocas we just created into real SSA registers.
+  // into:
   //
-  std::vector<AllocaInst*> PromotedAllocas;
-  PromotedAllocas.reserve(PromotedValues.size());
-  for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)
-    PromotedAllocas.push_back(PromotedValues[i].first);
-  PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST);
-}
-
-/// FindPromotableValuesInLoop - Check the current loop for stores to definite
-/// pointers, which are not loaded and stored through may aliases and are safe
-/// for promotion.  If these are found, create an alloca for the value, add it 
-/// to the PromotedValues list, and keep track of the mapping from value to 
-/// alloca. 
-void LICM::FindPromotableValuesInLoop(
-                   std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
-                             std::map<Value*, AllocaInst*> &ValueToAllocaMap) {
-  Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin();
+  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
+  //
+  // is not safe, because *P may only be valid to access if 'c' is true.
+  // 
+  // It is safe to promote P if all uses are direct load/stores and if at
+  // least one is guaranteed to be executed.
+  bool GuaranteedToExecute = false;
+  
+  SmallVector<Instruction*, 64> LoopUses;
+  SmallPtrSet<Value*, 4> PointerMustAliases;
 
-  // Loop over all of the alias sets in the tracker object.
-  for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
-       I != E; ++I) {
-    AliasSet &AS = *I;
-    // We can promote this alias set if it has a store, if it is a "Must" alias
-    // set, if the pointer is loop invariant, and if we are not eliminating any
-    // volatile loads or stores.
-    if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
-        AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
-      continue;
+  // Check that all of the pointers in the alias set have the same type.  We
+  // cannot (yet) promote a memory location that is loaded and stored in
+  // different sizes.
+  for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+    Value *ASIV = ASI->getValue();
+    PointerMustAliases.insert(ASIV);
     
-    assert(!AS.empty() &&
-           "Must alias set should have at least one pointer element in it!");
-    Value *V = AS.begin()->getValue();
-
     // Check that all of the pointers in the alias set have the same type.  We
     // cannot (yet) promote a memory location that is loaded and stored in
     // different sizes.
-    {
-      bool PointerOk = true;
-      for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
-        if (V->getType() != I->getValue()->getType()) {
-          PointerOk = false;
-          break;
-        }
-      if (!PointerOk)
-        continue;
-    }
-
-    // It isn't safe to promote a load/store from the loop if the load/store is
-    // conditional.  For example, turning:
-    //
-    //    for () { if (c) *P += 1; }
-    //
-    // into:
-    //
-    //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
-    //
-    // is not safe, because *P may only be valid to access if 'c' is true.
-    // 
-    // It is safe to promote P if all uses are direct load/stores and if at
-    // least one is guaranteed to be executed.
-    bool GuaranteedToExecute = false;
-    bool InvalidInst = false;
-    for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+    if (SomePtr->getType() != ASIV->getType())
+      return;
+    
+    for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
          UI != UE; ++UI) {
-      // Ignore instructions not in this loop.
+      // Ignore instructions that are outside the loop.
       Instruction *Use = dyn_cast<Instruction>(*UI);
       if (!Use || !CurLoop->contains(Use))
         continue;
-
-      if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) {
-        InvalidInst = true;
-        break;
-      }
+      
+      // If there is a non-load/store instruction in the loop, we can't promote
+      // it.
+      if (isa<LoadInst>(Use))
+        assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
+      else if (isa<StoreInst>(Use)) {
+        assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+        if (Use->getOperand(0) == ASIV) return;
+      } else
+        return; // Not a load or store.
       
       if (!GuaranteedToExecute)
         GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+      
+      LoopUses.push_back(Use);
     }
+  }
+  
+  // If there isn't a guaranteed-to-execute instruction, we can't promote.
+  if (!GuaranteedToExecute)
+    return;
+  
+  // Otherwise, this is safe to promote, lets do it!
+  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');  
+  Changed = true;
+  ++NumPromoted;
 
-    // If there is an non-load/store instruction in the loop, we can't promote
-    // it.  If there isn't a guaranteed-to-execute instruction, we can't
-    // promote.
-    if (InvalidInst || !GuaranteedToExecute)
+  // We use the SSAUpdater interface to insert phi nodes as required.
+  SmallVector<PHINode*, 16> NewPHIs;
+  SSAUpdater SSA(&NewPHIs);
+  
+  // It wants to know some value of the same type as what we'll be inserting.
+  Value *SomeValue;
+  if (isa<LoadInst>(LoopUses[0]))
+    SomeValue = LoopUses[0];
+  else
+    SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
+  SSA.Initialize(SomeValue->getType(), SomeValue->getName());
+
+  // First step: bucket up uses of the pointers by the block they occur in.
+  // This is important because we have to handle multiple defs/uses in a block
+  // ourselves: SSAUpdater is purely for cross-block references.
+  // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
+  DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+    Instruction *User = LoopUses[i];
+    UsesByBlock[User->getParent()].push_back(User);
+  }
+  
+  // Okay, now we can iterate over all the blocks in the loop with uses,
+  // processing them.  Keep track of which loads are loading a live-in value.
+  SmallVector<LoadInst*, 32> LiveInLoads;
+  DenseMap<Value*, Value*> ReplacedLoads;
+  
+  for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
+    Instruction *User = LoopUses[LoopUse];
+    std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
+    
+    // If this block has already been processed, ignore this repeat use.
+    if (BlockUses.empty()) continue;
+    
+    // Okay, this is the first use in the block.  If this block just has a
+    // single user in it, we can rewrite it trivially.
+    if (BlockUses.size() == 1) {
+      // If it is a store, it is a trivial def of the value in the block.
+      if (isa<StoreInst>(User)) {
+        SSA.AddAvailableValue(User->getParent(),
+                              cast<StoreInst>(User)->getOperand(0));
+      } else {
+        // Otherwise it is a load, queue it to rewrite as a live-in load.
+        LiveInLoads.push_back(cast<LoadInst>(User));
+      }
+      BlockUses.clear();
       continue;
+    }
+    
+    // Otherwise, check to see if this block is all loads.  If so, we can queue
+    // them all as live in loads.
+    bool HasStore = false;
+    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+      if (isa<StoreInst>(BlockUses[i])) {
+        HasStore = true;
+        break;
+      }
+    }
     
-    const Type *Ty = cast<PointerType>(V->getType())->getElementType();
-    AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart);
-    PromotedValues.push_back(std::make_pair(AI, V));
+    if (!HasStore) {
+      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+      BlockUses.clear();
+      continue;
+    }
 
-    // Update the AST and alias analysis.
-    CurAST->copyValue(V, AI);
+    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+    // Since SSAUpdater is purely for cross-block values, we need to determine
+    // the order of these instructions in the block.  If the first use in the
+    // block is a load, then it uses the live in value.  The last store defines
+    // the live out value.  We handle this by doing a linear scan of the block.
+    BasicBlock *BB = User->getParent();
+    Value *StoredValue = 0;
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+        // If this is a load from an unrelated pointer, ignore it.
+        if (!PointerMustAliases.count(L->getOperand(0))) continue;
+
+        // If we haven't seen a store yet, this is a live in use, otherwise
+        // use the stored value.
+        if (StoredValue) {
+          L->replaceAllUsesWith(StoredValue);
+          ReplacedLoads[L] = StoredValue;
+        } else {
+          LiveInLoads.push_back(L);
+        }
+        continue;
+      }
+      
+      if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+        // If this is a store to an unrelated pointer, ignore it.
+        if (!PointerMustAliases.count(S->getOperand(1))) continue;
 
-    for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
-      ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));
+        // Remember that this is the active value in the block.
+        StoredValue = S->getOperand(0);
+      }
+    }
+    
+    // The last stored value that happened is the live-out for the block.
+    assert(StoredValue && "Already checked that there is a store in block");
+    SSA.AddAvailableValue(BB, StoredValue);
+    BlockUses.clear();
+  }
+  
+  // Now that all the intra-loop values are classified, set up the preheader.
+  // It gets a load of the pointer we're promoting, and it is the live-out value
+  // from the preheader.
+  LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
+                                         Preheader->getTerminator());
+  SSA.AddAvailableValue(Preheader, PreheaderLoad);
+
+  // Now that the preheader is good to go, set up the exit blocks.  Each exit
+  // block gets a store of the live-out values that feed them.  Since we've
+  // already told the SSA updater about the defs in the loop and the preheader
+  // definition, it is all set and we can start using it.
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  CurLoop->getUniqueExitBlocks(ExitBlocks);
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+    BasicBlock *ExitBlock = ExitBlocks[i];
+    Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+    Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+    new StoreInst(LiveInValue, SomePtr, InsertPos);
+  }
 
-    DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n");
+  // Okay, now we rewrite all loads that use live-in values in the loop,
+  // inserting PHI nodes as necessary.
+  for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+    LoadInst *ALoad = LiveInLoads[i];
+    Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+    ALoad->replaceAllUsesWith(NewVal);
+    CurAST->copyValue(ALoad, NewVal);
+    ReplacedLoads[ALoad] = NewVal;
+  }
+  
+  // If the preheader load is itself a pointer, we need to tell alias analysis
+  // about the new pointer we created in the preheader block and about any PHI
+  // nodes that just got inserted.
+  if (PreheaderLoad->getType()->isPointerTy()) {
+    // Copy any value stored to or loaded from a must-alias of the pointer.
+    CurAST->copyValue(SomeValue, PreheaderLoad);
+    
+    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+      CurAST->copyValue(SomeValue, NewPHIs[i]);
   }
+  
+  // Now that everything is rewritten, delete the old instructions from the body
+  // of the loop.  They should all be dead now.
+  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+    Instruction *User = LoopUses[i];
+    
+    // If this is a load that still has uses, then the load must have been added
+    // as a live value in the SSAUpdater data structure for a block (e.g. because
+    // the loaded value was stored later).  In this case, we need to recursively
+    // propagate the updates until we get to the real value.
+    if (!User->use_empty()) {
+      Value *NewVal = ReplacedLoads[User];
+      assert(NewVal && "not a replaced load?");
+      
+      // Propagate down to the ultimate replacee.  The intermediate loads
+      // could theoretically already have been deleted, so we don't want to
+      // dereference the Value*'s.
+      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+      while (RLI != ReplacedLoads.end()) {
+        NewVal = RLI->second;
+        RLI = ReplacedLoads.find(NewVal);
+      }
+      
+      User->replaceAllUsesWith(NewVal);
+      CurAST->copyValue(User, NewVal);
+    }
+    
+    CurAST->deleteValue(User);
+    User->eraseFromParent();
+  }
+  
+  // whew, we're done!
 }
 
+
 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
 void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
-  AliasSetTracker *AST = LoopToAliasMap[L];
+  AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
   if (!AST)
     return;
 
@@ -873,7 +904,7 @@
 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
 /// set.
 void LICM::deleteAnalysisValue(Value *V, Loop *L) {
-  AliasSetTracker *AST = LoopToAliasMap[L];
+  AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
   if (!AST)
     return;
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp Tue Oct 26 19:48:03 2010
@@ -28,7 +28,9 @@
   class LoopDeletion : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopDeletion() : LoopPass(&ID) {}
+    LoopDeletion() : LoopPass(ID) {
+      initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
+    }
     
     // Possibly eliminate loop L if it is dead.
     bool runOnLoop(Loop* L, LPPassManager& LPM);
@@ -55,8 +57,15 @@
 }
   
 char LoopDeletion::ID = 0;
-INITIALIZE_PASS(LoopDeletion, "loop-deletion",
-                "Delete dead loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
+                "Delete dead loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
+                "Delete dead loops", false, false)
 
 Pass* llvm::createLoopDeletionPass() {
   return new LoopDeletion();

Removed: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp?rev=117424&view=auto
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp (removed)
@@ -1,1270 +0,0 @@
-//===- LoopIndexSplit.cpp - Loop Index Splitting Pass ---------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements Loop Index Splitting Pass. This pass handles three
-// kinds of loops.
-//
-// [1] A loop may be eliminated if the body is executed exactly once.
-//     For example,
-//
-// for (i = 0; i < N; ++i) {
-//   if (i == X) {
-//     body;
-//   }
-// }
-//
-// is transformed to
-//
-// i = X;
-// body;
-//
-// [2] A loop's iteration space may be shrunk if the loop body is executed
-//     for a proper sub-range of the loop's iteration space. For example,
-//
-// for (i = 0; i < N; ++i) {
-//   if (i > A && i < B) {
-//     ...
-//   }
-// }
-//
-// is transformed to iterators from A to B, if A > 0 and B < N.
-//
-// [3] A loop may be split if the loop body is dominated by a branch.
-//     For example,
-//
-// for (i = LB; i < UB; ++i) { if (i < SV) A; else B; }
-//
-// is transformed into
-//
-// AEV = BSV = SV
-// for (i = LB; i < min(UB, AEV); ++i)
-//    A;
-// for (i = max(LB, BSV); i < UB; ++i);
-//    B;
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loop-index-split"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-
-using namespace llvm;
-
-STATISTIC(NumIndexSplit, "Number of loop index split");
-STATISTIC(NumIndexSplitRemoved, "Number of loops eliminated by loop index split");
-STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted");
-
-namespace {
-
-  class LoopIndexSplit : public LoopPass {
-  public:
-    static char ID; // Pass ID, replacement for typeid
-    LoopIndexSplit() : LoopPass(&ID) {}
-
-    // Index split Loop L. Return true if loop is split.
-    bool runOnLoop(Loop *L, LPPassManager &LPM);
-
-    void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addPreserved<ScalarEvolution>();
-      AU.addRequiredID(LCSSAID);
-      AU.addPreservedID(LCSSAID);
-      AU.addRequired<LoopInfo>();
-      AU.addPreserved<LoopInfo>();
-      AU.addRequiredID(LoopSimplifyID);
-      AU.addPreservedID(LoopSimplifyID);
-      AU.addRequired<DominatorTree>();
-      AU.addRequired<DominanceFrontier>();
-      AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
-    }
-
-  private:
-    /// processOneIterationLoop -- Eliminate loop if loop body is executed 
-    /// only once. For example,
-    /// for (i = 0; i < N; ++i) {
-    ///   if ( i == X) {
-    ///     ...
-    ///   }
-    /// }
-    ///
-    bool processOneIterationLoop();
-
-    // -- Routines used by updateLoopIterationSpace();
-
-    /// updateLoopIterationSpace -- Update loop's iteration space if loop 
-    /// body is executed for certain IV range only. For example,
-    /// 
-    /// for (i = 0; i < N; ++i) {
-    ///   if ( i > A && i < B) {
-    ///     ...
-    ///   }
-    /// }
-    /// is transformed to iterators from A to B, if A > 0 and B < N.
-    ///
-    bool updateLoopIterationSpace();
-
-    /// restrictLoopBound - Op dominates loop body. Op compares an IV based value
-    /// with a loop invariant value. Update loop's lower and upper bound based on
-    /// the loop invariant value.
-    bool restrictLoopBound(ICmpInst &Op);
-
-    // --- Routines used by splitLoop(). --- /
-
-    bool splitLoop();
-
-    /// removeBlocks - Remove basic block DeadBB and all blocks dominated by 
-    /// DeadBB. This routine is used to remove split condition's dead branch, 
-    /// dominated by DeadBB. LiveBB dominates split condition's other branch.
-    void removeBlocks(BasicBlock *DeadBB, Loop *LP, BasicBlock *LiveBB);
-    
-    /// moveExitCondition - Move exit condition EC into split condition block.
-    void moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
-                           BasicBlock *ExitBB, ICmpInst *EC, ICmpInst *SC,
-                           PHINode *IV, Instruction *IVAdd, Loop *LP,
-                           unsigned);
-    
-    /// updatePHINodes - CFG has been changed. 
-    /// Before 
-    ///   - ExitBB's single predecessor was Latch
-    ///   - Latch's second successor was Header
-    /// Now
-    ///   - ExitBB's single predecessor was Header
-    ///   - Latch's one and only successor was Header
-    ///
-    /// Update ExitBB PHINodes' to reflect this change.
-    void updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, 
-                        BasicBlock *Header,
-                        PHINode *IV, Instruction *IVIncrement, Loop *LP);
-
-    // --- Utility routines --- /
-
-    /// cleanBlock - A block is considered clean if all non terminal 
-    /// instructions are either PHINodes or IV based values.
-    bool cleanBlock(BasicBlock *BB);
-
-    /// IVisLT - If Op is comparing IV based value with a loop invariant and 
-    /// IV based value is less than  the loop invariant then return the loop 
-    /// invariant. Otherwise return NULL.
-    Value * IVisLT(ICmpInst &Op);
-
-    /// IVisLE - If Op is comparing IV based value with a loop invariant and 
-    /// IV based value is less than or equal to the loop invariant then 
-    /// return the loop invariant. Otherwise return NULL.
-    Value * IVisLE(ICmpInst &Op);
-
-    /// IVisGT - If Op is comparing IV based value with a loop invariant and 
-    /// IV based value is greater than  the loop invariant then return the loop 
-    /// invariant. Otherwise return NULL.
-    Value * IVisGT(ICmpInst &Op);
-
-    /// IVisGE - If Op is comparing IV based value with a loop invariant and 
-    /// IV based value is greater than or equal to the loop invariant then 
-    /// return the loop invariant. Otherwise return NULL.
-    Value * IVisGE(ICmpInst &Op);
-
-  private:
-
-    // Current Loop information.
-    Loop *L;
-    LPPassManager *LPM;
-    LoopInfo *LI;
-    DominatorTree *DT;
-    DominanceFrontier *DF;
-
-    PHINode *IndVar;
-    ICmpInst *ExitCondition;
-    ICmpInst *SplitCondition;
-    Value *IVStartValue;
-    Value *IVExitValue;
-    Instruction *IVIncrement;
-    SmallPtrSet<Value *, 4> IVBasedValues;
-  };
-}
-
-char LoopIndexSplit::ID = 0;
-INITIALIZE_PASS(LoopIndexSplit, "loop-index-split",
-                "Index Split Loops", false, false);
-
-Pass *llvm::createLoopIndexSplitPass() {
-  return new LoopIndexSplit();
-}
-
-// Index split Loop L. Return true if loop is split.
-bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
-  L = IncomingLoop;
-  LPM = &LPM_Ref;
-
-  // If LoopSimplify form is not available, stay out of trouble.
-  if (!L->isLoopSimplifyForm())
-    return false;
-
-  // FIXME - Nested loops make dominator info updates tricky. 
-  if (!L->getSubLoops().empty())
-    return false;
-
-  DT = &getAnalysis<DominatorTree>();
-  LI = &getAnalysis<LoopInfo>();
-  DF = &getAnalysis<DominanceFrontier>();
-
-  // Initialize loop data.
-  IndVar = L->getCanonicalInductionVariable();
-  if (!IndVar) return false;
-
-  bool P1InLoop = L->contains(IndVar->getIncomingBlock(1));
-  IVStartValue = IndVar->getIncomingValue(!P1InLoop);
-  IVIncrement = dyn_cast<Instruction>(IndVar->getIncomingValue(P1InLoop));
-  if (!IVIncrement) return false;
-  
-  IVBasedValues.clear();
-  IVBasedValues.insert(IndVar);
-  IVBasedValues.insert(IVIncrement);
-  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-       I != E; ++I) 
-    for(BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); 
-        BI != BE; ++BI) {
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BI)) 
-        if (BO != IVIncrement 
-            && (BO->getOpcode() == Instruction::Add
-                || BO->getOpcode() == Instruction::Sub))
-          if (IVBasedValues.count(BO->getOperand(0))
-              && L->isLoopInvariant(BO->getOperand(1)))
-            IVBasedValues.insert(BO);
-    }
-
-  // Reject loop if loop exit condition is not suitable.
-  BasicBlock *ExitingBlock = L->getExitingBlock();
-  if (!ExitingBlock)
-    return false;
-  BranchInst *EBR = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
-  if (!EBR) return false;
-  ExitCondition = dyn_cast<ICmpInst>(EBR->getCondition());
-  if (!ExitCondition) return false;
-  if (ExitingBlock != L->getLoopLatch()) return false;
-  IVExitValue = ExitCondition->getOperand(1);
-  if (!L->isLoopInvariant(IVExitValue))
-    IVExitValue = ExitCondition->getOperand(0);
-  if (!L->isLoopInvariant(IVExitValue))
-    return false;
-  if (!IVBasedValues.count(
-        ExitCondition->getOperand(IVExitValue == ExitCondition->getOperand(0))))
-    return false;
-
-  // If start value is more than exit value where induction variable
-  // increments by 1 then we are potentially dealing with an infinite loop.
-  // Do not index split this loop.
-  if (ConstantInt *SV = dyn_cast<ConstantInt>(IVStartValue))
-    if (ConstantInt *EV = dyn_cast<ConstantInt>(IVExitValue))
-      if (SV->getSExtValue() > EV->getSExtValue())
-        return false;
-
-  if (processOneIterationLoop())
-    return true;
-
-  if (updateLoopIterationSpace())
-    return true;
-
-  if (splitLoop())
-    return true;
-
-  return false;
-}
-
-// --- Helper routines --- 
-// isUsedOutsideLoop - Returns true iff V is used outside the loop L.
-static bool isUsedOutsideLoop(Value *V, Loop *L) {
-  for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
-    if (!L->contains(cast<Instruction>(*UI)))
-      return true;
-  return false;
-}
-
-// Return V+1
-static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt, 
-                         LLVMContext &Context) {
-  Constant *One = ConstantInt::get(V->getType(), 1, Sign);
-  return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
-}
-
-// Return V-1
-static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt,
-                          LLVMContext &Context) {
-  Constant *One = ConstantInt::get(V->getType(), 1, Sign);
-  return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
-}
-
-// Return min(V1, V2)
-static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
- 
-  Value *C = new ICmpInst(InsertPt,
-                          Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
-                          V1, V2, "lsp");
-  return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
-}
-
-// Return max(V1, V2)
-static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
- 
-  Value *C = new ICmpInst(InsertPt, 
-                          Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
-                          V1, V2, "lsp");
-  return SelectInst::Create(C, V2, V1, "lsp", InsertPt);
-}
-
-/// processOneIterationLoop -- Eliminate loop if loop body is executed 
-/// only once. For example,
-/// for (i = 0; i < N; ++i) {
-///   if ( i == X) {
-///     ...
-///   }
-/// }
-///
-bool LoopIndexSplit::processOneIterationLoop() {
-  SplitCondition = NULL;
-  BasicBlock *Latch = L->getLoopLatch();
-  BasicBlock *Header = L->getHeader();
-  BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
-  if (!BR) return false;
-  if (!isa<BranchInst>(Latch->getTerminator())) return false;
-  if (BR->isUnconditional()) return false;
-  SplitCondition = dyn_cast<ICmpInst>(BR->getCondition());
-  if (!SplitCondition) return false;
-  if (SplitCondition == ExitCondition) return false;
-  if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false;
-  if (BR->getOperand(1) != Latch) return false;
-  if (!IVBasedValues.count(SplitCondition->getOperand(0))
-      && !IVBasedValues.count(SplitCondition->getOperand(1)))
-    return false;
-
-  // If IV is used outside the loop then this loop traversal is required.
-  // FIXME: Calculate and use last IV value. 
-  if (isUsedOutsideLoop(IVIncrement, L))
-    return false;
-
-  // If BR operands are not IV or not loop invariants then skip this loop.
-  Value *OPV = SplitCondition->getOperand(0);
-  Value *SplitValue = SplitCondition->getOperand(1);
-  if (!L->isLoopInvariant(SplitValue))
-    std::swap(OPV, SplitValue);
-  if (!L->isLoopInvariant(SplitValue))
-    return false;
-  Instruction *OPI = dyn_cast<Instruction>(OPV);
-  if (!OPI) 
-    return false;
-  if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L))
-    return false;
-  Value *StartValue = IVStartValue;
-  Value *ExitValue = IVExitValue;;
-
-  if (OPV != IndVar) {
-    // If BR operand is IV based then use this operand to calculate
-    // effective conditions for loop body.
-    BinaryOperator *BOPV = dyn_cast<BinaryOperator>(OPV);
-    if (!BOPV) 
-      return false;
-    if (BOPV->getOpcode() != Instruction::Add) 
-      return false;
-    StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR);
-    ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR);
-  }
-
-  if (!cleanBlock(Header))
-    return false;
-
-  if (!cleanBlock(Latch))
-    return false;
-    
-  // If the merge point for BR is not loop latch then skip this loop.
-  if (BR->getSuccessor(0) != Latch) {
-    DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
-    assert (DF0 != DF->end() && "Unable to find dominance frontier");
-    if (!DF0->second.count(Latch))
-      return false;
-  }
-  
-  if (BR->getSuccessor(1) != Latch) {
-    DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
-    assert (DF1 != DF->end() && "Unable to find dominance frontier");
-    if (!DF1->second.count(Latch))
-      return false;
-  }
-    
-  // Now, Current loop L contains compare instruction
-  // that compares induction variable, IndVar, against loop invariant. And
-  // entire (i.e. meaningful) loop body is dominated by this compare
-  // instruction. In such case eliminate 
-  // loop structure surrounding this loop body. For example,
-  //     for (int i = start; i < end; ++i) {
-  //         if ( i == somevalue) {
-  //           loop_body
-  //         }
-  //     }
-  // can be transformed into
-  //     if (somevalue >= start && somevalue < end) {
-  //        i = somevalue;
-  //        loop_body
-  //     }
-
-  // Replace index variable with split value in loop body. Loop body is executed
-  // only when index variable is equal to split value.
-  IndVar->replaceAllUsesWith(SplitValue);
-
-  // Replace split condition in header.
-  // Transform 
-  //      SplitCondition : icmp eq i32 IndVar, SplitValue
-  // into
-  //      c1 = icmp uge i32 SplitValue, StartValue
-  //      c2 = icmp ult i32 SplitValue, ExitValue
-  //      and i32 c1, c2 
-  Instruction *C1 = new ICmpInst(BR, ExitCondition->isSigned() ? 
-                                 ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
-                                 SplitValue, StartValue, "lisplit");
-
-  CmpInst::Predicate C2P  = ExitCondition->getPredicate();
-  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
-  if (LatchBR->getOperand(1) != Header)
-    C2P = CmpInst::getInversePredicate(C2P);
-  Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");
-  Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);
-
-  SplitCondition->replaceAllUsesWith(NSplitCond);
-  SplitCondition->eraseFromParent();
-
-  // Remove Latch to Header edge.
-  BasicBlock *LatchSucc = NULL;
-  Header->removePredecessor(Latch);
-  for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch);
-       SI != E; ++SI) {
-    if (Header != *SI)
-      LatchSucc = *SI;
-  }
-
-  // Clean up latch block.
-  Value *LatchBRCond = LatchBR->getCondition();
-  LatchBR->setUnconditionalDest(LatchSucc);
-  RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond);
-  
-  LPM->deleteLoopFromQueue(L);
-
-  // Update Dominator Info.
-  // Only CFG change done is to remove Latch to Header edge. This
-  // does not change dominator tree because Latch did not dominate
-  // Header.
-  if (DF) {
-    DominanceFrontier::iterator HeaderDF = DF->find(Header);
-    if (HeaderDF != DF->end()) 
-      DF->removeFromFrontier(HeaderDF, Header);
-
-    DominanceFrontier::iterator LatchDF = DF->find(Latch);
-    if (LatchDF != DF->end()) 
-      DF->removeFromFrontier(LatchDF, Header);
-  }
-
-  ++NumIndexSplitRemoved;
-  return true;
-}
-
-/// restrictLoopBound - Op dominates loop body. Op compares an IV based value 
-/// with a loop invariant value. Update loop's lower and upper bound based on 
-/// the loop invariant value.
-bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
-  bool Sign = Op.isSigned();
-  Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
-  if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
-    BranchInst *EBR = 
-      cast<BranchInst>(ExitCondition->getParent()->getTerminator());
-    ExitCondition->setPredicate(ExitCondition->getInversePredicate());
-    BasicBlock *T = EBR->getSuccessor(0);
-    EBR->setSuccessor(0, EBR->getSuccessor(1));
-    EBR->setSuccessor(1, T);
-  }
-
-  LLVMContext &Context = Op.getContext();
-
-  // New upper and lower bounds.
-  Value *NLB = NULL;
-  Value *NUB = NULL;
-  if (Value *V = IVisLT(Op)) {
-    // Restrict upper bound.
-    if (IVisLE(*ExitCondition)) 
-      V = getMinusOne(V, Sign, PHTerm, Context);
-    NUB = getMin(V, IVExitValue, Sign, PHTerm);
-  } else if (Value *V = IVisLE(Op)) {
-    // Restrict upper bound.
-    if (IVisLT(*ExitCondition)) 
-      V = getPlusOne(V, Sign, PHTerm, Context);
-    NUB = getMin(V, IVExitValue, Sign, PHTerm);
-  } else if (Value *V = IVisGT(Op)) {
-    // Restrict lower bound.
-    V = getPlusOne(V, Sign, PHTerm, Context);
-    NLB = getMax(V, IVStartValue, Sign, PHTerm);
-  } else if (Value *V = IVisGE(Op))
-    // Restrict lower bound.
-    NLB = getMax(V, IVStartValue, Sign, PHTerm);
-
-  if (!NLB && !NUB) 
-    return false;
-
-  if (NLB) {
-    unsigned i = IndVar->getBasicBlockIndex(L->getLoopPreheader());
-    IndVar->setIncomingValue(i, NLB);
-  }
-
-  if (NUB) {
-    unsigned i = (ExitCondition->getOperand(0) != IVExitValue);
-    ExitCondition->setOperand(i, NUB);
-  }
-  return true;
-}
-
-/// updateLoopIterationSpace -- Update loop's iteration space if loop 
-/// body is executed for certain IV range only. For example,
-/// 
-/// for (i = 0; i < N; ++i) {
-///   if ( i > A && i < B) {
-///     ...
-///   }
-/// }
-/// is transformed to iterators from A to B, if A > 0 and B < N.
-///
-bool LoopIndexSplit::updateLoopIterationSpace() {
-  SplitCondition = NULL;
-  if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
-      || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
-    return false;
-  BasicBlock *Latch = L->getLoopLatch();
-  BasicBlock *Header = L->getHeader();
-  BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
-  if (!BR) return false;
-  if (!isa<BranchInst>(Latch->getTerminator())) return false;
-  if (BR->isUnconditional()) return false;
-  BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition());
-  if (!AND) return false;
-  if (AND->getOpcode() != Instruction::And) return false;
-  ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0));
-  ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1));
-  if (!Op0 || !Op1)
-    return false;
-  IVBasedValues.insert(AND);
-  IVBasedValues.insert(Op0);
-  IVBasedValues.insert(Op1);
-  if (!cleanBlock(Header)) return false;
-  BasicBlock *ExitingBlock = ExitCondition->getParent();
-  if (!cleanBlock(ExitingBlock)) return false;
-
-  // If the merge point for BR is not loop latch then skip this loop.
-  if (BR->getSuccessor(0) != Latch) {
-    DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
-    assert (DF0 != DF->end() && "Unable to find dominance frontier");
-    if (!DF0->second.count(Latch))
-      return false;
-  }
-  
-  if (BR->getSuccessor(1) != Latch) {
-    DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
-    assert (DF1 != DF->end() && "Unable to find dominance frontier");
-    if (!DF1->second.count(Latch))
-      return false;
-  }
-    
-  // Verify that loop exiting block has only two predecessors, where one pred
-  // is split condition block. The other predecessor will become exiting block's
-  // dominator after CFG is updated. TODO : Handle CFG's where exiting block has
-  // more than two predecessors. This requires extra work in updating dominator
-  // information.
-  BasicBlock *ExitingBBPred = NULL;
-  for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock);
-       PI != PE; ++PI) {
-    BasicBlock *BB = *PI;
-    if (Header == BB)
-      continue;
-    if (ExitingBBPred)
-      return false;
-    else
-      ExitingBBPred = BB;
-  }
-
-  if (!restrictLoopBound(*Op0))
-    return false;
-
-  if (!restrictLoopBound(*Op1))
-    return false;
-
-  // Update CFG.
-  if (BR->getSuccessor(0) == ExitingBlock)
-    BR->setUnconditionalDest(BR->getSuccessor(1));
-  else
-    BR->setUnconditionalDest(BR->getSuccessor(0));
-
-  AND->eraseFromParent();
-  if (Op0->use_empty())
-    Op0->eraseFromParent();
-  if (Op1->use_empty())
-    Op1->eraseFromParent();
-
-  // Update dominator info. Now, ExitingBlock has only one predecessor, 
-  // ExitingBBPred, and it is ExitingBlock's immediate dominator.
-  DT->changeImmediateDominator(ExitingBlock, ExitingBBPred);
-
-  BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1);
-  if (L->contains(ExitBlock))
-    ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0);
-
-  // If ExitingBlock is a member of the loop basic blocks' DF list then
-  // replace ExitingBlock with header and exit block in the DF list
-  DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock);
-  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-       I != E; ++I) {
-    BasicBlock *BB = *I;
-    if (BB == Header || BB == ExitingBlock)
-      continue;
-    DominanceFrontier::iterator BBDF = DF->find(BB);
-    DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
-    DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
-    while (DomSetI != DomSetE) {
-      DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
-      ++DomSetI;
-      BasicBlock *DFBB = *CurrentItr;
-      if (DFBB == ExitingBlock) {
-        BBDF->second.erase(DFBB);
-        for (DominanceFrontier::DomSetType::iterator 
-               EBI = ExitingBlockDF->second.begin(),
-               EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI) 
-          BBDF->second.insert(*EBI);
-      }
-    }
-  }
-  ++NumRestrictBounds;
-  return true;
-}
-
-/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB.
-/// This routine is used to remove split condition's dead branch, dominated by
-/// DeadBB. LiveBB dominates split condition's other branch.
-void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, 
-                                  BasicBlock *LiveBB) {
-
-  // First update DeadBB's dominance frontier. 
-  SmallVector<BasicBlock *, 8> FrontierBBs;
-  DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB);
-  if (DeadBBDF != DF->end()) {
-    SmallVector<BasicBlock *, 8> PredBlocks;
-    
-    DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second;
-    for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(),
-           DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI) 
-      {
-      BasicBlock *FrontierBB = *DeadBBSetI;
-      FrontierBBs.push_back(FrontierBB);
-
-      // Remove any PHI incoming edge from blocks dominated by DeadBB.
-      PredBlocks.clear();
-      for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
-          PI != PE; ++PI) {
-        BasicBlock *P = *PI;
-        if (DT->dominates(DeadBB, P))
-          PredBlocks.push_back(P);
-      }
-
-      for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end();
-          FBI != FBE; ++FBI) {
-        if (PHINode *PN = dyn_cast<PHINode>(FBI)) {
-          for(SmallVector<BasicBlock *, 8>::iterator PI = PredBlocks.begin(),
-                PE = PredBlocks.end(); PI != PE; ++PI) {
-            BasicBlock *P = *PI;
-            PN->removeIncomingValue(P);
-          }
-        }
-        else
-          break;
-      }      
-    }
-  }
-  
-  // Now remove DeadBB and all nodes dominated by DeadBB in df order.
-  SmallVector<BasicBlock *, 32> WorkList;
-  DomTreeNode *DN = DT->getNode(DeadBB);
-  for (df_iterator<DomTreeNode*> DI = df_begin(DN),
-         E = df_end(DN); DI != E; ++DI) {
-    BasicBlock *BB = DI->getBlock();
-    WorkList.push_back(BB);
-    BB->replaceAllUsesWith(UndefValue::get(
-                                       Type::getLabelTy(DeadBB->getContext())));
-  }
-
-  while (!WorkList.empty()) {
-    BasicBlock *BB = WorkList.pop_back_val();
-    LPM->deleteSimpleAnalysisValue(BB, LP);
-    for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); 
-        BBI != BBE; ) {
-      Instruction *I = BBI;
-      ++BBI;
-      I->replaceAllUsesWith(UndefValue::get(I->getType()));
-      LPM->deleteSimpleAnalysisValue(I, LP);
-      I->eraseFromParent();
-    }
-    DT->eraseNode(BB);
-    DF->removeBlock(BB);
-    LI->removeBlock(BB);
-    BB->eraseFromParent();
-  }
-
-  // Update Frontier BBs' dominator info.
-  while (!FrontierBBs.empty()) {
-    BasicBlock *FBB = FrontierBBs.pop_back_val();
-    BasicBlock *NewDominator = FBB->getSinglePredecessor();
-    if (!NewDominator) {
-      pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
-      NewDominator = *PI;
-      ++PI;
-      if (NewDominator != LiveBB) {
-        for(; PI != PE; ++PI) {
-          BasicBlock *P = *PI;
-          if (P == LiveBB) {
-            NewDominator = LiveBB;
-            break;
-          }
-          NewDominator = DT->findNearestCommonDominator(NewDominator, P);
-        }
-      }
-    }
-    assert (NewDominator && "Unable to fix dominator info.");
-    DT->changeImmediateDominator(FBB, NewDominator);
-    DF->changeImmediateDominator(FBB, NewDominator, DT);
-  }
-
-}
-
-// moveExitCondition - Move exit condition EC into split condition block CondBB.
-void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
-                                       BasicBlock *ExitBB, ICmpInst *EC, 
-                                       ICmpInst *SC, PHINode *IV, 
-                                       Instruction *IVAdd, Loop *LP,
-                                       unsigned ExitValueNum) {
-
-  BasicBlock *ExitingBB = EC->getParent();
-  Instruction *CurrentBR = CondBB->getTerminator();
-
-  // Move exit condition into split condition block.
-  EC->moveBefore(CurrentBR);
-  EC->setOperand(ExitValueNum == 0 ? 1 : 0, IV);
-
-  // Move exiting block's branch into split condition block. Update its branch
-  // destination.
-  BranchInst *ExitingBR = cast<BranchInst>(ExitingBB->getTerminator());
-  ExitingBR->moveBefore(CurrentBR);
-  BasicBlock *OrigDestBB = NULL;
-  if (ExitingBR->getSuccessor(0) == ExitBB) {
-    OrigDestBB = ExitingBR->getSuccessor(1);
-    ExitingBR->setSuccessor(1, ActiveBB);
-  }
-  else {
-    OrigDestBB = ExitingBR->getSuccessor(0);
-    ExitingBR->setSuccessor(0, ActiveBB);
-  }
-    
-  // Remove split condition and current split condition branch.
-  SC->eraseFromParent();
-  CurrentBR->eraseFromParent();
-
-  // Connect exiting block to original destination.
-  BranchInst::Create(OrigDestBB, ExitingBB);
-
-  // Update PHINodes
-  updatePHINodes(ExitBB, ExitingBB, CondBB, IV, IVAdd, LP);
-
-  // Fix dominator info.
-  // ExitBB is now dominated by CondBB
-  DT->changeImmediateDominator(ExitBB, CondBB);
-  DF->changeImmediateDominator(ExitBB, CondBB, DT);
-
-  // Blocks outside the loop may have been in the dominance frontier of blocks
-  // inside the condition; this is now impossible because the blocks inside the
-  // condition no longer dominate the exit.  Remove the relevant blocks from
-  // the dominance frontiers.
-  for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
-       I != E; ++I) {
-    if (!DT->properlyDominates(CondBB, *I)) continue;
-    DominanceFrontier::iterator BBDF = DF->find(*I);
-    DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
-    DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
-    while (DomSetI != DomSetE) {
-      DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
-      ++DomSetI;
-      BasicBlock *DFBB = *CurrentItr;
-      if (!LP->contains(DFBB))
-        BBDF->second.erase(DFBB);
-    }
-  }
-}
-
-/// updatePHINodes - CFG has been changed. 
-/// Before 
-///   - ExitBB's single predecessor was Latch
-///   - Latch's second successor was Header
-/// Now
-///   - ExitBB's single predecessor is Header
-///   - Latch's one and only successor is Header
-///
-/// Update ExitBB PHINodes' to reflect this change.
-void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, 
-                                    BasicBlock *Header,
-                                    PHINode *IV, Instruction *IVIncrement,
-                                    Loop *LP) {
-
-  for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end(); 
-       BI != BE; ) {
-    PHINode *PN = dyn_cast<PHINode>(BI);
-    ++BI;
-    if (!PN)
-      break;
-
-    Value *V = PN->getIncomingValueForBlock(Latch);
-    if (PHINode *PHV = dyn_cast<PHINode>(V)) {
-      // PHV is in Latch. PHV has one use is in ExitBB PHINode. And one use
-      // in Header which is new incoming value for PN.
-      Value *NewV = NULL;
-      for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end(); 
-           UI != E; ++UI) 
-        if (PHINode *U = dyn_cast<PHINode>(*UI)) 
-          if (LP->contains(U)) {
-            NewV = U;
-            break;
-          }
-
-      // Add incoming value from header only if PN has any use inside the loop.
-      if (NewV)
-        PN->addIncoming(NewV, Header);
-
-    } else if (Instruction *PHI = dyn_cast<Instruction>(V)) {
-      // If this instruction is IVIncrement then IV is new incoming value 
-      // from header otherwise this instruction must be incoming value from 
-      // header because loop is in LCSSA form.
-      if (PHI == IVIncrement)
-        PN->addIncoming(IV, Header);
-      else
-        PN->addIncoming(V, Header);
-    } else
-      // Otherwise this is an incoming value from header because loop is in 
-      // LCSSA form.
-      PN->addIncoming(V, Header);
-    
-    // Remove incoming value from Latch.
-    PN->removeIncomingValue(Latch);
-  }
-}
-
-bool LoopIndexSplit::splitLoop() {
-  SplitCondition = NULL;
-  if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
-      || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
-    return false;
-  BasicBlock *Header = L->getHeader();
-  BasicBlock *Latch = L->getLoopLatch();
-  BranchInst *SBR = NULL; // Split Condition Branch
-  BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
-  // If Exiting block includes loop variant instructions then this
-  // loop may not be split safely.
-  BasicBlock *ExitingBlock = ExitCondition->getParent();
-  if (!cleanBlock(ExitingBlock)) return false;
-
-  LLVMContext &Context = Header->getContext();
-
-  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-       I != E; ++I) {
-    BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
-    if (!BR || BR->isUnconditional()) continue;
-    ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
-    if (!CI || CI == ExitCondition 
-        || CI->getPredicate() == ICmpInst::ICMP_NE
-        || CI->getPredicate() == ICmpInst::ICMP_EQ)
-      continue;
-
-    // Unable to handle triangle loops at the moment.
-    // In triangle loop, split condition is in header and one of the
-    // the split destination is loop latch. If split condition is EQ
-    // then such loops are already handle in processOneIterationLoop().
-    if (Header == (*I)
-        && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
-      continue;
-
-    // If the block does not dominate the latch then this is not a diamond.
-    // Such loop may not benefit from index split.
-    if (!DT->dominates((*I), Latch))
-      continue;
-
-    // If split condition branches heads do not have single predecessor, 
-    // SplitCondBlock, then is not possible to remove inactive branch.
-    if (!BR->getSuccessor(0)->getSinglePredecessor() 
-        || !BR->getSuccessor(1)->getSinglePredecessor())
-      return false;
-
-    // If the merge point for BR is not loop latch then skip this condition.
-    if (BR->getSuccessor(0) != Latch) {
-      DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
-      assert (DF0 != DF->end() && "Unable to find dominance frontier");
-      if (!DF0->second.count(Latch))
-        continue;
-    }
-    
-    if (BR->getSuccessor(1) != Latch) {
-      DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
-      assert (DF1 != DF->end() && "Unable to find dominance frontier");
-      if (!DF1->second.count(Latch))
-        continue;
-    }
-    SplitCondition = CI;
-    SBR = BR;
-    break;
-  }
-   
-  if (!SplitCondition)
-    return false;
-
-  // If the predicate sign does not match then skip.
-  if (ExitCondition->isSigned() != SplitCondition->isSigned())
-    return false;
-
-  unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
-  unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
-  Value *SplitValue = SplitCondition->getOperand(SVOpNum);
-  if (!L->isLoopInvariant(SplitValue))
-    return false;
-  if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
-    return false;
-
-  // Check for side effects.
-  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-       I != E; ++I) {
-    BasicBlock *BB = *I;
-
-    assert(DT->dominates(Header, BB));
-    if (DT->properlyDominates(SplitCondition->getParent(), BB))
-      continue;
-
-    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
-         BI != BE; ++BI) {
-      Instruction *Inst = BI;
-
-      if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
-          && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
-        return false;
-    }
-  }
-
-  // Normalize loop conditions so that it is easier to calculate new loop
-  // bounds.
-  if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
-    ExitCondition->setPredicate(ExitCondition->getInversePredicate());
-    BasicBlock *T = EBR->getSuccessor(0);
-    EBR->setSuccessor(0, EBR->getSuccessor(1));
-    EBR->setSuccessor(1, T);
-  }
-
-  if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
-    SplitCondition->setPredicate(SplitCondition->getInversePredicate());
-    BasicBlock *T = SBR->getSuccessor(0);
-    SBR->setSuccessor(0, SBR->getSuccessor(1));
-    SBR->setSuccessor(1, T);
-  }
-
-  //[*] Calculate new loop bounds.
-  Value *AEV = SplitValue;
-  Value *BSV = SplitValue;
-  bool Sign = SplitCondition->isSigned();
-  Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
-  if (IVisLT(*ExitCondition)) {
-    if (IVisLT(*SplitCondition)) {
-      /* Do nothing */
-    }
-    else if (IVisLE(*SplitCondition)) {
-      AEV = getPlusOne(SplitValue, Sign, PHTerm, Context);
-      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
-    } else {
-      assert (0 && "Unexpected split condition!");
-    }
-  }
-  else if (IVisLE(*ExitCondition)) {
-    if (IVisLT(*SplitCondition)) {
-      AEV = getMinusOne(SplitValue, Sign, PHTerm, Context);
-    }
-    else if (IVisLE(*SplitCondition)) {
-      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
-    } else {
-      assert (0 && "Unexpected split condition!");
-    }
-  } else {
-    assert (0 && "Unexpected exit condition!");
-  }
-  AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
-  BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
-
-  // [*] Clone Loop
-  ValueMap<const Value *, Value *> VMap;
-  Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
-  Loop *ALoop = L;
-
-  // [*] ALoop's exiting edge enters BLoop's header.
-  //    ALoop's original exit block becomes BLoop's exit block.
-  PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
-  BasicBlock *A_ExitingBlock = ExitCondition->getParent();
-  BranchInst *A_ExitInsn =
-    dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
-  assert (A_ExitInsn && "Unable to find suitable loop exit branch");
-  BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
-  BasicBlock *B_Header = BLoop->getHeader();
-  if (ALoop->contains(B_ExitBlock)) {
-    B_ExitBlock = A_ExitInsn->getSuccessor(0);
-    A_ExitInsn->setSuccessor(0, B_Header);
-  } else
-    A_ExitInsn->setSuccessor(1, B_Header);
-
-  // [*] Update ALoop's exit value using new exit value.
-  ExitCondition->setOperand(EVOpNum, AEV);
-
-  // [*] Update BLoop's header phi nodes. Remove incoming PHINode's from
-  //     original loop's preheader. Add incoming PHINode values from
-  //     ALoop's exiting block. Update BLoop header's domiantor info.
-
-  // Collect inverse map of Header PHINodes.
-  DenseMap<Value *, Value *> InverseMap;
-  for (BasicBlock::iterator BI = ALoop->getHeader()->begin(), 
-         BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
-    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PHINode *PNClone = cast<PHINode>(VMap[PN]);
-      InverseMap[PNClone] = PN;
-    } else
-      break;
-  }
-
-  BasicBlock *A_Preheader = ALoop->getLoopPreheader();
-  for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
-       BI != BE; ++BI) {
-    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      // Remove incoming value from original preheader.
-      PN->removeIncomingValue(A_Preheader);
-
-      // Add incoming value from A_ExitingBlock.
-      if (PN == B_IndVar)
-        PN->addIncoming(BSV, A_ExitingBlock);
-      else { 
-        PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
-        Value *V2 = NULL;
-        // If loop header is also loop exiting block then
-        // OrigPN is incoming value for B loop header.
-        if (A_ExitingBlock == ALoop->getHeader())
-          V2 = OrigPN;
-        else
-          V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
-        PN->addIncoming(V2, A_ExitingBlock);
-      }
-    } else
-      break;
-  }
-
-  DT->changeImmediateDominator(B_Header, A_ExitingBlock);
-  DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
-  
-  // [*] Update BLoop's exit block. Its new predecessor is BLoop's exit
-  //     block. Remove incoming PHINode values from ALoop's exiting block.
-  //     Add new incoming values from BLoop's incoming exiting value.
-  //     Update BLoop exit block's dominator info..
-  BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
-  for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
-       BI != BE; ++BI) {
-    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)], 
-                                                            B_ExitingBlock);
-      PN->removeIncomingValue(A_ExitingBlock);
-    } else
-      break;
-  }
-
-  DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
-  DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);
-
-  //[*] Split ALoop's exit edge. This creates a new block which
-  //    serves two purposes. First one is to hold PHINode defnitions
-  //    to ensure that ALoop's LCSSA form. Second use it to act
-  //    as a preheader for BLoop.
-  BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);
-
-  //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
-  //    in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
-  for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
-      BI != BE; ++BI) {
-    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
-      PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
-      newPHI->addIncoming(V1, A_ExitingBlock);
-      A_ExitBlock->getInstList().push_front(newPHI);
-      PN->removeIncomingValue(A_ExitBlock);
-      PN->addIncoming(newPHI, A_ExitBlock);
-    } else
-      break;
-  }
-
-  //[*] Eliminate split condition's inactive branch from ALoop.
-  BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
-  BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
-  BasicBlock *A_InactiveBranch = NULL;
-  BasicBlock *A_ActiveBranch = NULL;
-  A_ActiveBranch = A_BR->getSuccessor(0);
-  A_InactiveBranch = A_BR->getSuccessor(1);
-  A_BR->setUnconditionalDest(A_ActiveBranch);
-  removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
-
-  //[*] Eliminate split condition's inactive branch in from BLoop.
-  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
-  BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
-  BasicBlock *B_InactiveBranch = NULL;
-  BasicBlock *B_ActiveBranch = NULL;
-  B_ActiveBranch = B_BR->getSuccessor(1);
-  B_InactiveBranch = B_BR->getSuccessor(0);
-  B_BR->setUnconditionalDest(B_ActiveBranch);
-  removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);
-
-  BasicBlock *A_Header = ALoop->getHeader();
-  if (A_ExitingBlock == A_Header)
-    return true;
-
-  //[*] Move exit condition into split condition block to avoid
-  //    executing dead loop iteration.
-  ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
-  Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
-  ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
-
-  moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
-                    cast<ICmpInst>(SplitCondition), IndVar, IVIncrement, 
-                    ALoop, EVOpNum);
-
-  moveExitCondition(B_SplitCondBlock, B_ActiveBranch, 
-                    B_ExitBlock, B_ExitCondition,
-                    B_SplitCondition, B_IndVar, B_IndVarIncrement, 
-                    BLoop, EVOpNum);
-
-  ++NumIndexSplit;
-  return true;
-}
-
-/// cleanBlock - A block is considered clean if all non terminal instructions 
-/// are either, PHINodes, IV based.
-bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
-  Instruction *Terminator = BB->getTerminator();
-  for(BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 
-      BI != BE; ++BI) {
-    Instruction *I = BI;
-
-    if (isa<PHINode>(I) || I == Terminator || I == ExitCondition
-        || I == SplitCondition || IVBasedValues.count(I) 
-        || isa<DbgInfoIntrinsic>(I))
-      continue;
-
-    if (I->mayHaveSideEffects())
-      return false;
-
-    // I is used only inside this block then it is OK.
-    bool usedOutsideBB = false;
-    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); 
-         UI != UE; ++UI) {
-      Instruction *U = cast<Instruction>(*UI);
-      if (U->getParent() != BB)
-        usedOutsideBB = true;
-    }
-    if (!usedOutsideBB)
-      continue;
-
-    // Otherwise we have a instruction that may not allow loop spliting.
-    return false;
-  }
-  return true;
-}
-
-/// IVisLT - If Op is comparing IV based value with an loop invariant and 
-/// IV based value is less than  the loop invariant then return the loop 
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLT(ICmpInst &Op) {
-  ICmpInst::Predicate P = Op.getPredicate();
-  if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT) 
-      && IVBasedValues.count(Op.getOperand(0)) 
-      && L->isLoopInvariant(Op.getOperand(1)))
-    return Op.getOperand(1);
-
-  if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT) 
-      && IVBasedValues.count(Op.getOperand(1)) 
-      && L->isLoopInvariant(Op.getOperand(0)))
-    return Op.getOperand(0);
-
-  return NULL;
-}
-
-/// IVisLE - If Op is comparing IV based value with an loop invariant and 
-/// IV based value is less than or equal to the loop invariant then 
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLE(ICmpInst &Op) {
-  ICmpInst::Predicate P = Op.getPredicate();
-  if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
-      && IVBasedValues.count(Op.getOperand(0)) 
-      && L->isLoopInvariant(Op.getOperand(1)))
-    return Op.getOperand(1);
-
-  if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE) 
-      && IVBasedValues.count(Op.getOperand(1)) 
-      && L->isLoopInvariant(Op.getOperand(0)))
-    return Op.getOperand(0);
-
-  return NULL;
-}
-
-/// IVisGT - If Op is comparing IV based value with an loop invariant and 
-/// IV based value is greater than  the loop invariant then return the loop 
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGT(ICmpInst &Op) {
-  ICmpInst::Predicate P = Op.getPredicate();
-  if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT) 
-      && IVBasedValues.count(Op.getOperand(0)) 
-      && L->isLoopInvariant(Op.getOperand(1)))
-    return Op.getOperand(1);
-
-  if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT) 
-      && IVBasedValues.count(Op.getOperand(1)) 
-      && L->isLoopInvariant(Op.getOperand(0)))
-    return Op.getOperand(0);
-
-  return NULL;
-}
-
-/// IVisGE - If Op is comparing IV based value with an loop invariant and 
-/// IV based value is greater than or equal to the loop invariant then 
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGE(ICmpInst &Op) {
-  ICmpInst::Predicate P = Op.getPredicate();
-  if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
-      && IVBasedValues.count(Op.getOperand(0)) 
-      && L->isLoopInvariant(Op.getOperand(1)))
-    return Op.getOperand(1);
-
-  if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE) 
-      && IVBasedValues.count(Op.getOperand(1)) 
-      && L->isLoopInvariant(Op.getOperand(0)))
-    return Op.getOperand(0);
-
-  return NULL;
-}
-

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp Tue Oct 26 19:48:03 2010
@@ -35,7 +35,9 @@
   class LoopRotate : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopRotate() : LoopPass(&ID) {}
+    LoopRotate() : LoopPass(ID) {
+      initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+    }
 
     // Rotate Loop L as many times as possible. Return true if
     // loop is rotated at least once.
@@ -79,7 +81,11 @@
 }
   
 char LoopRotate::ID = 0;
-INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
 
 Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
 
@@ -143,11 +149,11 @@
   // FIXME: Use common api to estimate size.
   for (BasicBlock::const_iterator OI = OrigHeader->begin(), 
          OE = OrigHeader->end(); OI != OE; ++OI) {
-      if (isa<PHINode>(OI)) 
-        continue;           // PHI nodes don't count.
-      if (isa<DbgInfoIntrinsic>(OI))
-        continue;  // Debug intrinsics don't count as size.
-      ++Size;
+    if (isa<PHINode>(OI)) 
+      continue;           // PHI nodes don't count.
+    if (isa<DbgInfoIntrinsic>(OI))
+      continue;  // Debug intrinsics don't count as size.
+    ++Size;
   }
 
   if (Size > MAX_HEADER_SIZE)
@@ -187,13 +193,30 @@
   for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
     ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
 
-  // For the rest of the instructions, create a clone in the OldPreHeader.
+  // For the rest of the instructions, either hoist to the OrigPreheader if
+  // possible or create a clone in the OldPreHeader if not.
   TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
-  for (; I != E; ++I) {
-    Instruction *C = I->clone();
-    C->setName(I->getName());
+  while (I != E) {
+    Instruction *Inst = I++;
+    
+    // If the instruction's operands are invariant and it doesn't read or write
+    // memory, then it is safe to hoist.  Doing this doesn't change the order of
+    // execution in the preheader, but does prevent the instruction from
+    // executing in each iteration of the loop.  This means it is safe to hoist
+    // something that might trap, but isn't safe to hoist something that reads
+    // memory (without proving that the loop doesn't write).
+    if (L->hasLoopInvariantOperands(Inst) &&
+        !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+        !isa<TerminatorInst>(Inst)) {
+      Inst->moveBefore(LoopEntryBranch);
+      continue;
+    }
+    
+    // Otherwise, create a duplicate of the instruction.
+    Instruction *C = Inst->clone();
+    C->setName(Inst->getName());
     C->insertBefore(LoopEntryBranch);
-    ValueMap[I] = C;
+    ValueMap[Inst] = C;
   }
 
   // Along with all the other instructions, we just cloned OrigHeader's
@@ -221,7 +244,7 @@
 
     // The value now exits in two versions: the initial value in the preheader
     // and the loop "next" value in the original header.
-    SSA.Initialize(OrigHeaderVal);
+    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
     SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
     SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
 
@@ -261,6 +284,26 @@
   // NewHeader is now the header of the loop.
   L->moveToHeader(NewHeader);
 
+  // Move the original header to the bottom of the loop, where it now more
+  // naturally belongs. This isn't necessary for correctness, and CodeGen can
+  // usually reorder blocks on its own to fix things like this up, but it's
+  // still nice to keep the IR readable.
+  //
+  // The original header should have only one predecessor at this point, since
+  // we checked that the loop had a proper preheader and unique backedge before
+  // we started.
+  assert(OrigHeader->getSinglePredecessor() &&
+         "Original loop header has too many predecessors after loop rotation!");
+  OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
+
+  // Also, since this original header only has one predecessor, zap its
+  // PHI nodes, which are now trivial.
+  FoldSingleEntryPHINodes(OrigHeader);
+
+  // TODO: We could just go ahead and merge OrigHeader into its predecessor
+  // at this point, if we don't mind updating dominator info.
+
+  // Establish a new preheader, update dominators, etc.
   preserveCanonicalLoopForm(LPM);
 
   ++NumRotated;

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp Tue Oct 26 19:48:03 2010
@@ -113,7 +113,7 @@
 public:
   void CountRegister(const SCEV *Reg, size_t LUIdx);
   void DropRegister(const SCEV *Reg, size_t LUIdx);
-  void DropUse(size_t LUIdx);
+  void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
 
   bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
 
@@ -152,18 +152,27 @@
 }
 
 void
-RegUseTracker::DropUse(size_t LUIdx) {
-  // Remove the use index from every register's use list.
+RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+  assert(LUIdx <= LastLUIdx);
+
+  // Update RegUses. The data structure is not optimized for this purpose;
+  // we must iterate through it and update each of the bit vectors.
   for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
-       I != E; ++I)
-    I->second.UsedByIndices.reset(LUIdx);
+       I != E; ++I) {
+    SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+    if (LUIdx < UsedByIndices.size())
+      UsedByIndices[LUIdx] =
+        LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
+    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
+  }
 }
 
 bool
 RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
-  if (!RegUsesMap.count(Reg)) return false;
-  const SmallBitVector &UsedByIndices =
-    RegUsesMap.find(Reg)->second.UsedByIndices;
+  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+  if (I == RegUsesMap.end())
+    return false;
+  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
   int i = UsedByIndices.find_first();
   if (i == -1) return false;
   if ((size_t)i != LUIdx) return true;
@@ -441,12 +450,12 @@
   // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
     if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
-      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
-                                       IgnoreSignificantBits);
-      if (!Start) return 0;
       const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
                                       IgnoreSignificantBits);
       if (!Step) return 0;
+      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+                                       IgnoreSignificantBits);
+      if (!Start) return 0;
       return SE.getAddRecExpr(Start, Step, AR->getLoop());
     }
     return 0;
@@ -505,12 +514,14 @@
   } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
     SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
     int64_t Result = ExtractImmediate(NewOps.front(), SE);
-    S = SE.getAddExpr(NewOps);
+    if (Result != 0)
+      S = SE.getAddExpr(NewOps);
     return Result;
   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
     SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
     int64_t Result = ExtractImmediate(NewOps.front(), SE);
-    S = SE.getAddRecExpr(NewOps, AR->getLoop());
+    if (Result != 0)
+      S = SE.getAddRecExpr(NewOps, AR->getLoop());
     return Result;
   }
   return 0;
@@ -528,12 +539,14 @@
   } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
     SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
     GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
-    S = SE.getAddExpr(NewOps);
+    if (Result)
+      S = SE.getAddExpr(NewOps);
     return Result;
   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
     SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
     GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
-    S = SE.getAddRecExpr(NewOps, AR->getLoop());
+    if (Result)
+      S = SE.getAddRecExpr(NewOps, AR->getLoop());
     return Result;
   }
   return 0;
@@ -603,7 +616,7 @@
   bool Changed = false;
 
   while (!DeadInsts.empty()) {
-    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+    Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
 
     if (I == 0 || !isInstructionTriviallyDead(I))
       continue;
@@ -640,8 +653,6 @@
     : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
       SetupCost(0) {}
 
-  unsigned getNumRegs() const { return NumRegs; }
-
   bool operator<(const Cost &Other) const;
 
   void Loose();
@@ -717,6 +728,9 @@
         (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
          isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
     ++SetupCost;
+
+    NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+                 Reg->hasComputableLoopEvolution(L);
 }
 
 /// RatePrimaryRegister - Record this register in the set. If we haven't seen it
@@ -751,9 +765,6 @@
       return;
     }
     RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
-
-    NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
-                 BaseReg->hasComputableLoopEvolution(L);
   }
 
   if (F.BaseRegs.size() > 1)
@@ -990,8 +1001,6 @@
   void DeleteFormula(Formula &F);
   void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
 
-  void check() const;
-
   void print(raw_ostream &OS) const;
   void dump() const;
 };
@@ -1083,7 +1092,7 @@
   for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
        E = Offsets.end(); I != E; ++I) {
     OS << *I;
-    if (next(I) != E)
+    if (llvm::next(I) != E)
       OS << ',';
   }
   OS << '}';
@@ -1254,32 +1263,6 @@
   }
 };
 
-/// FormulaSorter - This class implements an ordering for formulae which sorts
-/// the by their standalone cost.
-class FormulaSorter {
-  /// These two sets are kept empty, so that we compute standalone costs.
-  DenseSet<const SCEV *> VisitedRegs;
-  SmallPtrSet<const SCEV *, 16> Regs;
-  Loop *L;
-  LSRUse *LU;
-  ScalarEvolution &SE;
-  DominatorTree &DT;
-
-public:
-  FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
-    : L(l), LU(&lu), SE(se), DT(dt) {}
-
-  bool operator()(const Formula &A, const Formula &B) {
-    Cost CostA;
-    CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
-    Regs.clear();
-    Cost CostB;
-    CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
-    Regs.clear();
-    return CostA < CostB;
-  }
-};
-
 /// LSRInstance - This class holds state for the main loop strength reduction
 /// logic.
 class LSRInstance {
@@ -1338,7 +1321,7 @@
                                     LSRUse::KindType Kind,
                                     const Type *AccessTy);
 
-  void DeleteUse(LSRUse &LU);
+  void DeleteUse(LSRUse &LU, size_t LUIdx);
 
   LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
 
@@ -1364,6 +1347,10 @@
   void FilterOutUndesirableDedicatedRegisters();
 
   size_t EstimateSearchSpaceComplexity() const;
+  void NarrowSearchSpaceByDetectingSupersets();
+  void NarrowSearchSpaceByCollapsingUnrolledCode();
+  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+  void NarrowSearchSpaceByPickingWinnerRegs();
   void NarrowSearchSpaceUsingHeuristics();
 
   void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
@@ -1597,7 +1584,7 @@
   const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
 
   // Add one to the backedge-taken count to get the trip count.
-  const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
+  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
   if (IterationCount != SE.getSCEV(Sel)) return Cond;
 
   // Check for a max calculation that matches the pattern. There's no check
@@ -1918,10 +1905,13 @@
 }
 
 /// DeleteUse - Delete the given use from the Uses list.
-void LSRInstance::DeleteUse(LSRUse &LU) {
+void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
   if (&LU != &Uses.back())
     std::swap(LU, Uses.back());
   Uses.pop_back();
+
+  // Update RegUses.
+  RegUses.SwapAndDropUse(LUIdx, Uses.size());
 }
 
 /// FindUseWithFormula - Look for a use distinct from OrigLU which is has
@@ -1929,33 +1919,41 @@
 LSRUse *
 LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                        const LSRUse &OrigLU) {
-  // Search all uses for the formula. This could be more clever. Ignore
-  // ICmpZero uses because they may contain formulae generated by
-  // GenerateICmpZeroScales, in which case adding fixup offsets may
-  // be invalid.
+  // Search all uses for the formula. This could be more clever.
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
     LSRUse &LU = Uses[LUIdx];
+    // Check whether this use is close enough to OrigLU, to see whether it's
+    // worthwhile looking through its formulae.
+    // Ignore ICmpZero uses because they may contain formulae generated by
+    // GenerateICmpZeroScales, in which case adding fixup offsets may
+    // be invalid.
     if (&LU != &OrigLU &&
         LU.Kind != LSRUse::ICmpZero &&
         LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
         LU.WidestFixupType == OrigLU.WidestFixupType &&
         LU.HasFormulaWithSameRegs(OrigF)) {
+      // Scan through this use's formulae.
       for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
            E = LU.Formulae.end(); I != E; ++I) {
         const Formula &F = *I;
+        // Check to see if this formula has the same registers and symbols
+        // as OrigF.
         if (F.BaseRegs == OrigF.BaseRegs &&
             F.ScaledReg == OrigF.ScaledReg &&
             F.AM.BaseGV == OrigF.AM.BaseGV &&
-            F.AM.Scale == OrigF.AM.Scale &&
-            LU.Kind) {
+            F.AM.Scale == OrigF.AM.Scale) {
           if (F.AM.BaseOffs == 0)
             return &LU;
+          // This is the formula where all the registers and symbols matched;
+          // there aren't going to be any others. Since we declined it, we
+          // can skip the rest of the formulae and procede to the next LSRUse.
           break;
         }
       }
     }
   }
 
+  // Nothing looked good.
   return 0;
 }
 
@@ -1987,7 +1985,7 @@
   for (SmallSetVector<const SCEV *, 4>::const_iterator
        I = Strides.begin(), E = Strides.end(); I != E; ++I)
     for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
-         next(I); NewStrideIter != E; ++NewStrideIter) {
+         llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
       const SCEV *OldStride = *I;
       const SCEV *NewStride = *NewStrideIter;
 
@@ -2226,14 +2224,13 @@
 /// separate registers. If C is non-null, multiply each subexpression by C.
 static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
                             SmallVectorImpl<const SCEV *> &Ops,
-                            SmallVectorImpl<const SCEV *> &UninterestingOps,
                             const Loop *L,
                             ScalarEvolution &SE) {
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
     // Break out add operands.
     for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
          I != E; ++I)
-      CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
+      CollectSubexprs(*I, C, Ops, L, SE);
     return;
   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
     // Split a non-zero base out of an addrec.
@@ -2241,8 +2238,8 @@
       CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                        AR->getStepRecurrence(SE),
                                        AR->getLoop()),
-                      C, Ops, UninterestingOps, L, SE);
-      CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
+                      C, Ops, L, SE);
+      CollectSubexprs(AR->getStart(), C, Ops, L, SE);
       return;
     }
   } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2252,17 +2249,13 @@
             dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
         CollectSubexprs(Mul->getOperand(1),
                         C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
-                        Ops, UninterestingOps, L, SE);
+                        Ops, L, SE);
         return;
       }
   }
 
-  // Otherwise use the value itself. Loop-variant "unknown" values are
-  // uninteresting; we won't be able to do anything meaningful with them.
-  if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
-    UninterestingOps.push_back(S);
-  else
-    Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+  // Otherwise use the value itself, optionally with a scale applied.
+  Ops.push_back(C ? SE.getMulExpr(C, S) : S);
 }
 
 /// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2276,19 +2269,19 @@
   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
     const SCEV *BaseReg = Base.BaseRegs[i];
 
-    SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
-    CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
-
-    // Add any uninteresting values as one register, as we won't be able to
-    // form any interesting reassociation opportunities with them. They'll
-    // just have to be added inside the loop no matter what we do.
-    if (!UninterestingAddOps.empty())
-      AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+    SmallVector<const SCEV *, 8> AddOps;
+    CollectSubexprs(BaseReg, 0, AddOps, L, SE);
 
     if (AddOps.size() == 1) continue;
 
     for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
          JE = AddOps.end(); J != JE; ++J) {
+
+      // Loop-variant "unknown" values are uninteresting; we won't be able to
+      // do anything meaningful with them.
+      if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+        continue;
+
       // Don't pull a constant into a register if the constant could be folded
       // into an immediate field.
       if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
@@ -2298,9 +2291,9 @@
 
       // Collect all operands except *J.
       SmallVector<const SCEV *, 8> InnerAddOps
-        (         ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+        (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
       InnerAddOps.append
-        (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+        (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
 
       // Don't leave just a constant behind in a register if the constant could
       // be folded into an immediate field.
@@ -2396,7 +2389,7 @@
       if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
                      LU.Kind, LU.AccessTy, TLI)) {
         // Add the offset to the base register.
-        const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+        const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
         // If it cancelled out, drop the base register, otherwise update it.
         if (NewG->isZero()) {
           std::swap(F.BaseRegs[i], F.BaseRegs.back());
@@ -2797,11 +2790,17 @@
   }
 
   GenerateCrossUseConstantOffsets();
+
+  DEBUG(dbgs() << "\n"
+                  "After generating reuse formulae:\n";
+        print_uses(dbgs()));
 }
 
-/// If their are multiple formulae with the same set of registers used
+/// If there are multiple formulae with the same set of registers used
 /// by other uses, pick the best one and delete the others.
 void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+  DenseSet<const SCEV *> VisitedRegs;
+  SmallPtrSet<const SCEV *, 16> Regs;
 #ifndef NDEBUG
   bool ChangedFormulae = false;
 #endif
@@ -2814,7 +2813,6 @@
 
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
     LSRUse &LU = Uses[LUIdx];
-    FormulaSorter Sorter(L, LU, SE, DT);
     DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
 
     bool Any = false;
@@ -2840,7 +2838,14 @@
         BestFormulae.insert(std::make_pair(Key, FIdx));
       if (!P.second) {
         Formula &Best = LU.Formulae[P.first->second];
-        if (Sorter.operator()(F, Best))
+
+        Cost CostF;
+        CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+        Regs.clear();
+        Cost CostBest;
+        CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+        Regs.clear();
+        if (CostF < CostBest)
           std::swap(F, Best);
         DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
               dbgs() << "\n"
@@ -2880,7 +2885,7 @@
 /// this many solutions because it prunes the search space, but the pruning
 /// isn't always sufficient.
 size_t LSRInstance::EstimateSearchSpaceComplexity() const {
-  uint32_t Power = 1;
+  size_t Power = 1;
   for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
        E = Uses.end(); I != E; ++I) {
     size_t FSize = I->Formulae.size();
@@ -2895,11 +2900,11 @@
   return Power;
 }
 
-/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
-/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extraordinary amount
-/// of time in some worst-case scenarios.
-void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not necessarily hurt register pressure); remove
+/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
     DEBUG(dbgs() << "The search space is too complex.\n");
 
@@ -2957,7 +2962,12 @@
     DEBUG(dbgs() << "After pre-selection:\n";
           print_uses(dbgs()));
   }
+}
 
+/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
+/// for expressions like A, A+1, A+2, etc., allocate a single register for
+/// them.
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
     DEBUG(dbgs() << "The search space is too complex.\n");
 
@@ -2982,6 +2992,28 @@
 
               LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
 
+              // Update the relocs to reference the new use.
+              for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+                   E = Fixups.end(); I != E; ++I) {
+                LSRFixup &Fixup = *I;
+                if (Fixup.LUIdx == LUIdx) {
+                  Fixup.LUIdx = LUThatHas - &Uses.front();
+                  Fixup.Offset += F.AM.BaseOffs;
+                  // Add the new offset to LUThatHas' offset list.
+                  if (LUThatHas->Offsets.back() != Fixup.Offset) {
+                    LUThatHas->Offsets.push_back(Fixup.Offset);
+                    if (Fixup.Offset > LUThatHas->MaxOffset)
+                      LUThatHas->MaxOffset = Fixup.Offset;
+                    if (Fixup.Offset < LUThatHas->MinOffset)
+                      LUThatHas->MinOffset = Fixup.Offset;
+                  }
+                  DEBUG(dbgs() << "New fixup has offset "
+                               << Fixup.Offset << '\n');
+                }
+                if (Fixup.LUIdx == NumUses-1)
+                  Fixup.LUIdx = LUIdx;
+              }
+
               // Delete formulae from the new use which are no longer legal.
               bool Any = false;
               for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
@@ -3000,22 +3032,8 @@
               if (Any)
                 LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
 
-              // Update the relocs to reference the new use.
-              for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
-                   E = Fixups.end(); I != E; ++I) {
-                LSRFixup &Fixup = *I;
-                if (Fixup.LUIdx == LUIdx) {
-                  Fixup.LUIdx = LUThatHas - &Uses.front();
-                  Fixup.Offset += F.AM.BaseOffs;
-                  DEBUG(dbgs() << "New fixup has offset "
-                               << Fixup.Offset << '\n');
-                }
-                if (Fixup.LUIdx == NumUses-1)
-                  Fixup.LUIdx = LUIdx;
-              }
-
               // Delete the old use.
-              DeleteUse(LU);
+              DeleteUse(LU, LUIdx);
               --LUIdx;
               --NumUses;
               break;
@@ -3028,7 +3046,30 @@
     DEBUG(dbgs() << "After pre-selection:\n";
           print_uses(dbgs()));
   }
+}
+
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call 
+/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// we've done more filtering, as it may be able to find more formulae to
+/// eliminate.
+void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
+  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+    DEBUG(dbgs() << "The search space is too complex.\n");
+
+    DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+                    "undesirable dedicated registers.\n");
+
+    FilterOutUndesirableDedicatedRegisters();
 
+    DEBUG(dbgs() << "After pre-selection:\n";
+          print_uses(dbgs()));
+  }
+}
+
+/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
+/// to be profitable, and then in any use which has any reference to that
+/// register, delete all formulae which do not reference that register.
+void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
   // With all other options exhausted, loop until the system is simple
   // enough to handle.
   SmallPtrSet<const SCEV *, 4> Taken;
@@ -3090,6 +3131,17 @@
   }
 }
 
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+  NarrowSearchSpaceByDetectingSupersets();
+  NarrowSearchSpaceByCollapsingUnrolledCode();
+  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+  NarrowSearchSpaceByPickingWinnerRegs();
+}
+
 /// SolveRecurse - This is the recursive solver.
 void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                                Cost &SolutionCost,
@@ -3633,10 +3685,6 @@
   // to formulate the values needed for the uses.
   GenerateAllReuseFormulae();
 
-  DEBUG(dbgs() << "\n"
-                  "After generating reuse formulae:\n";
-        print_uses(dbgs()));
-
   FilterOutUndesirableDedicatedRegisters();
   NarrowSearchSpaceUsingHeuristics();
 
@@ -3743,15 +3791,25 @@
 }
 
 char LoopStrengthReduce::ID = 0;
-INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce",
-                "Loop Strength Reduction", false, false);
+INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
+                "Loop Strength Reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
+                "Loop Strength Reduction", false, false)
+
 
 Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
   return new LoopStrengthReduce(TLI);
 }
 
 LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
-  : LoopPass(&ID), TLI(tli) {}
+  : LoopPass(ID), TLI(tli) {
+    initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+  }
 
 void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
   // We split critical edges, so we change the CFG.  However, we do update

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnrollPass.cpp Tue Oct 26 19:48:03 2010
@@ -27,7 +27,7 @@
 using namespace llvm;
 
 static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
+UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
   cl::desc("The cut-off point for automatic loop unrolling"));
 
 static cl::opt<unsigned>
@@ -43,12 +43,20 @@
   class LoopUnroll : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopUnroll() : LoopPass(&ID) {}
+    LoopUnroll() : LoopPass(ID) {
+      initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+    }
 
     /// A magic value for use with the Threshold parameter to indicate
     /// that the loop unroll should be performed regardless of how much
     /// code expansion would result.
     static const unsigned NoThreshold = UINT_MAX;
+    
+    // Threshold to use when optsize is specified (and there is no
+    // explicit -unroll-threshold).
+    static const unsigned OptSizeUnrollThreshold = 50;
+    
+    unsigned CurrentThreshold;
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -62,19 +70,22 @@
       AU.addPreservedID(LoopSimplifyID);
       AU.addRequiredID(LCSSAID);
       AU.addPreservedID(LCSSAID);
+      AU.addPreserved<ScalarEvolution>();
       // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
       // If loop unroll does not preserve dom info then LCSSA pass on next
       // loop will receive invalid dom info.
       // For now, recreate dom info, if loop is unrolled.
       AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
-      AU.addPreserved<ScalarEvolution>();
     }
   };
 }
 
 char LoopUnroll::ID = 0;
-INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
 
 Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
 
@@ -84,17 +95,50 @@
   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
        I != E; ++I)
     Metrics.analyzeBasicBlock(*I);
-  NumCalls = Metrics.NumCalls;
-  return Metrics.NumInsts;
+  NumCalls = Metrics.NumInlineCandidates;
+  
+  unsigned LoopSize = Metrics.NumInsts;
+  
+  // If we can identify the induction variable, we know that it will become
+  // constant when we unroll the loop, so factor that into our loop size 
+  // estimate.
+  // FIXME: We have to divide by InlineConstants::InstrCost because the
+  // measure returned by CountCodeReductionForConstant is not an instruction
+  // count, but rather a weight as defined by InlineConstants.  It would 
+  // probably be a good idea to standardize on a single weighting scheme by
+  // pushing more of the logic for weighting into CodeMetrics.
+  if (PHINode *IndVar = L->getCanonicalInductionVariable()) {
+    unsigned SizeDecrease = Metrics.CountCodeReductionForConstant(IndVar);
+    // NOTE: Because SizeDecrease is a fuzzy estimate, we don't want to allow
+    // it to totally negate the cost of unrolling a loop.
+    SizeDecrease = SizeDecrease > LoopSize / 2 ? LoopSize / 2 : SizeDecrease;
+  }
+  
+  // Don't allow an estimate of size zero.  This would allow unrolling of loops
+  // with huge iteration counts, which is a compile time problem even if it's
+  // not a problem for code quality.
+  if (LoopSize == 0) LoopSize = 1;
+  
+  return LoopSize;
 }
 
 bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+  
   LoopInfo *LI = &getAnalysis<LoopInfo>();
 
   BasicBlock *Header = L->getHeader();
   DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
         << "] Loop %" << Header->getName() << "\n");
   (void)Header;
+  
+  // Determine the current unrolling threshold.  While this is normally set
+  // from UnrollThreshold, it is overridden to a smaller value if the current
+  // function is marked as optimize-for-size, and the unroll threshold was
+  // not user specified.
+  CurrentThreshold = UnrollThreshold;
+  if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) &&
+      UnrollThreshold.getNumOccurrences() == 0)
+    CurrentThreshold = OptSizeUnrollThreshold;
 
   // Find trip count
   unsigned TripCount = L->getSmallConstantTripCount();
@@ -112,25 +156,25 @@
   }
 
   // Enforce the threshold.
-  if (UnrollThreshold != NoThreshold) {
-    unsigned NumCalls;
-    unsigned LoopSize = ApproximateLoopSize(L, NumCalls);
+  if (CurrentThreshold != NoThreshold) {
+    unsigned NumInlineCandidates;
+    unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
     DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
-    if (NumCalls != 0) {
-      DEBUG(dbgs() << "  Not unrolling loop with function calls.\n");
+    if (NumInlineCandidates != 0) {
+      DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
       return false;
     }
     uint64_t Size = (uint64_t)LoopSize*Count;
-    if (TripCount != 1 && Size > UnrollThreshold) {
+    if (TripCount != 1 && Size > CurrentThreshold) {
       DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count
-            << " because size: " << Size << ">" << UnrollThreshold << "\n");
+            << " because size: " << Size << ">" << CurrentThreshold << "\n");
       if (!UnrollAllowPartial) {
         DEBUG(dbgs() << "  will not try to unroll partially because "
               << "-unroll-allow-partial not given\n");
         return false;
       }
       // Reduce unroll count to be modulo of TripCount for partial unrolling
-      Count = UnrollThreshold / LoopSize;
+      Count = CurrentThreshold / LoopSize;
       while (Count != 0 && TripCount%Count != 0) {
         Count--;
       }
@@ -148,12 +192,7 @@
     return false;
 
   // FIXME: Reconstruct dom info, because it is not preserved properly.
-  DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
-  if (DT) {
+  if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
     DT->runOnFunction(*F);
-    DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
-    if (DF)
-      DF->runOnFunction(*F);
-  }
   return true;
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp Tue Oct 26 19:48:03 2010
@@ -77,7 +77,6 @@
     bool redoLoop;
 
     Loop *currentLoop;
-    DominanceFrontier *DF;
     DominatorTree *DT;
     BasicBlock *loopHeader;
     BasicBlock *loopPreheader;
@@ -92,15 +91,17 @@
   public:
     static char ID; // Pass ID, replacement for typeid
     explicit LoopUnswitch(bool Os = false) : 
-      LoopPass(&ID), OptimizeForSize(Os), redoLoop(false), 
-      currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL),
-      loopPreheader(NULL) {}
+      LoopPass(ID), OptimizeForSize(Os), redoLoop(false), 
+      currentLoop(NULL), DT(NULL), loopHeader(NULL),
+      loopPreheader(NULL) {
+        initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
+      }
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
     bool processCurrentLoop();
 
     /// This transformation requires natural loop information & requires that
-    /// loop preheaders be inserted into the CFG...
+    /// loop preheaders be inserted into the CFG.
     ///
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequiredID(LoopSimplifyID);
@@ -110,7 +111,6 @@
       AU.addRequiredID(LCSSAID);
       AU.addPreservedID(LCSSAID);
       AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
     }
 
   private:
@@ -160,7 +160,13 @@
   };
 }
 char LoopUnswitch::ID = 0;
-INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+                      false, false)
 
 Pass *llvm::createLoopUnswitchPass(bool Os) { 
   return new LoopUnswitch(Os); 
@@ -201,7 +207,6 @@
 bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
   LI = &getAnalysis<LoopInfo>();
   LPM = &LPM_Ref;
-  DF = getAnalysisIfAvailable<DominanceFrontier>();
   DT = getAnalysisIfAvailable<DominatorTree>();
   currentLoop = L;
   Function *F = currentLoop->getHeader()->getParent();
@@ -216,8 +221,6 @@
     // FIXME: Reconstruct dom info, because it is not preserved properly.
     if (DT)
       DT->runOnFunction(*F);
-    if (DF)
-      DF->runOnFunction(*F);
   }
   return Changed;
 }
@@ -282,19 +285,18 @@
   return Changed;
 }
 
-/// isTrivialLoopExitBlock - Check to see if all paths from BB either:
-///   1. Exit the loop with no side effects.
-///   2. Branch to the latch block with no side-effects.
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (including infinite loops).
 ///
-/// If these conditions are true, we return true and set ExitBB to the block we
+/// If so, we return true and set ExitBB to the block we
 /// exit through.
 ///
 static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
                                          BasicBlock *&ExitBB,
                                          std::set<BasicBlock*> &Visited) {
   if (!Visited.insert(BB).second) {
-    // Already visited and Ok, end of recursion.
-    return true;
+    // Already visited. Without more analysis, this could indicate an infinite loop.
+    return false;
   } else if (!L->contains(BB)) {
     // Otherwise, this is a loop exit, this is fine so long as this is the
     // first exit.
@@ -324,7 +326,7 @@
 /// process.  If so, return the block that is exited to, otherwise return null.
 static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
   std::set<BasicBlock*> Visited;
-  Visited.insert(L->getHeader());  // Branches to header are ok.
+  Visited.insert(L->getHeader());  // Branches to header make infinite loops.
   BasicBlock *ExitBB = 0;
   if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
     return ExitBB;
@@ -356,8 +358,8 @@
     if (!BI->isConditional() || BI->getCondition() != Cond)
       return false;
   
-    // Check to see if a successor of the branch is guaranteed to go to the
-    // latch block or exit through a one exit block without having any 
+    // Check to see if a successor of the branch is guaranteed to 
+    // exit through a unique exit block without having any 
     // side-effects.  If so, determine the value of Cond that causes it to do
     // this.
     if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 
@@ -460,10 +462,10 @@
 // current values into those specified by VMap.
 //
 static inline void RemapInstruction(Instruction *I,
-                                    ValueMap<const Value *, Value*> &VMap) {
+                                    ValueToValueMapTy &VMap) {
   for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
     Value *Op = I->getOperand(op);
-    ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+    ValueToValueMapTy::iterator It = VMap.find(Op);
     if (It != VMap.end()) Op = It->second;
     I->setOperand(op, Op);
   }
@@ -471,7 +473,7 @@
 
 /// CloneLoop - Recursively clone the specified loop and all of its children,
 /// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
                        LoopInfo *LI, LPPassManager *LPM) {
   Loop *New = new Loop();
   LPM->insertLoop(New, PL);
@@ -615,7 +617,7 @@
   // the loop preheader and exit blocks), keeping track of the mapping between
   // the instructions and blocks.
   NewBlocks.reserve(LoopBlocks.size());
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
     BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
     NewBlocks.push_back(NewBB);
@@ -653,7 +655,7 @@
     for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
       PN = cast<PHINode>(I);
       Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
-      ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+      ValueToValueMapTy::iterator It = VMap.find(V);
       if (It != VMap.end()) V = It->second;
       PN->addIncoming(V, NewExit);
     }

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp Tue Oct 26 19:48:03 2010
@@ -304,7 +304,9 @@
     bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
-    MemCpyOpt() : FunctionPass(&ID) {}
+    MemCpyOpt() : FunctionPass(ID) {
+      initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+    }
 
   private:
     // This transformation requires dominator postdominator info
@@ -321,7 +323,8 @@
     bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
     bool processMemCpy(MemCpyInst *M);
     bool processMemMove(MemMoveInst *M);
-    bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+    bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+                              uint64_t cpyLen, CallInst *C);
     bool iterateOnFunction(Function &F);
   };
   
@@ -331,9 +334,13 @@
 // createMemCpyOptPass - The public interface to this file...
 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
 
-INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false);
-
-
+INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+                    false, false)
 
 /// processStore - When GVN is scanning forward over instructions, we look for
 /// some other patterns to fold away.  In particular, this looks for stores to
@@ -342,6 +349,37 @@
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (SI->isVolatile()) return false;
   
+  TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  if (!TD) return false;
+
+  // Detect cases where we're performing call slot forwarding, but
+  // happen to be using a load-store pair to implement it, rather than
+  // a memcpy.
+  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+    if (!LI->isVolatile() && LI->hasOneUse()) {
+      MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+
+      MemDepResult dep = MD.getDependency(LI);
+      CallInst *C = 0;
+      if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
+        C = dyn_cast<CallInst>(dep.getInst());
+      
+      if (C) {
+        bool changed = performCallSlotOptzn(LI,
+                        SI->getPointerOperand()->stripPointerCasts(), 
+                        LI->getPointerOperand()->stripPointerCasts(),
+                        TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+        if (changed) {
+          MD.removeInstruction(SI);
+          SI->eraseFromParent();
+          LI->eraseFromParent();
+          ++NumMemCpyInstr;
+          return true;
+        }
+      }
+    }
+  }
+  
   LLVMContext &Context = SI->getContext();
 
   // There are two cases that are interesting for this code to handle: memcpy
@@ -354,8 +392,6 @@
   if (!ByteVal)
     return false;
 
-  TargetData *TD = getAnalysisIfAvailable<TargetData>();
-  if (!TD) return false;
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   Module *M = SI->getParent()->getParent()->getParent();
 
@@ -489,7 +525,9 @@
 /// performCallSlotOptzn - takes a memcpy and a call that it depends on,
 /// and checks for the possibility of a call slot optimization by having
 /// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+                                     Value *cpyDest, Value *cpySrc,
+                                     uint64_t cpyLen, CallInst *C) {
   // The general transformation to keep in mind is
   //
   //   call @func(..., src, ...)
@@ -506,16 +544,8 @@
 
   // Deliberately get the source and destination with bitcasts stripped away,
   // because we'll need to do type comparisons based on the underlying type.
-  Value *cpyDest = cpy->getDest();
-  Value *cpySrc = cpy->getSource();
   CallSite CS(C);
 
-  // We need to be able to reason about the size of the memcpy, so we require
-  // that it be a constant.
-  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
-  if (!cpyLength)
-    return false;
-
   // Require that src be an alloca.  This simplifies the reasoning considerably.
   AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
   if (!srcAlloca)
@@ -532,7 +562,7 @@
   uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
     srcArraySize->getZExtValue();
 
-  if (cpyLength->getZExtValue() < srcSize)
+  if (cpyLen < srcSize)
     return false;
 
   // Check that accessing the first srcSize bytes of dest will not cause a
@@ -601,7 +631,7 @@
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
+  if (AA.getModRefInfo(C, cpyDest, srcSize) !=
       AliasAnalysis::NoModRef)
     return false;
 
@@ -630,7 +660,6 @@
 
   // Remove the memcpy
   MD.removeInstruction(cpy);
-  cpy->eraseFromParent();
   ++NumMemCpyInstr;
 
   return true;
@@ -644,6 +673,10 @@
 bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
 
+  // We can only optimize statically-sized memcpy's.
+  ConstantInt *cpyLen = dyn_cast<ConstantInt>(M->getLength());
+  if (!cpyLen) return false;
+
   // The are two possible optimizations we can do for memcpy:
   //   a) memcpy-memcpy xform which exposes redundance for DSE.
   //   b) call-memcpy xform for return slot optimization.
@@ -651,8 +684,12 @@
   if (!dep.isClobber())
     return false;
   if (!isa<MemCpyInst>(dep.getInst())) {
-    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
-      return performCallSlotOptzn(M, C);
+    if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) {
+      bool changed = performCallSlotOptzn(M, M->getDest(), M->getSource(),
+                                  cpyLen->getZExtValue(), C);
+      if (changed) M->eraseFromParent();
+      return changed;
+    }
     return false;
   }
   
@@ -697,14 +734,21 @@
                                  M->getParent()->getParent()->getParent(),
                                  M->getIntrinsicID(), ArgTys, 3);
     
+  // Make sure to use the lesser of the alignment of the source and the dest
+  // since we're changing where we're reading from, but don't want to increase
+  // the alignment past what can be read from or written to.
+  // TODO: Is this worth it if we're creating a less aligned memcpy? For
+  // example we could be moving from movaps -> movq on x86.
+  unsigned Align = std::min(MDep->getAlignmentCst()->getZExtValue(),
+                            M->getAlignmentCst()->getZExtValue());
+  LLVMContext &Context = M->getContext();
+  ConstantInt *AlignCI = ConstantInt::get(Type::getInt32Ty(Context), Align);
   Value *Args[5] = {
     M->getRawDest(), MDep->getRawSource(), M->getLength(),
-    M->getAlignmentCst(), M->getVolatileCst()
+    AlignCI, M->getVolatileCst()
   };
-  
   CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
   
-  
   // If C and M don't interfere, then this is a valid transformation.  If they
   // did, this would mean that the two sources overlap, which would be bad.
   if (MD.getDependency(C) == dep) {
@@ -728,7 +772,7 @@
 
   // If the memmove is a constant size, use it for the alias query, this allows
   // us to optimize things like: memmove(P, P+64, 64);
-  uint64_t MemMoveSize = ~0ULL;
+  uint64_t MemMoveSize = AliasAnalysis::UnknownSize;
   if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
     MemMoveSize = Len->getZExtValue();
   

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/Reassociate.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/Reassociate.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/Reassociate.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/Reassociate.cpp Tue Oct 26 19:48:03 2010
@@ -77,7 +77,9 @@
     bool MadeChange;
   public:
     static char ID; // Pass identification, replacement for typeid
-    Reassociate() : FunctionPass(&ID) {}
+    Reassociate() : FunctionPass(ID) {
+      initializeReassociatePass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnFunction(Function &F);
 
@@ -104,7 +106,7 @@
 
 char Reassociate::ID = 0;
 INITIALIZE_PASS(Reassociate, "reassociate",
-                "Reassociate expressions", false, false);
+                "Reassociate expressions", false, false)
 
 // Public interface to the Reassociate pass
 FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/Reg2Mem.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/Reg2Mem.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/Reg2Mem.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/Reg2Mem.cpp Tue Oct 26 19:48:03 2010
@@ -36,7 +36,9 @@
 namespace {
   struct RegToMem : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    RegToMem() : FunctionPass(&ID) {}
+    RegToMem() : FunctionPass(ID) {
+      initializeRegToMemPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequiredID(BreakCriticalEdgesID);
@@ -59,9 +61,11 @@
 }
   
 char RegToMem::ID = 0;
-static RegisterPass<RegToMem>
-X("reg2mem", "Demote all values to stack slots");
-
+INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
+                false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
+                false, false)
 
 bool RegToMem::runOnFunction(Function &F) {
   if (F.isDeclaration()) 
@@ -124,7 +128,7 @@
 
 // createDemoteRegisterToMemory - Provide an entry point to create this pass.
 //
-const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
+char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
 FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
   return new RegToMem();
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SCCP.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SCCP.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SCCP.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SCCP.cpp Tue Oct 26 19:48:03 2010
@@ -275,12 +275,12 @@
     return I->second;
   }
   
-  LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
+  /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
     DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I = 
       StructValueState.find(std::make_pair(V, i));
     assert(I != StructValueState.end() && "V is not in valuemap!");
     return I->second;
-  }
+  }*/
 
   /// getTrackedRetVals - Get the inferred return value map.
   ///
@@ -518,7 +518,6 @@
   void visitUnwindInst    (TerminatorInst &I) { /*returns void*/ }
   void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
   void visitAllocaInst    (Instruction &I) { markOverdefined(&I); }
-  void visitVANextInst    (Instruction &I) { markOverdefined(&I); }
   void visitVAArgInst     (Instruction &I) { markAnythingOverdefined(&I); }
 
   void visitInstruction(Instruction &I) {
@@ -1586,7 +1585,9 @@
   ///
   struct SCCP : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    SCCP() : FunctionPass(&ID) {}
+    SCCP() : FunctionPass(ID) {
+      initializeSCCPPass(*PassRegistry::getPassRegistry());
+    }
 
     // runOnFunction - Run the Sparse Conditional Constant Propagation
     // algorithm, and return true if the function was modified.
@@ -1601,7 +1602,7 @@
 
 char SCCP::ID = 0;
 INITIALIZE_PASS(SCCP, "sccp",
-                "Sparse Conditional Constant Propagation", false, false);
+                "Sparse Conditional Constant Propagation", false, false)
 
 // createSCCPPass - This is the public interface to this file.
 FunctionPass *llvm::createSCCPPass() {
@@ -1702,7 +1703,9 @@
   ///
   struct IPSCCP : public ModulePass {
     static char ID;
-    IPSCCP() : ModulePass(&ID) {}
+    IPSCCP() : ModulePass(ID) {
+      initializeIPSCCPPass(*PassRegistry::getPassRegistry());
+    }
     bool runOnModule(Module &M);
   };
 } // end anonymous namespace
@@ -1710,7 +1713,7 @@
 char IPSCCP::ID = 0;
 INITIALIZE_PASS(IPSCCP, "ipsccp",
                 "Interprocedural Sparse Conditional Constant Propagation",
-                false, false);
+                false, false)
 
 // createIPSCCPPass - This is the public interface to this file.
 ModulePass *llvm::createIPSCCPPass() {
@@ -1749,6 +1752,13 @@
 bool IPSCCP::runOnModule(Module &M) {
   SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
 
+  // AddressTakenFunctions - This set keeps track of the address-taken functions
+  // that are in the input.  As IPSCCP runs through and simplifies code,
+  // functions that were address taken can end up losing their
+  // address-taken-ness.  Because of this, we keep track of their addresses from
+  // the first pass so we can use them for the later simplification pass.
+  SmallPtrSet<Function*, 32> AddressTakenFunctions;
+  
   // Loop over all functions, marking arguments to those with their addresses
   // taken or that are external as overdefined.
   //
@@ -1764,9 +1774,13 @@
     // If this function only has direct calls that we can see, we can track its
     // arguments and return value aggressively, and can assume it is not called
     // unless we see evidence to the contrary.
-    if (F->hasLocalLinkage() && !AddressIsTaken(F)) {
-      Solver.AddArgumentTrackedFunction(F);
-      continue;
+    if (F->hasLocalLinkage()) {
+      if (AddressIsTaken(F))
+        AddressTakenFunctions.insert(F);
+      else {
+        Solver.AddArgumentTrackedFunction(F);
+        continue;
+      }
     }
 
     // Assume the function is called.
@@ -1951,7 +1965,7 @@
       continue;
   
     // We can only do this if we know that nothing else can call the function.
-    if (!F->hasLocalLinkage() || AddressIsTaken(F))
+    if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
       continue;
     
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/Scalar.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/Scalar.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/Scalar.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/Scalar.cpp Tue Oct 26 19:48:03 2010
@@ -7,12 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the C bindings for libLLVMScalarOpts.a, which implements
-// several scalar transformations over the LLVM intermediate representation.
+// This file implements common infrastructure for libLLVMScalarOpts.a, which 
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Target/TargetData.h"
@@ -20,6 +23,46 @@
 
 using namespace llvm;
 
+/// initializeScalarOptsPasses - Initialize all passes linked into the 
+/// ScalarOpts library.
+void llvm::initializeScalarOpts(PassRegistry &Registry) {
+  initializeADCEPass(Registry);
+  initializeBlockPlacementPass(Registry);
+  initializeCodeGenPreparePass(Registry);
+  initializeConstantPropagationPass(Registry);
+  initializeCorrelatedValuePropagationPass(Registry);
+  initializeDCEPass(Registry);
+  initializeDeadInstEliminationPass(Registry);
+  initializeDSEPass(Registry);
+  initializeGEPSplitterPass(Registry);
+  initializeGVNPass(Registry);
+  initializeIndVarSimplifyPass(Registry);
+  initializeJumpThreadingPass(Registry);
+  initializeLICMPass(Registry);
+  initializeLoopDeletionPass(Registry);
+  initializeLoopRotatePass(Registry);
+  initializeLoopStrengthReducePass(Registry);
+  initializeLoopUnrollPass(Registry);
+  initializeLoopUnswitchPass(Registry);
+  initializeLowerAtomicPass(Registry);
+  initializeMemCpyOptPass(Registry);
+  initializeReassociatePass(Registry);
+  initializeRegToMemPass(Registry);
+  initializeSCCPPass(Registry);
+  initializeIPSCCPPass(Registry);
+  initializeSROAPass(Registry);
+  initializeCFGSimplifyPassPass(Registry);
+  initializeSimplifyHalfPowrLibCallsPass(Registry);
+  initializeSimplifyLibCallsPass(Registry);
+  initializeSinkingPass(Registry);
+  initializeTailDupPass(Registry);
+  initializeTailCallElimPass(Registry);
+}
+
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
+  initializeScalarOpts(*unwrap(R));
+}
+
 void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createAggressiveDCEPass());
 }
@@ -56,10 +99,6 @@
   unwrap(PM)->add(createLoopDeletionPass());
 }
 
-void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createLoopIndexSplitPass());
-}
-
 void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createLoopRotatePass());
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Oct 26 19:48:03 2010
@@ -28,6 +28,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Target/TargetData.h"
@@ -51,7 +52,8 @@
 namespace {
   struct SROA : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    explicit SROA(signed T = -1) : FunctionPass(&ID) {
+    explicit SROA(signed T = -1) : FunctionPass(ID) {
+      initializeSROAPass(*PassRegistry::getPassRegistry());
       if (T == -1)
         SRThreshold = 128;
       else
@@ -114,8 +116,7 @@
     void DoScalarReplacement(AllocaInst *AI, 
                              std::vector<AllocaInst*> &WorkList);
     void DeleteDeadInstructions();
-    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
-    
+   
     void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                               SmallVector<AllocaInst*, 32> &NewElts);
     void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
@@ -135,8 +136,12 @@
 }
 
 char SROA::ID = 0;
-INITIALIZE_PASS(SROA, "scalarrepl",
-                "Scalar Replacement of Aggregates", false, false);
+INITIALIZE_PASS_BEGIN(SROA, "scalarrepl",
+                "Scalar Replacement of Aggregates", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(SROA, "scalarrepl",
+                "Scalar Replacement of Aggregates", false, false)
 
 // Public interface to the ScalarReplAggregates pass
 FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { 
@@ -194,6 +199,27 @@
 };
 } // end anonymous namespace.
 
+
+/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
+/// allowed to form.  We do this to avoid MMX types, which is a complete hack,
+/// but is required until the backend is fixed.
+static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
+  StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
+  if (!Triple.startswith("i386") &&
+      !Triple.startswith("x86_64"))
+    return false;
+  
+  // Reject all the MMX vector types.
+  switch (VTy->getNumElements()) {
+  default: return false;
+  case 1: return VTy->getElementType()->isIntegerTy(64);
+  case 2: return VTy->getElementType()->isIntegerTy(32);
+  case 4: return VTy->getElementType()->isIntegerTy(16);
+  case 8: return VTy->getElementType()->isIntegerTy(8);
+  }
+}
+
+
 /// TryConvert - Analyze the specified alloca, and if it is safe to do so,
 /// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
 /// alloca if possible or null if not.
@@ -210,7 +236,8 @@
   // we just get a lot of insert/extracts.  If at least one vector is
   // involved, then we probably really do have a union of vector/array.
   const Type *NewTy;
-  if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+  if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
+      !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
     DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
           << *VectorTy << '\n');
     NewTy = VectorTy;  // Use the vector type.
@@ -299,6 +326,9 @@
       // Don't break volatile loads.
       if (LI->isVolatile())
         return false;
+      // Don't touch MMX operations.
+      if (LI->getType()->isX86_MMXTy())
+        return false;
       MergeInType(LI->getType(), Offset);
       continue;
     }
@@ -306,6 +336,9 @@
     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       // Storing the pointer, not into the value?
       if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+      // Don't touch MMX operations.
+      if (SI->getOperand(0)->getType()->isX86_MMXTy())
+        return false;
       MergeInType(SI->getOperand(0)->getType(), Offset);
       continue;
     }
@@ -1414,7 +1447,7 @@
         Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
                                              OtherPTy->getAddressSpace());
         OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
-                                   OtherElt->getNameStr(), MI);
+                                   OtherElt->getName(), MI);
       }
     }
     
@@ -1663,6 +1696,12 @@
 /// HasPadding - Return true if the specified type has any structure or
 /// alignment padding, false otherwise.
 static bool HasPadding(const Type *Ty, const TargetData &TD) {
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+    return HasPadding(ATy->getElementType(), TD);
+  
+  if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+    return HasPadding(VTy->getElementType(), TD);
+  
   if (const StructType *STy = dyn_cast<StructType>(Ty)) {
     const StructLayout *SL = TD.getStructLayout(STy);
     unsigned PrevFieldBitOffset = 0;
@@ -1692,12 +1731,8 @@
       if (PrevFieldEnd < SL->getSizeInBits())
         return true;
     }
-
-  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
-    return HasPadding(ATy->getElementType(), TD);
-  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
-    return HasPadding(VTy->getElementType(), TD);
   }
+  
   return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
 }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp Tue Oct 26 19:48:03 2010
@@ -42,7 +42,9 @@
 namespace {
   struct CFGSimplifyPass : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    CFGSimplifyPass() : FunctionPass(&ID) {}
+    CFGSimplifyPass() : FunctionPass(ID) {
+      initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
   };
@@ -50,7 +52,7 @@
 
 char CFGSimplifyPass::ID = 0;
 INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
-                "Simplify the CFG", false, false);
+                "Simplify the CFG", false, false)
 
 // Public interface to the CFGSimplification pass
 FunctionPass *llvm::createCFGSimplificationPass() {
@@ -285,10 +287,9 @@
   while (LocalChange) {
     LocalChange = false;
     
-    // Loop over all of the basic blocks (except the first one) and remove them
-    // if they are unneeded...
+    // Loop over all of the basic blocks and remove them if they are unneeded...
     //
-    for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) {
+    for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
       if (SimplifyCFG(BBIt++, TD)) {
         LocalChange = true;
         ++NumSimpl;

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp Tue Oct 26 19:48:03 2010
@@ -32,7 +32,9 @@
     const TargetData *TD;
   public:
     static char ID; // Pass identification
-    SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {}
+    SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
+      initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnFunction(Function &F);
 
@@ -47,7 +49,7 @@
 } // end anonymous namespace.
 
 INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
-                "Simplify half_powr library calls", false, false);
+                "Simplify half_powr library calls", false, false)
 
 // Public interface to the Simplify HalfPowr LibCalls pass.
 FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp Tue Oct 26 19:48:03 2010
@@ -223,7 +223,8 @@
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 ||
         FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
-        FT->getParamType(0) != FT->getReturnType())
+        FT->getParamType(0) != FT->getReturnType() ||
+        !FT->getParamType(1)->isIntegerTy(32))
       return 0;
 
     Value *SrcStr = CI->getArgOperand(0);
@@ -252,26 +253,61 @@
 
     // strchr can find the nul character.
     Str += '\0';
-    char CharValue = CharC->getSExtValue();
 
     // Compute the offset.
-    uint64_t i = 0;
-    while (1) {
-      if (i == Str.size())    // Didn't find the char.  strchr returns null.
-        return Constant::getNullValue(CI->getType());
-      // Did we find our match?
-      if (Str[i] == CharValue)
-        break;
-      ++i;
-    }
+    size_t I = Str.find(CharC->getSExtValue());
+    if (I == std::string::npos) // Didn't find the char.  strchr returns null.
+      return Constant::getNullValue(CI->getType());
 
     // strchr(s+n,c)  -> gep(s+n+i,c)
-    Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);
+    Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), I);
     return B.CreateGEP(SrcStr, Idx, "strchr");
   }
 };
 
 //===---------------------------------------===//
+// 'strrchr' Optimizations
+
+struct StrRChrOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strrchr" function prototype.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+        FT->getParamType(0) != FT->getReturnType() ||
+        !FT->getParamType(1)->isIntegerTy(32))
+      return 0;
+
+    Value *SrcStr = CI->getArgOperand(0);
+    ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+    // Cannot fold anything if we're not looking for a constant.
+    if (!CharC)
+      return 0;
+
+    std::string Str;
+    if (!GetConstantStringInfo(SrcStr, Str)) {
+      // strrchr(s, 0) -> strchr(s, 0)
+      if (TD && CharC->isZero())
+        return EmitStrChr(SrcStr, '\0', B, TD);
+      return 0;
+    }
+
+    // strrchr can find the nul character.
+    Str += '\0';
+
+    // Compute the offset.
+    size_t I = Str.rfind(CharC->getSExtValue());
+    if (I == std::string::npos) // Didn't find the char. Return null.
+      return Constant::getNullValue(CI->getType());
+
+    // strrchr(s+n,c) -> gep(s+n+i,c)
+    Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), I);
+    return B.CreateGEP(SrcStr, Idx, "strrchr");
+  }
+};
+
+//===---------------------------------------===//
 // 'strcmp' Optimizations
 
 struct StrCmpOpt : public LibCallOptimization {
@@ -488,6 +524,46 @@
   }
 };
 
+
+//===---------------------------------------===//
+// 'strpbrk' Optimizations
+
+struct StrPBrkOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+        FT->getParamType(1) != FT->getParamType(0) ||
+        FT->getReturnType() != FT->getParamType(0))
+      return 0;
+
+    std::string S1, S2;
+    bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+    bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+    // strpbrk(s, "") -> NULL
+    // strpbrk("", s) -> NULL
+    if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+      return Constant::getNullValue(CI->getType());
+
+    // Constant folding.
+    if (HasS1 && HasS2) {
+      size_t I = S1.find_first_of(S2);
+      if (I == std::string::npos) // No match.
+        return Constant::getNullValue(CI->getType());
+
+      Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), I);
+      return B.CreateGEP(CI->getArgOperand(0), Idx, "strpbrk");
+    }
+
+    // strpbrk(s, "a") -> strchr(s, 'a')
+    if (TD && HasS2 && S2.size() == 1)
+      return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+
+    return 0;
+  }
+};
+
 //===---------------------------------------===//
 // 'strto*' Optimizations.  This handles strtol, strtod, strtof, strtoul, etc.
 
@@ -510,6 +586,67 @@
 };
 
 //===---------------------------------------===//
+// 'strspn' Optimizations
+
+struct StrSpnOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+        FT->getParamType(1) != FT->getParamType(0) ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    std::string S1, S2;
+    bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+    bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+    // strspn(s, "") -> 0
+    // strspn("", s) -> 0
+    if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+      return Constant::getNullValue(CI->getType());
+
+    // Constant folding.
+    if (HasS1 && HasS2)
+      return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str()));
+
+    return 0;
+  }
+};
+
+//===---------------------------------------===//
+// 'strcspn' Optimizations
+
+struct StrCSpnOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+        FT->getParamType(1) != FT->getParamType(0) ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    std::string S1, S2;
+    bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+    bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+    // strcspn("", s) -> 0
+    if (HasS1 && S1.empty())
+      return Constant::getNullValue(CI->getType());
+
+    // Constant folding.
+    if (HasS1 && HasS2)
+      return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str()));
+
+    // strcspn(s, "") -> strlen(s)
+    if (TD && HasS2 && S2.empty())
+      return EmitStrLen(CI->getArgOperand(0), B, TD);
+
+    return 0;
+  }
+};
+
+//===---------------------------------------===//
 // 'strstr' Optimizations
 
 struct StrStrOpt : public LibCallOptimization {
@@ -1220,10 +1357,10 @@
   class SimplifyLibCalls : public FunctionPass {
     StringMap<LibCallOptimization*> Optimizations;
     // String and Memory LibCall Optimizations
-    StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
-    StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
-    StrNCpyOpt StrNCpy; StrLenOpt StrLen;
-    StrToOpt StrTo; StrStrOpt StrStr;
+    StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
+    StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+    StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk;
+    StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
     MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
     // Math Library Optimizations
     PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
@@ -1237,7 +1374,9 @@
     bool Modified;  // This is only used by doInitialization.
   public:
     static char ID; // Pass identification
-    SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {}
+    SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
+      initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
+    }
     void InitOptimizations();
     bool runOnFunction(Function &F);
 
@@ -1255,7 +1394,7 @@
 } // end anonymous namespace.
 
 INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
-                "Simplify well-known library calls", false, false);
+                "Simplify well-known library calls", false, false)
 
 // Public interface to the Simplify LibCalls pass.
 FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -1269,11 +1408,13 @@
   Optimizations["strcat"] = &StrCat;
   Optimizations["strncat"] = &StrNCat;
   Optimizations["strchr"] = &StrChr;
+  Optimizations["strrchr"] = &StrRChr;
   Optimizations["strcmp"] = &StrCmp;
   Optimizations["strncmp"] = &StrNCmp;
   Optimizations["strcpy"] = &StrCpy;
   Optimizations["strncpy"] = &StrNCpy;
   Optimizations["strlen"] = &StrLen;
+  Optimizations["strpbrk"] = &StrPBrk;
   Optimizations["strtol"] = &StrTo;
   Optimizations["strtod"] = &StrTo;
   Optimizations["strtof"] = &StrTo;
@@ -1281,6 +1422,8 @@
   Optimizations["strtoll"] = &StrTo;
   Optimizations["strtold"] = &StrTo;
   Optimizations["strtoull"] = &StrTo;
+  Optimizations["strspn"] = &StrSpn;
+  Optimizations["strcspn"] = &StrCSpn;
   Optimizations["strstr"] = &StrStr;
   Optimizations["memcmp"] = &MemCmp;
   Optimizations["memcpy"] = &MemCpy;
@@ -2156,7 +2299,7 @@
 //   * pow(pow(x,y),z)-> pow(x,y*z)
 //
 // puts:
-//   * puts("") -> putchar("\n")
+//   * puts("") -> putchar('\n')
 //
 // round, roundf, roundl:
 //   * round(cnst) -> cnst'
@@ -2173,24 +2316,6 @@
 // stpcpy:
 //   * stpcpy(str, "literal") ->
 //           llvm.memcpy(str,"literal",strlen("literal")+1,1)
-// strrchr:
-//   * strrchr(s,c) -> reverse_offset_of_in(c,s)
-//      (if c is a constant integer and s is a constant string)
-//   * strrchr(s1,0) -> strchr(s1,0)
-//
-// strpbrk:
-//   * strpbrk(s,a) -> offset_in_for(s,a)
-//      (if s and a are both constant strings)
-//   * strpbrk(s,"") -> 0
-//   * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
-//
-// strspn, strcspn:
-//   * strspn(s,a)   -> const_int (if both args are constant)
-//   * strspn("",a)  -> 0
-//   * strspn(s,"")  -> 0
-//   * strcspn(s,a)  -> const_int (if both args are constant)
-//   * strcspn("",a) -> 0
-//   * strcspn(s,"") -> strlen(a)
 //
 // tan, tanf, tanl:
 //   * tan(atan(x)) -> x

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/Sink.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/Sink.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/Sink.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/Sink.cpp Tue Oct 26 19:48:03 2010
@@ -35,7 +35,9 @@
 
   public:
     static char ID; // Pass identification
-    Sinking() : FunctionPass(&ID) {}
+    Sinking() : FunctionPass(ID) {
+      initializeSinkingPass(*PassRegistry::getPassRegistry());
+    }
     
     virtual bool runOnFunction(Function &F);
     
@@ -56,7 +58,11 @@
 } // end anonymous namespace
   
 char Sinking::ID = 0;
-INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false);
+INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
 
 FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
 
@@ -151,7 +157,7 @@
     if (L->isVolatile()) return false;
 
     Value *Ptr = L->getPointerOperand();
-    unsigned Size = AA->getTypeStoreSize(L->getType());
+    uint64_t Size = AA->getTypeStoreSize(L->getType());
     for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
          E = Stores.end(); I != E; ++I)
       if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod)

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/TailDuplication.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/TailDuplication.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/TailDuplication.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/TailDuplication.cpp Tue Oct 26 19:48:03 2010
@@ -49,7 +49,9 @@
     bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
-    TailDup() : FunctionPass(&ID) {}
+    TailDup() : FunctionPass(ID) {
+      initializeTailDupPass(*PassRegistry::getPassRegistry());
+    }
 
   private:
     inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
@@ -59,7 +61,7 @@
 }
 
 char TailDup::ID = 0;
-INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false);
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
 
 // Public interface to the Tail Duplication pass
 FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }

Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp Tue Oct 26 19:48:03 2010
@@ -72,7 +72,9 @@
 namespace {
   struct TailCallElim : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    TailCallElim() : FunctionPass(&ID) {}
+    TailCallElim() : FunctionPass(ID) {
+      initializeTailCallElimPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
@@ -88,7 +90,7 @@
 
 char TailCallElim::ID = 0;
 INITIALIZE_PASS(TailCallElim, "tailcallelim",
-                "Tail Call Elimination", false, false);
+                "Tail Call Elimination", false, false)
 
 // Public interface to the TailCallElimination pass
 FunctionPass *llvm::createTailCallEliminationPass() {
@@ -278,22 +280,22 @@
   Function *F = CI->getParent()->getParent();
   Value *ReturnedValue = 0;
 
-  for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
-    if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
-      if (RI != IgnoreRI) {
-        Value *RetOp = RI->getOperand(0);
-
-        // We can only perform this transformation if the value returned is
-        // evaluatable at the start of the initial invocation of the function,
-        // instead of at the end of the evaluation.
-        //
-        if (!isDynamicConstant(RetOp, CI, RI))
-          return 0;
-
-        if (ReturnedValue && RetOp != ReturnedValue)
-          return 0;     // Cannot transform if differing values are returned.
-        ReturnedValue = RetOp;
-      }
+  for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
+    ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+    if (RI == 0 || RI == IgnoreRI) continue;
+
+    // We can only perform this transformation if the value returned is
+    // evaluatable at the start of the initial invocation of the function,
+    // instead of at the end of the evaluation.
+    //
+    Value *RetOp = RI->getOperand(0);
+    if (!isDynamicConstant(RetOp, CI, RI))
+      return 0;
+
+    if (ReturnedValue && RetOp != ReturnedValue)
+      return 0;     // Cannot transform if differing values are returned.
+    ReturnedValue = RetOp;
+  }
   return ReturnedValue;
 }
 
@@ -307,7 +309,7 @@
   assert(I->getNumOperands() == 2 &&
          "Associative/commutative operations should have 2 args!");
 
-  // Exactly one operand should be the result of the call instruction...
+  // Exactly one operand should be the result of the call instruction.
   if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
       (I->getOperand(0) != CI && I->getOperand(1) != CI))
     return 0;
@@ -387,21 +389,22 @@
   // tail call if all of the instructions between the call and the return are
   // movable to above the call itself, leaving the call next to the return.
   // Check that this is the case now.
-  for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
-    if (!CanMoveAboveCall(BBI, CI)) {
-      // If we can't move the instruction above the call, it might be because it
-      // is an associative and commutative operation that could be tranformed
-      // using accumulator recursion elimination.  Check to see if this is the
-      // case, and if so, remember the initial accumulator value for later.
-      if ((AccumulatorRecursionEliminationInitVal =
-                             CanTransformAccumulatorRecursion(BBI, CI))) {
-        // Yes, this is accumulator recursion.  Remember which instruction
-        // accumulates.
-        AccumulatorRecursionInstr = BBI;
-      } else {
-        return false;   // Otherwise, we cannot eliminate the tail recursion!
-      }
+  for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+    if (CanMoveAboveCall(BBI, CI)) continue;
+    
+    // If we can't move the instruction above the call, it might be because it
+    // is an associative and commutative operation that could be tranformed
+    // using accumulator recursion elimination.  Check to see if this is the
+    // case, and if so, remember the initial accumulator value for later.
+    if ((AccumulatorRecursionEliminationInitVal =
+                           CanTransformAccumulatorRecursion(BBI, CI))) {
+      // Yes, this is accumulator recursion.  Remember which instruction
+      // accumulates.
+      AccumulatorRecursionInstr = BBI;
+    } else {
+      return false;   // Otherwise, we cannot eliminate the tail recursion!
     }
+  }
 
   // We can only transform call/return pairs that either ignore the return value
   // of the call and return void, ignore the value of the call and return a

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp Tue Oct 26 19:48:03 2010
@@ -21,6 +21,7 @@
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CallSite.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -379,27 +380,10 @@
 /// return false.
 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                     const TargetLowering &TLI) {
-  std::vector<InlineAsm::ConstraintInfo>
-  Constraints = IA->ParseConstraints();
-
-  unsigned ArgNo = 0;   // The argument of the CallInst.
-  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
-    TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-
-    // Compute the value type for each operand.
-    switch (OpInfo.Type) {
-      case InlineAsm::isOutput:
-        if (OpInfo.isIndirect)
-          OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
-        break;
-      case InlineAsm::isInput:
-        OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
-        break;
-      case InlineAsm::isClobber:
-        // Nothing to do.
-        break;
-    }
-
+  std::vector<TargetLowering::AsmOperandInfo> TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+    
     // Compute the constraint code and ConstraintType to use.
     TLI.ComputeConstraintToUse(OpInfo, SDValue());
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/BasicBlockUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/BasicBlockUtils.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/BasicBlockUtils.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/BasicBlockUtils.cpp Tue Oct 26 19:48:03 2010
@@ -97,23 +97,13 @@
 /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
 /// if possible.  The return value indicates success or failure.
 bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
-  pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
-  // Can't merge the entry block.  Don't merge away blocks who have their
-  // address taken: this is a bug if the predecessor block is the entry node
-  // (because we'd end up taking the address of the entry) and undesirable in
-  // any case.
-  if (pred_begin(BB) == pred_end(BB) ||
-      BB->hasAddressTaken()) return false;
-  
-  BasicBlock *PredBB = *PI++;
-  for (; PI != PE; ++PI)  // Search all predecessors, see if they are all same
-    if (*PI != PredBB) {
-      PredBB = 0;       // There are multiple different predecessors...
-      break;
-    }
+  // Don't merge away blocks who have their address taken.
+  if (BB->hasAddressTaken()) return false;
   
-  // Can't merge if there are multiple predecessors.
+  // Can't merge if there are multiple predecessors, or no predecessors.
+  BasicBlock *PredBB = BB->getUniquePredecessor();
   if (!PredBB) return false;
+
   // Don't break self-loops.
   if (PredBB == BB) return false;
   // Don't break invokes.
@@ -267,7 +257,7 @@
   case Instruction::Switch:    // Should remove entry
   default:
   case Instruction::Ret:       // Cannot happen, has no successors!
-    llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!");
+    llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!");
   }
 
   if (NewTI)   // If it's a different instruction, replace.
@@ -337,7 +327,7 @@
       L->addBasicBlockToLoop(New, LI->getBase());
 
   if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
-    // Old dominates New. New node domiantes all other nodes dominated by Old.
+    // Old dominates New. New node dominates all other nodes dominated by Old.
     DomTreeNode *OldNode = DT->getNode(Old);
     std::vector<DomTreeNode *> Children;
     for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
@@ -421,7 +411,8 @@
   DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
   if (DT)
     DT->splitBlock(NewBB);
-  if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
+  if (DominanceFrontier *DF =
+        P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
     DF->splitBlock(NewBB);
 
   // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/BreakCriticalEdges.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/BreakCriticalEdges.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/BreakCriticalEdges.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/BreakCriticalEdges.cpp Tue Oct 26 19:48:03 2010
@@ -36,7 +36,9 @@
 namespace {
   struct BreakCriticalEdges : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    BreakCriticalEdges() : FunctionPass(&ID) {}
+    BreakCriticalEdges() : FunctionPass(ID) {
+      initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
@@ -53,11 +55,11 @@
 }
 
 char BreakCriticalEdges::ID = 0;
-static RegisterPass<BreakCriticalEdges>
-X("break-crit-edges", "Break critical edges in CFG");
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+                "Break critical edges in CFG", false, false)
 
 // Publically exposed interface to pass...
-const PassInfo *const llvm::BreakCriticalEdgesID = &X;
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
 FunctionPass *llvm::createBreakCriticalEdgesPass() {
   return new BreakCriticalEdges();
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -20,10 +20,9 @@
   Mem2Reg.cpp
   PromoteMemoryToRegister.cpp
   SSAUpdater.cpp
-  SSI.cpp
   SimplifyCFG.cpp
   UnifyFunctionExitNodes.cpp
+  Utils.cpp
   ValueMapper.cpp
   )
 
-target_link_libraries (LLVMTransformUtils LLVMSupport)

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp Tue Oct 26 19:48:03 2010
@@ -23,7 +23,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
 #include "llvm/Support/CFG.h"
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ADT/SmallVector.h"
@@ -69,10 +69,11 @@
 }
 
 // Clone OldFunc into NewFunc, transforming the old arguments into references to
-// ArgMap values.
+// VMap values.
 //
 void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                              ValueToValueMapTy &VMap,
+                             bool ModuleLevelChanges,
                              SmallVectorImpl<ReturnInst*> &Returns,
                              const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
   assert(NameSuffix && "NameSuffix cannot be null!");
@@ -126,7 +127,7 @@
          BE = NewFunc->end(); BB != BE; ++BB)
     // Loop over all instructions, fixing each one as we find it...
     for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
-      RemapInstruction(II, VMap);
+      RemapInstruction(II, VMap, ModuleLevelChanges);
 }
 
 /// CloneFunction - Return a copy of the specified function, but without
@@ -139,6 +140,7 @@
 ///
 Function *llvm::CloneFunction(const Function *F,
                               ValueToValueMapTy &VMap,
+                              bool ModuleLevelChanges,
                               ClonedCodeInfo *CodeInfo) {
   std::vector<const Type*> ArgTypes;
 
@@ -167,7 +169,7 @@
     }
 
   SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.
-  CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo);
+  CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
   return NewF;
 }
 
@@ -180,6 +182,7 @@
     Function *NewFunc;
     const Function *OldFunc;
     ValueToValueMapTy &VMap;
+    bool ModuleLevelChanges;
     SmallVectorImpl<ReturnInst*> &Returns;
     const char *NameSuffix;
     ClonedCodeInfo *CodeInfo;
@@ -187,12 +190,14 @@
   public:
     PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
                           ValueToValueMapTy &valueMap,
+                          bool moduleLevelChanges,
                           SmallVectorImpl<ReturnInst*> &returns,
                           const char *nameSuffix, 
                           ClonedCodeInfo *codeInfo,
                           const TargetData *td)
-    : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns),
-      NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+    : NewFunc(newFunc), OldFunc(oldFunc),
+      VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
+      Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
     }
 
     /// CloneBlock - The specified block is found to be reachable, clone it and
@@ -211,7 +216,7 @@
 /// anything that it can reach.
 void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
                                        std::vector<const BasicBlock*> &ToClone){
-  Value *&BBEntry = VMap[BB];
+  TrackingVH<Value> &BBEntry = VMap[BB];
 
   // Have we already cloned this block?
   if (BBEntry) return;
@@ -257,8 +262,10 @@
       // If the condition was a known constant in the callee...
       ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
       // Or is a known constant in the caller...
-      if (Cond == 0)  
-        Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
+      if (Cond == 0) {
+        Value *V = VMap[BI->getCondition()];
+        Cond = dyn_cast_or_null<ConstantInt>(V);
+      }
 
       // Constant fold to uncond branch!
       if (Cond) {
@@ -271,8 +278,10 @@
   } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
     // If switching on a value known constant in the caller.
     ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
-    if (Cond == 0)  // Or known constant after constant prop in the callee...
-      Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
+    if (Cond == 0) { // Or known constant after constant prop in the callee...
+      Value *V = VMap[SI->getCondition()];
+      Cond = dyn_cast_or_null<ConstantInt>(V);
+    }
     if (Cond) {     // Constant fold to uncond branch!
       BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
       VMap[OldTI] = BranchInst::Create(Dest, NewBB);
@@ -313,7 +322,7 @@
   SmallVector<Constant*, 8> Ops;
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
-                                                           VMap)))
+                                                   VMap, ModuleLevelChanges)))
       Ops.push_back(Op);
     else
       return 0;  // All operands not constant!
@@ -355,6 +364,7 @@
 /// used for things like CloneFunction or CloneModule.
 void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
                                      ValueToValueMapTy &VMap,
+                                     bool ModuleLevelChanges,
                                      SmallVectorImpl<ReturnInst*> &Returns,
                                      const char *NameSuffix, 
                                      ClonedCodeInfo *CodeInfo,
@@ -368,8 +378,8 @@
     assert(VMap.count(II) && "No mapping from source argument specified!");
 #endif
 
-  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns,
-                            NameSuffix, CodeInfo, TD);
+  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+                            Returns, NameSuffix, CodeInfo, TD);
 
   // Clone the entry block, and anything recursively reachable from it.
   std::vector<const BasicBlock*> CloneWorklist;
@@ -388,7 +398,8 @@
   SmallVector<const PHINode*, 16> PHIToResolve;
   for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
        BI != BE; ++BI) {
-    BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
+    Value *V = VMap[BI];
+    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
     if (NewBB == 0) continue;  // Dead block.
 
     // Add the new block to the new function.
@@ -449,7 +460,7 @@
           I->setDebugLoc(DebugLoc());
         }
       }
-      RemapInstruction(I, VMap);
+      RemapInstruction(I, VMap, ModuleLevelChanges);
     }
   }
   
@@ -468,10 +479,11 @@
       OPN = PHIToResolve[phino];
       PHINode *PN = cast<PHINode>(VMap[OPN]);
       for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+        Value *V = VMap[PN->getIncomingBlock(pred)];
         if (BasicBlock *MappedBlock = 
-            cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
+            cast_or_null<BasicBlock>(V)) {
           Value *InVal = MapValue(PN->getIncomingValue(pred),
-                                  VMap);
+                                  VMap, ModuleLevelChanges);
           assert(InVal && "Unknown input value?");
           PN->setIncomingValue(pred, InVal);
           PN->setIncomingBlock(pred, MappedBlock);

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp Tue Oct 26 19:48:03 2010
@@ -22,12 +22,12 @@
 /// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
 /// dominance info. It is expected that basic block is already cloned.
 static void CloneDominatorInfo(BasicBlock *BB, 
-                               ValueMap<const Value *, Value *> &VMap,
+                               ValueToValueMapTy &VMap,
                                DominatorTree *DT,
                                DominanceFrontier *DF) {
 
   assert (DT && "DominatorTree is not available");
-  ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+  ValueToValueMapTy::iterator BI = VMap.find(BB);
   assert (BI != VMap.end() && "BasicBlock clone is missing");
   BasicBlock *NewBB = cast<BasicBlock>(BI->second);
 
@@ -42,7 +42,7 @@
 
   // NewBB's dominator is either BB's dominator or BB's dominator's clone.
   BasicBlock *NewBBDom = BBDom;
-  ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+  ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
   if (BBDomI != VMap.end()) {
     NewBBDom = cast<BasicBlock>(BBDomI->second);
     if (!DT->getNode(NewBBDom))
@@ -59,7 +59,7 @@
         for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
              I != E; ++I) {
           BasicBlock *DB = *I;
-          ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
+          ValueToValueMapTy::iterator IDM = VMap.find(DB);
           if (IDM != VMap.end())
             NewDFSet.insert(cast<BasicBlock>(IDM->second));
           else
@@ -73,7 +73,7 @@
 /// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
 /// using old blocks to new blocks mapping.
 Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager  *LPM, LoopInfo *LI,
-                      ValueMap<const Value *, Value *> &VMap, Pass *P) {
+                      ValueToValueMapTy &VMap, Pass *P) {
   
   DominatorTree *DT = NULL;
   DominanceFrontier *DF = NULL;
@@ -134,7 +134,7 @@
       for (unsigned index = 0, num_ops = Insn->getNumOperands(); 
            index != num_ops; ++index) {
         Value *Op = Insn->getOperand(index);
-        ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+        ValueToValueMapTy::iterator OpItr = VMap.find(Op);
         if (OpItr != VMap.end())
           Insn->setOperand(index, OpItr->second);
       }

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp Tue Oct 26 19:48:03 2010
@@ -17,7 +17,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/TypeSymbolTable.h"
 #include "llvm/Constant.h"
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 using namespace llvm;
 
 /// CloneModule - Return an exact copy of the specified module.  This is not as
@@ -89,7 +89,8 @@
     GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
     if (I->hasInitializer())
       GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
-                                                 VMap)));
+                                                 VMap,
+                                                 true)));
     GV->setLinkage(I->getLinkage());
     GV->setThreadLocal(I->isThreadLocal());
     GV->setConstant(I->isConstant());
@@ -108,7 +109,7 @@
       }
 
       SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.
-      CloneFunctionInto(F, I, VMap, Returns);
+      CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
     }
 
     F->setLinkage(I->getLinkage());
@@ -120,7 +121,7 @@
     GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
     GA->setLinkage(I->getLinkage());
     if (const Constant* C = I->getAliasee())
-      GA->setAliasee(cast<Constant>(MapValue(C, VMap)));
+      GA->setAliasee(cast<Constant>(MapValue(C, VMap, true)));
   }
 
   // And named metadata....
@@ -129,23 +130,8 @@
     const NamedMDNode &NMD = *I;
     NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
     for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
-      NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap)));
+      NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true)));
   }
 
-  // Update metadata attach with instructions.
-  for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI)   
-    for (Function::iterator FI = MI->begin(), FE = MI->end(); 
-         FI != FE; ++FI)
-      for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); 
-           BI != BE; ++BI) {
-        SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs;
-        BI->getAllMetadata(MDs);
-        for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator 
-               MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) {
-          Value *MappedValue = MapValue(MDI->second, VMap);
-          if (MDI->second != MappedValue && MappedValue)
-            BI->setMetadata(MDI->first, cast<MDNode>(MappedValue));
-        }
-      }
   return New;
 }

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CodeExtractor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CodeExtractor.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CodeExtractor.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CodeExtractor.cpp Tue Oct 26 19:48:03 2010
@@ -186,8 +186,8 @@
     if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
       BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
       if (DT) {
-        // Old dominates New. New node domiantes all other nodes dominated
-        //by Old.
+        // Old dominates New. New node dominates all other nodes dominated
+        // by Old.
         DomTreeNode *OldNode = DT->getNode(*I);
         SmallVector<DomTreeNode*, 8> Children;
         for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp Tue Oct 26 19:48:03 2010
@@ -171,7 +171,7 @@
 /// some edges of the callgraph may remain.
 static void UpdateCallGraphAfterInlining(CallSite CS,
                                          Function::iterator FirstNewBlock,
-                                         ValueMap<const Value*, Value*> &VMap,
+                                         ValueToValueMapTy &VMap,
                                          InlineFunctionInfo &IFI) {
   CallGraph &CG = *IFI.CG;
   const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -194,7 +194,7 @@
   for (; I != E; ++I) {
     const Value *OrigCall = I->first;
 
-    ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
+    ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
     // Only copy the edge if the call was inlined!
     if (VMI == VMap.end() || VMI->second == 0)
       continue;
@@ -288,7 +288,7 @@
   Function::iterator FirstNewBlock;
 
   { // Scope to destroy VMap after cloning.
-    ValueMap<const Value*, Value*> VMap;
+    ValueToValueMapTy VMap;
 
     assert(CalledFunc->arg_size() == CS.arg_size() &&
            "No varargs calls can be inlined!");
@@ -366,7 +366,8 @@
     // have no dead or constant instructions leftover after inlining occurs
     // (which can happen, e.g., because an argument was constant), but we'll be
     // happy with whatever the cloner can do.
-    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i",
+    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, 
+                              /*ModuleLevelChanges=*/false, Returns, ".i",
                               &InlinedFunctionInfo, IFI.TD, TheCall);
 
     // Remember the first block that is newly cloned over.

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/InstructionNamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/InstructionNamer.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/InstructionNamer.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/InstructionNamer.cpp Tue Oct 26 19:48:03 2010
@@ -23,7 +23,9 @@
 namespace {
   struct InstNamer : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    InstNamer() : FunctionPass(&ID) {}
+    InstNamer() : FunctionPass(ID) {
+      initializeInstNamerPass(*PassRegistry::getPassRegistry());
+    }
     
     void getAnalysisUsage(AnalysisUsage &Info) const {
       Info.setPreservesAll();
@@ -48,12 +50,11 @@
   };
   
   char InstNamer::ID = 0;
-  static RegisterPass<InstNamer> X("instnamer",
-                                   "Assign names to anonymous instructions");
 }
 
-
-const PassInfo *const llvm::InstructionNamerID = &X;
+INITIALIZE_PASS(InstNamer, "instnamer", 
+                "Assign names to anonymous instructions", false, false)
+char &llvm::InstructionNamerID = InstNamer::ID;
 //===----------------------------------------------------------------------===//
 //
 // InstructionNamer - Give any unnamed non-void instructions "tmp" names.

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LCSSA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LCSSA.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LCSSA.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LCSSA.cpp Tue Oct 26 19:48:03 2010
@@ -47,7 +47,9 @@
 namespace {
   struct LCSSA : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LCSSA() : LoopPass(&ID) {}
+    LCSSA() : LoopPass(ID) {
+      initializeLCSSAPass(*PassRegistry::getPassRegistry());
+    }
 
     // Cached analysis information for the current function.
     DominatorTree *DT;
@@ -90,10 +92,13 @@
 }
   
 char LCSSA::ID = 0;
-static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
 
 Pass *llvm::createLCSSAPass() { return new LCSSA(); }
-const PassInfo *const llvm::LCSSAID = &X;
+char &llvm::LCSSAID = LCSSA::ID;
 
 
 /// BlockDominatesAnExit - Return true if the specified block dominates at least
@@ -206,7 +211,7 @@
   DomTreeNode *DomNode = DT->getNode(DomBB);
 
   SSAUpdater SSAUpdate;
-  SSAUpdate.Initialize(Inst);
+  SSAUpdate.Initialize(Inst->getType(), Inst->getName());
   
   // Insert the LCSSA phi's into all of the exit blocks dominated by the
   // value, and add them to the Phi's map.

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/Local.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/Local.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/Local.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/Local.cpp Tue Oct 26 19:48:03 2010
@@ -490,6 +490,9 @@
 /// rewriting all the predecessors to branch to the successor block and return
 /// true.  If we can't transform, return false.
 bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+  assert(BB != &BB->getParent()->getEntryBlock() &&
+         "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
   // We can't eliminate infinite loops.
   BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
   if (BB == Succ) return false;

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp Tue Oct 26 19:48:03 2010
@@ -46,9 +46,9 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CFG.h"
@@ -65,13 +65,16 @@
 namespace {
   struct LoopSimplify : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LoopSimplify() : LoopPass(&ID) {}
+    LoopSimplify() : LoopPass(ID) {
+      initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+    }
 
     // AA - If we have an alias analysis object to update, this is it, otherwise
     // this is null.
     AliasAnalysis *AA;
     LoopInfo *LI;
     DominatorTree *DT;
+    ScalarEvolution *SE;
     Loop *L;
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -106,11 +109,15 @@
 }
 
 char LoopSimplify::ID = 0;
-static RegisterPass<LoopSimplify>
-X("loopsimplify", "Canonicalize natural loops", true);
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loopsimplify",
+                "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loopsimplify",
+                "Canonicalize natural loops", true, false)
 
 // Publically exposed interface to pass...
-const PassInfo *const llvm::LoopSimplifyID = &X;
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
 Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
 
 /// runOnLoop - Run down all loops in the CFG (recursively, but we could do
@@ -122,6 +129,7 @@
   LI = &getAnalysis<LoopInfo>();
   AA = getAnalysisIfAvailable<AliasAnalysis>();
   DT = &getAnalysis<DominatorTree>();
+  SE = getAnalysisIfAvailable<ScalarEvolution>();
 
   Changed |= ProcessLoop(L, LPM);
 
@@ -533,6 +541,12 @@
 
   DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
 
+  // If ScalarEvolution is around and knows anything about values in
+  // this loop, tell it to forget them, because we're about to
+  // substantially change it.
+  if (SE)
+    SE->forgetLoop(L);
+
   BasicBlock *Header = L->getHeader();
   BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
                                              OuterLoopPreds.size(),
@@ -622,6 +636,11 @@
   std::vector<BasicBlock*> BackedgeBlocks;
   for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
     BasicBlock *P = *I;
+
+    // Indirectbr edges cannot be split, so we must fail if we find one.
+    if (isa<IndirectBrInst>(P->getTerminator()))
+      return 0;
+
     if (P != Preheader) BackedgeBlocks.push_back(P);
   }
 

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp Tue Oct 26 19:48:03 2010
@@ -40,10 +40,10 @@
 /// RemapInstruction - Convert the instruction operands from referencing the
 /// current values into those specified by VMap.
 static inline void RemapInstruction(Instruction *I,
-                                    ValueMap<const Value *, Value*> &VMap) {
+                                    ValueToValueMapTy &VMap) {
   for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
     Value *Op = I->getOperand(op);
-    ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+    ValueToValueMapTy::iterator It = VMap.find(Op);
     if (It != VMap.end())
       I->setOperand(op, It->second);
   }
@@ -189,7 +189,6 @@
 
   // For the first iteration of the loop, we should use the precloned values for
   // PHI nodes.  Insert associations now.
-  typedef ValueMap<const Value*, Value*> ValueToValueMapTy;
   ValueToValueMapTy LastValueMap;
   std::vector<PHINode*> OrigPHINode;
   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
@@ -274,7 +273,7 @@
     for (unsigned i = 0; i < NewBlocks.size(); ++i)
       for (BasicBlock::iterator I = NewBlocks[i]->begin(),
            E = NewBlocks[i]->end(); I != E; ++I)
-        RemapInstruction(I, LastValueMap);
+        ::RemapInstruction(I, LastValueMap);
   }
   
   // The latch block exits the loop.  If there are any PHI nodes in the

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp Tue Oct 26 19:48:03 2010
@@ -78,14 +78,16 @@
     static char ID; // Pass identification, replacement for typeid
     explicit LowerInvoke(const TargetLowering *tli = NULL,
                          bool useExpensiveEHSupport = ExpensiveEHSupport)
-      : FunctionPass(&ID), useExpensiveEHSupport(useExpensiveEHSupport),
-        TLI(tli) { }
+      : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
+        TLI(tli) {
+      initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+    }
     bool doInitialization(Module &M);
     bool runOnFunction(Function &F);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       // This is a cluster of orthogonal Transforms
-      AU.addPreservedID(PromoteMemoryToRegisterID);
+      AU.addPreserved("mem2reg");
       AU.addPreservedID(LowerSwitchID);
     }
 
@@ -100,10 +102,11 @@
 }
 
 char LowerInvoke::ID = 0;
-static RegisterPass<LowerInvoke>
-X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators");
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+                "Lower invoke and unwind, for unwindless code generators",
+                false, false)
 
-const PassInfo *const llvm::LowerInvokePassID = &X;
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
 
 // Public Interface To the LowerInvoke pass.
 FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
@@ -147,19 +150,20 @@
                                       "llvm.sjljeh.jblist");
     }
 
-// VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
-// so it looks like Intrinsic::_setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                         !defined(setjmp_undefined_for_msvc)
+#  pragma push_macro("setjmp")
+#  undef setjmp
+#  define setjmp_undefined_for_msvc
 #endif
 
     SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
 
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)
-// let's return it to _setjmp state in case anyone ever needs it after this
-// point under VisualStudio
-#define setjmp _setjmp
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+   // let's return it to _setjmp state
+#  pragma pop_macro("setjmp")
+#  undef setjmp_undefined_for_msvc
 #endif
 
     LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
@@ -185,6 +189,7 @@
       NewCall->takeName(II);
       NewCall->setCallingConv(II->getCallingConv());
       NewCall->setAttributes(II->getAttributes());
+      NewCall->setDebugLoc(II->getDebugLoc());
       II->replaceAllUsesWith(NewCall);
 
       // Insert an unconditional branch to the normal destination.
@@ -265,6 +270,7 @@
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
+  NewCall->setDebugLoc(II->getDebugLoc());
   II->replaceAllUsesWith(NewCall);
 
   // Replace the invoke with an uncond branch.

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LowerSwitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LowerSwitch.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LowerSwitch.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LowerSwitch.cpp Tue Oct 26 19:48:03 2010
@@ -29,19 +29,20 @@
 
 namespace {
   /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
-  /// instructions.  Note that this cannot be a BasicBlock pass because it
-  /// modifies the CFG!
+  /// instructions.
   class LowerSwitch : public FunctionPass {
   public:
     static char ID; // Pass identification, replacement for typeid
-    LowerSwitch() : FunctionPass(&ID) {} 
+    LowerSwitch() : FunctionPass(ID) {
+      initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+    } 
 
     virtual bool runOnFunction(Function &F);
     
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       // This is a cluster of orthogonal Transforms
       AU.addPreserved<UnifyFunctionExitNodes>();
-      AU.addPreservedID(PromoteMemoryToRegisterID);
+      AU.addPreserved("mem2reg");
       AU.addPreservedID(LowerInvokePassID);
     }
 
@@ -50,8 +51,7 @@
       Constant* High;
       BasicBlock* BB;
 
-      CaseRange() : Low(0), High(0), BB(0) { }
-      CaseRange(Constant* low, Constant* high, BasicBlock* bb) :
+      CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
         Low(low), High(high), BB(bb) { }
     };
 
@@ -81,11 +81,11 @@
 }
 
 char LowerSwitch::ID = 0;
-static RegisterPass<LowerSwitch>
-X("lowerswitch", "Lower SwitchInst's to branches");
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+                "Lower SwitchInst's to branches", false, false)
 
 // Publically exposed interface to pass...
-const PassInfo *const llvm::LowerSwitchID = &X;
+char &llvm::LowerSwitchID = LowerSwitch::ID;
 // createLowerSwitchPass - Interface to this file...
 FunctionPass *llvm::createLowerSwitchPass() {
   return new LowerSwitch();
@@ -109,7 +109,8 @@
 // operator<< - Used for debugging purposes.
 //
 static raw_ostream& operator<<(raw_ostream &O,
-                               const LowerSwitch::CaseVector &C) ATTRIBUTE_USED;
+                               const LowerSwitch::CaseVector &C)
+    LLVM_ATTRIBUTE_USED;
 static raw_ostream& operator<<(raw_ostream &O,
                                const LowerSwitch::CaseVector &C) {
   O << "[";

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/Mem2Reg.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/Mem2Reg.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/Mem2Reg.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/Mem2Reg.cpp Tue Oct 26 19:48:03 2010
@@ -27,7 +27,9 @@
 namespace {
   struct PromotePass : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    PromotePass() : FunctionPass(&ID) {}
+    PromotePass() : FunctionPass(ID) {
+      initializePromotePassPass(*PassRegistry::getPassRegistry());
+    }
 
     // runOnFunction - To run this pass, first we calculate the alloca
     // instructions that are safe for promotion, then we promote each one.
@@ -49,7 +51,12 @@
 }  // end of anonymous namespace
 
 char PromotePass::ID = 0;
-static RegisterPass<PromotePass> X("mem2reg", "Promote Memory to Register");
+INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
+                false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
+                false, false)
 
 bool PromotePass::runOnFunction(Function &F) {
   std::vector<AllocaInst*> Allocas;
@@ -81,8 +88,6 @@
   return Changed;
 }
 
-// Publically exposed interface to pass...
-const PassInfo *const llvm::PromoteMemoryToRegisterID = &X;
 // createPromoteMemoryToRegister - Provide an entry point to create this pass.
 //
 FunctionPass *llvm::createPromoteMemoryToRegisterPass() {

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp Tue Oct 26 19:48:03 2010
@@ -228,14 +228,6 @@
 
     void run();
 
-    /// properlyDominates - Return true if I1 properly dominates I2.
-    ///
-    bool properlyDominates(Instruction *I1, Instruction *I2) const {
-      if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
-        I1 = II->getNormalDest()->begin();
-      return DT.properlyDominates(I1->getParent(), I2->getParent());
-    }
-    
     /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
     ///
     bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {

Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/SSAUpdater.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/SSAUpdater.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/SSAUpdater.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/SSAUpdater.cpp Tue Oct 26 19:48:03 2010
@@ -29,20 +29,21 @@
 }
 
 SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
-  : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {}
+  : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
 
 SSAUpdater::~SSAUpdater() {
   delete &getAvailableVals(AV);
 }
 
 /// Initialize - Reset this object to get ready for a new set of SSA
-/// updates.  ProtoValue is the value used to name PHI nodes.
-void SSAUpdater::Initialize(Value *ProtoValue) {
+/// updates with type 'Ty'.  PHI nodes get a name based on 'Name'.
+void SSAUpdater::Initialize(const Type *Ty, StringRef Name) {
   if (AV == 0)
     AV = new AvailableValsTy();
   else
     getAvailableVals(AV).clear();
-  PrototypeValue = ProtoValue;
+  ProtoType = Ty;
+  ProtoName = Name;
 }
 
 /// HasValueForBlock - Return true if the SSAUpdater already has a value for
@@ -54,8 +55,8 @@
 /// AddAvailableValue - Indicate that a rewritten value is available in the
 /// specified block with the specified value.
 void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
-  assert(PrototypeValue != 0 && "Need to initialize SSAUpdater");
-  assert(PrototypeValue->getType() == V->getType() &&
+  assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+  assert(ProtoType == V->getType() &&
          "All rewritten values must have the same type");
   getAvailableVals(AV)[BB] = V;
 }
@@ -148,7 +149,7 @@
 
   // If there are no predecessors, just return undef.
   if (PredValues.empty())
-    return UndefValue::get(PrototypeValue->getType());
+    return UndefValue::get(ProtoType);
 
   // Otherwise, if all the merged values are the same, just use it.
   if (SingularValue != 0)
@@ -168,9 +169,7 @@
   }
 
   // Ok, we have no way out, insert a new one now.
-  PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
-                                         PrototypeValue->getName(),
-                                         &BB->front());
+  PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front());
   InsertedPHI->reserveOperandSpace(PredValues.size());
 
   // Fill in all the predecessors of the PHI.
@@ -205,6 +204,22 @@
   U.set(V);
 }
 
+/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse.  However,
+/// this version of the method can rewrite uses in the same block as a
+/// definition, because it assumes that all uses of a value are below any
+/// inserted values.
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+  Instruction *User = cast<Instruction>(U.getUser());
+  
+  Value *V;
+  if (PHINode *UserPN = dyn_cast<PHINode>(User))
+    V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+  else
+    V = GetValueAtEndOfBlock(User->getParent());
+  
+  U.set(V);
+}
+
 /// PHIiter - Iterator for PHI operands.  This is used for the PHI_iterator
 /// in the SSAUpdaterImpl template.
 namespace {
@@ -266,15 +281,14 @@
   /// GetUndefVal - Get an undefined value of the same type as the value
   /// being handled.
   static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
-    return UndefValue::get(Updater->PrototypeValue->getType());
+    return UndefValue::get(Updater->ProtoType);
   }
 
   /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
   /// Reserve space for the operands but do not fill them in yet.
   static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
                                SSAUpdater *Updater) {
-    PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(),
-                                   Updater->PrototypeValue->getName(),
+    PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName,
                                    &BB->front());
     PHI->reserveOperandSpace(NumPreds);
     return PHI;

Removed: llvm/branches/wendling/eh/lib/Transforms/Utils/SSI.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/SSI.cpp?rev=117424&view=auto
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/SSI.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/SSI.cpp (removed)
@@ -1,433 +0,0 @@
-//===------------------- SSI.cpp - Creates SSI Representation -------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts a list of variables to the Static Single Information
-// form. This is a program representation described by Scott Ananian in his
-// Master Thesis: "The Static Single Information Form (1999)".
-// We are building an on-demand representation, that is, we do not convert
-// every single variable in the target function to SSI form. Rather, we receive
-// a list of target variables that must be converted. We also do not
-// completely convert a target variable to the SSI format. Instead, we only
-// change the variable in the points where new information can be attached
-// to its live range, that is, at branch points.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ssi"
-
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-
-using namespace llvm;
-
-static const std::string SSI_PHI = "SSI_phi";
-static const std::string SSI_SIG = "SSI_sigma";
-
-STATISTIC(NumSigmaInserted, "Number of sigma functions inserted");
-STATISTIC(NumPhiInserted, "Number of phi functions inserted");
-
-void SSI::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequiredTransitive<DominanceFrontier>();
-  AU.addRequiredTransitive<DominatorTree>();
-  AU.setPreservesAll();
-}
-
-bool SSI::runOnFunction(Function &F) {
-  DT_ = &getAnalysis<DominatorTree>();
-  return false;
-}
-
-/// This methods creates the SSI representation for the list of values
-/// received. It will only create SSI representation if a value is used
-/// to decide a branch. Repeated values are created only once.
-///
-void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
-  init(value);
-
-  SmallPtrSet<Instruction*, 4> needConstruction;
-  for (SmallVectorImpl<Instruction*>::iterator I = value.begin(),
-       E = value.end(); I != E; ++I)
-    if (created.insert(*I))
-      needConstruction.insert(*I);
-
-  insertSigmaFunctions(needConstruction);
-
-  // Test if there is a need to transform to SSI
-  if (!needConstruction.empty()) {
-    insertPhiFunctions(needConstruction);
-    renameInit(needConstruction);
-    rename(DT_->getRoot());
-    fixPhis();
-  }
-
-  clean();
-}
-
-/// Insert sigma functions (a sigma function is a phi function with one
-/// operator)
-///
-void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) {
-  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
-       E = value.end(); I != E; ++I) {
-    for (Value::use_iterator begin = (*I)->use_begin(),
-         end = (*I)->use_end(); begin != end; ++begin) {
-      // Test if the Use of the Value is in a comparator
-      if (CmpInst *CI = dyn_cast<CmpInst>(*begin)) {
-        // Iterates through all uses of CmpInst
-        for (Value::use_iterator begin_ci = CI->use_begin(),
-             end_ci = CI->use_end(); begin_ci != end_ci; +