[llvm-branch-commits] [llvm-branch] r117425 [6/9] - in /llvm/branches/wendling/eh: ./ autoconf/ autoconf/m4/ bindings/ada/ bindings/ocaml/llvm/ bindings/ocaml/transforms/scalar/ cmake/ cmake/modules/ docs/ docs/CommandGuide/ docs/tutorial/ examples/ examples/BrainF/ examples/ExceptionDemo/ examples/Fibonacci/ examples/Kaleidoscope/Chapter7/ examples/ModuleMaker/ include/llvm-c/ include/llvm-c/Transforms/ include/llvm/ include/llvm/ADT/ include/llvm/Analysis/ include/llvm/Assembly/ include/llvm/Bitcode/ include/llvm/CodeGen/ i...

Bill Wendling isanbard at gmail.com
Tue Oct 26 17:48:11 PDT 2010


Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMSubtarget.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMSubtarget.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMSubtarget.cpp Tue Oct 26 19:48:03 2010
@@ -27,25 +27,37 @@
 UseMOVT("arm-use-movt",
         cl::init(true), cl::Hidden);
 
+static cl::opt<bool>
+StrictAlign("arm-strict-align", cl::Hidden,
+            cl::desc("Disallow all unaligned memory accesses"));
+
 ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
                            bool isT)
   : ARMArchVersion(V4)
+  , ARMProcFamily(Others)
   , ARMFPUType(None)
   , UseNEONForSinglePrecisionFP(false)
   , SlowVMLx(false)
+  , SlowFPBrcc(false)
   , IsThumb(isT)
   , ThumbMode(Thumb1)
+  , NoARM(false)
   , PostRAScheduler(false)
   , IsR9Reserved(ReserveR9)
   , UseMovt(UseMOVT)
   , HasFP16(false)
+  , HasD16(false)
   , HasHardwareDivide(false)
   , HasT2ExtractPack(false)
+  , HasDataBarrier(false)
+  , Pref32BitThumb(false)
+  , FPOnlySP(false)
+  , AllowsUnalignedMem(false)
   , stackAlignment(4)
   , CPUString("generic")
   , TargetType(isELF) // Default to ELF unless otherwise specified.
   , TargetABI(ARM_ABI_APCS) {
-  // default to soft float ABI
+  // Default to soft float ABI
   if (FloatABIType == FloatABI::Default)
     FloatABIType = FloatABI::Soft;
 
@@ -116,6 +128,11 @@
 
   if (!isThumb() || hasThumb2())
     PostRAScheduler = true;
+
+  // v6+ may or may not support unaligned mem access depending on the system
+  // configuration.
+  if (!StrictAlign && hasV6Ops() && isTargetDarwin())
+    AllowsUnalignedMem = true;
 }
 
 /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
@@ -169,6 +186,18 @@
   return false;
 }
 
+unsigned ARMSubtarget::getMispredictionPenalty() const {
+  // If we have a reasonable estimate of the pipeline depth, then we can
+  // estimate the penalty of a misprediction based on that.
+  if (isCortexA8())
+    return 13;
+  else if (isCortexA9())
+    return 8;
+  
+  // Otherwise, just return a sensible default.
+  return 10;
+}
+
 bool ARMSubtarget::enablePostRAScheduler(
            CodeGenOpt::Level OptLevel,
            TargetSubtarget::AntiDepBreakMode& Mode,

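The getMispredictionPenalty() hook added above hands optimization passes a per-core cycle estimate for a mispredicted branch. A minimal sketch of the trade-off a consumer (e.g. an if-conversion heuristic) could make with it -- the function, parameters, and one-cycle-per-instruction model below are illustrative, not part of this commit:

    #include "ARMSubtarget.h"

    // Predicating a small block beats branching when the expected cycles
    // lost to mispredictions exceed the cost of the extra instructions.
    static bool worthPredicating(const ARMSubtarget &ST,
                                 unsigned NumExtraInstrs,
                                 float MispredictRate) {
      float ExpectedPenalty = MispredictRate * ST.getMispredictionPenalty();
      // Assume roughly one cycle per predicated instruction.
      return (float)NumExtraInstrs < ExpectedPenalty;
    }

Under this model a Cortex-A8 (penalty 13) with a 30% misprediction rate makes blocks of up to three instructions worth predicating.
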
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMSubtarget.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMSubtarget.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMSubtarget.h Tue Oct 26 19:48:03 2010
@@ -26,7 +26,11 @@
 class ARMSubtarget : public TargetSubtarget {
 protected:
   enum ARMArchEnum {
-    V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M
+    V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
+  };
+
+  enum ARMProcFamilyEnum {
+    Others, CortexA8, CortexA9
   };
 
   enum ARMFPEnum {
@@ -42,6 +46,9 @@
   /// V6, V6T2, V7A, V7M.
   ARMArchEnum ARMArchVersion;
 
+  /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+  ARMProcFamilyEnum ARMProcFamily;
+
   /// ARMFPUType - Floating Point Unit type.
   ARMFPEnum ARMFPUType;
 
@@ -63,6 +70,9 @@
   /// ThumbMode - Indicates supported Thumb version.
   ThumbTypeEnum ThumbMode;
 
+  /// NoARM - True if subtarget does not support ARM mode execution.
+  bool NoARM;
+
   /// PostRAScheduler - True if using post-register-allocation scheduler.
   bool PostRAScheduler;
 
@@ -77,6 +87,10 @@
   /// only so far)
   bool HasFP16;
 
+  /// HasD16 - True if subtarget is limited to 16 double precision
+  /// FP registers for VFPv3.
+  bool HasD16;
+
   /// HasHardwareDivide - True if subtarget supports [su]div
   bool HasHardwareDivide;
 
@@ -84,6 +98,23 @@
   /// instructions.
   bool HasT2ExtractPack;
 
+  /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+  /// instructions.
+  bool HasDataBarrier;
+
+  /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+  /// over 16-bit ones.
+  bool Pref32BitThumb;
+
+  /// FPOnlySP - If true, the floating point unit only supports single
+  /// precision.
+  bool FPOnlySP;
+
+  /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+  /// accesses for some types.  For details, see
+  /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+  bool AllowsUnalignedMem;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -128,6 +159,11 @@
   bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
   bool hasV7Ops()   const { return ARMArchVersion >= V7A;  }
 
+  bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+  bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
+  bool hasARMOps() const { return !NoARM; }
+
   bool hasVFP2() const { return ARMFPUType >= VFPv2; }
   bool hasVFP3() const { return ARMFPUType >= VFPv3; }
   bool hasNEON() const { return ARMFPUType >= NEON;  }
@@ -135,10 +171,14 @@
     return hasNEON() && UseNEONForSinglePrecisionFP; }
   bool hasDivide() const { return HasHardwareDivide; }
   bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+  bool hasDataBarrier() const { return HasDataBarrier; }
   bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
   bool isFPBrccSlow() const { return SlowFPBrcc; }
+  bool isFPOnlySP() const { return FPOnlySP; }
+  bool prefers32BitThumb() const { return Pref32BitThumb; }
 
   bool hasFP16() const { return HasFP16; }
+  bool hasD16() const { return HasD16; }
 
   bool isTargetDarwin() const { return TargetType == isDarwin; }
   bool isTargetELF() const { return TargetType == isELF; }
@@ -155,8 +195,12 @@
 
   bool useMovt() const { return UseMovt && hasV6T2Ops(); }
 
+  bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
   const std::string & getCPUString() const { return CPUString; }
 
+  unsigned getMispredictionPenalty() const;
+  
   /// enablePostRAScheduler - True at 'More' optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                              TargetSubtarget::AntiDepBreakMode& Mode,

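The AllowsUnalignedMem comment above defers to ARMTargetLowering::allowsUnalignedMemoryAccesses(); for reference, that check is essentially the following shape (a simplified sketch -- see ARMISelLowering.cpp for the authoritative version):

    bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
      if (!Subtarget->allowsUnalignedMem())
        return false;
      switch (VT.getSimpleVT().SimpleTy) {
      default:
        // FP and vector types must stay naturally aligned.
        return false;
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        // v6+ cores may load/store these widths unaligned.
        return true;
      }
    }
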
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetMachine.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetMachine.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetMachine.cpp Tue Oct 26 19:48:03 2010
@@ -31,6 +31,26 @@
   }
 }
 
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+                                    MCContext &Ctx, TargetAsmBackend &TAB,
+                                    raw_ostream &_OS,
+                                    MCCodeEmitter *_Emitter,
+                                    bool RelaxAll) {
+  Triple TheTriple(TT);
+  switch (TheTriple.getOS()) {
+  case Triple::Darwin:
+    return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+  case Triple::MinGW32:
+  case Triple::MinGW64:
+  case Triple::Cygwin:
+  case Triple::Win32:
+    llvm_unreachable("ARM does not support Windows COFF format");
+    return NULL;
+  default:
+    return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+  }
+}
 
 extern "C" void LLVMInitializeARMTarget() {
   // Register the target.
@@ -40,6 +60,25 @@
   // Register the target asm info.
   RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
   RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
+
+  // Register the MC Code Emitter
+  TargetRegistry::RegisterCodeEmitter(TheARMTarget,
+                                      createARMMCCodeEmitter);
+  TargetRegistry::RegisterCodeEmitter(TheThumbTarget,
+                                      createARMMCCodeEmitter);
+
+  // Register the asm backend.
+  TargetRegistry::RegisterAsmBackend(TheARMTarget,
+                                     createARMAsmBackend);
+  TargetRegistry::RegisterAsmBackend(TheThumbTarget,
+                                     createARMAsmBackend);
+
+  // Register the object streamer.
+  TargetRegistry::RegisterObjectStreamer(TheARMTarget,
+                                         createMCStreamer);
+  TargetRegistry::RegisterObjectStreamer(TheThumbTarget,
+                                         createMCStreamer);
+
 }
 
 /// TargetMachine ctor - Create an ARM architecture model.
@@ -52,7 +91,8 @@
     Subtarget(TT, FS, isThumb),
     FrameInfo(Subtarget),
     JITInfo(),
-    InstrItins(Subtarget.getInstrItineraryData()) {
+    InstrItins(Subtarget.getInstrItineraryData())
+{
   DefRelocModel = getRelocationModel();
 }
 
@@ -60,12 +100,16 @@
                                    const std::string &FS)
   : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
     DataLayout(Subtarget.isAPCS_ABI() ?
-               std::string("e-p:32:32-f64:32:32-i64:32:32-"
+               std::string("e-p:32:32-f64:32:64-i64:32:64-"
                            "v128:32:128-v64:32:64-n32") :
                std::string("e-p:32:32-f64:64:64-i64:64:64-"
                            "v128:64:128-v64:64:64-n32")),
+    ELFWriterInfo(*this),
     TLInfo(*this),
     TSInfo(*this) {
+  if (!Subtarget.hasARMOps())
+    report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+                       "support ARM mode execution!");
 }
 
 ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
@@ -75,12 +119,13 @@
               ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
               : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
     DataLayout(Subtarget.isAPCS_ABI() ?
-               std::string("e-p:32:32-f64:32:32-i64:32:32-"
+               std::string("e-p:32:32-f64:32:64-i64:32:64-"
                            "i16:16:32-i8:8:32-i1:8:32-"
                            "v128:32:128-v64:32:64-a:0:32-n32") :
                std::string("e-p:32:32-f64:64:64-i64:64:64-"
                            "i16:16:32-i8:8:32-i1:8:32-"
                            "v128:64:128-v64:64:64-a:0:32-n32")),
+    ELFWriterInfo(*this),
     TLInfo(*this),
     TSInfo(*this) {
 }
@@ -102,9 +147,6 @@
 
 bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel) {
-  if (Subtarget.hasNEON())
-    PM.add(createNEONPreAllocPass());
-
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
   if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
     PM.add(createARMLoadStoreOptimizationPass(true));
@@ -138,7 +180,7 @@
 
 bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel) {
-  if (Subtarget.isThumb2())
+  if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
     PM.add(createThumb2SizeReductionPass());
 
   PM.add(createARMConstantIslandPass());

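Note the APCS data layout change above: f64 and i64 move from "32:32" to "32:64", i.e. the ABI alignment stays 4 bytes while the preferred alignment becomes 8. A quick way to confirm that reading of the string (a sketch; assumes an LLVMContext is in scope):

    #include "llvm/LLVMContext.h"
    #include "llvm/Type.h"
    #include "llvm/Target/TargetData.h"

    void checkAPCSDoubleAlign(llvm::LLVMContext &Ctx) {
      llvm::TargetData TD("e-p:32:32-f64:32:64-i64:32:64-"
                          "v128:32:128-v64:32:64-n32");
      const llvm::Type *DoubleTy = llvm::Type::getDoubleTy(Ctx);
      unsigned ABIAlign  = TD.getABITypeAlignment(DoubleTy);   // == 4 bytes
      unsigned PrefAlign = TD.getPrefTypeAlignment(DoubleTy);  // == 8 bytes
      (void)ABIAlign; (void)PrefAlign;
    }
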
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetMachine.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetMachine.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetMachine.h Tue Oct 26 19:48:03 2010
@@ -16,7 +16,9 @@
 
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCStreamer.h"
 #include "ARMInstrInfo.h"
+#include "ARMELFWriterInfo.h"
 #include "ARMFrameInfo.h"
 #include "ARMJITInfo.h"
 #include "ARMSubtarget.h"
@@ -45,8 +47,8 @@
   virtual const ARMFrameInfo     *getFrameInfo() const { return &FrameInfo; }
   virtual       ARMJITInfo       *getJITInfo()         { return &JITInfo; }
   virtual const ARMSubtarget  *getSubtargetImpl() const { return &Subtarget; }
-  virtual const InstrItineraryData getInstrItineraryData() const {
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return &InstrItins;
   }
 
   // Pass Pipeline Configuration
@@ -64,6 +66,7 @@
 class ARMTargetMachine : public ARMBaseTargetMachine {
   ARMInstrInfo        InstrInfo;
   const TargetData    DataLayout;       // Calculates type size & alignment
+  ARMELFWriterInfo    ELFWriterInfo;
   ARMTargetLowering   TLInfo;
   ARMSelectionDAGInfo TSInfo;
 public:
@@ -84,6 +87,9 @@
 
   virtual const ARMInstrInfo     *getInstrInfo() const { return &InstrInfo; }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
+  virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+    return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+  }
 };
 
 /// ThumbTargetMachine - Thumb target machine.
@@ -94,6 +100,7 @@
   // Either Thumb1InstrInfo or Thumb2InstrInfo.
   OwningPtr<ARMBaseInstrInfo> InstrInfo;
   const TargetData    DataLayout;   // Calculates type size & alignment
+  ARMELFWriterInfo    ELFWriterInfo;
   ARMTargetLowering   TLInfo;
   ARMSelectionDAGInfo TSInfo;
 public:
@@ -118,6 +125,9 @@
     return InstrInfo.get();
   }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
+  virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+    return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+  }
 };
 
 } // end namespace llvm

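getELFWriterInfo() above deliberately returns null on non-ELF targets, so lookups need a guard. A hedged usage sketch (TM is assumed to be an ARMTargetMachine):

    if (const ARMELFWriterInfo *EWI = TM.getELFWriterInfo()) {
      // ELF path: EWI describes how ARM relocations are written.
    } else {
      // Darwin (Mach-O) and other non-ELF configurations land here.
    }
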
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetObjectFile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetObjectFile.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetObjectFile.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetObjectFile.cpp Tue Oct 26 19:48:03 2010
@@ -36,4 +36,10 @@
                                  MCSectionELF::SHF_ALLOC,
                                  SectionKind::getDataRel());
   }
+  
+  AttributesSection =
+    getContext().getELFSection(".ARM.attributes",
+                               MCSectionELF::SHT_ARM_ATTRIBUTES,
+                               0,
+                               SectionKind::getMetadata());
 }

Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetObjectFile.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetObjectFile.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetObjectFile.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMTargetObjectFile.h Tue Oct 26 19:48:03 2010
@@ -18,10 +18,19 @@
 class TargetMachine;
 
 class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+  const MCSection *AttributesSection;
 public:
-  ARMElfTargetObjectFile() : TargetLoweringObjectFileELF() {}
+  ARMElfTargetObjectFile() :
+    TargetLoweringObjectFileELF(),
+    AttributesSection(NULL)
+  {}
 
   virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+  virtual const MCSection *getAttributesSection() const {
+    return AttributesSection;
+  }
 };
 
 } // end namespace llvm

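With the new getter, an ARM asm printer could position the streamer on .ARM.attributes before writing build attributes. Hypothetical usage inside an AsmPrinter subclass (SwitchSection is standard MCStreamer API; the attribute payload itself is elided and not specified by this commit):

    const ARMElfTargetObjectFile &ATOF =
        static_cast<const ARMElfTargetObjectFile &>(getObjFileLowering());
    if (const MCSection *Attrs = ATOF.getAttributesSection()) {
      OutStreamer.SwitchSection(Attrs);
      // ... emit the ULEB128-encoded attribute records here ...
    }
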
Modified: llvm/branches/wendling/eh/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Oct 26 19:48:03 2010
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"
+#include "ARMSubtarget.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -16,10 +17,11 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetAsmParser.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
 
@@ -77,19 +79,26 @@
 
   bool ParseDirectiveSyntax(SMLoc L);
 
-  // TODO - For now hacked versions of the next two are in here in this file to
-  // allow some parser testing until the table gen versions are implemented.
+  bool MatchAndEmitInstruction(SMLoc IDLoc,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out) {
+    MCInst Inst;
+    unsigned ErrorInfo;
+    if (MatchInstructionImpl(Operands, Inst, ErrorInfo) == Match_Success) {
+      Out.EmitInstruction(Inst);
+      return false;
+    }
+
+    // FIXME: We should give nicer diagnostics about the exact failure.
+    Error(IDLoc, "unrecognized instruction");
+    return true;
+  }
 
   /// @name Auto-generated Match Functions
   /// {
-  bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCInst &Inst);
 
-  /// MatchRegisterName - Match the given string to a register name and return
-  /// its register number, or -1 if there is no match.  To allow return values
-  /// to be used directly in register lists, arm registers have values between
-  /// 0 and 15.
-  int MatchRegisterName(StringRef Name);
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"
 
   /// }
 
@@ -111,16 +120,21 @@
   ARMOperand() {}
 public:
   enum KindTy {
-    Token,
-    Register,
+    CondCode,
     Immediate,
-    Memory
+    Memory,
+    Register,
+    Token
   } Kind;
 
   SMLoc StartLoc, EndLoc;
 
   union {
     struct {
+      ARMCC::CondCodes Val;
+    } CC;
+
+    struct {
       const char *Data;
       unsigned Length;
     } Tok;
@@ -152,16 +166,19 @@
 
   };
   
-  ARMOperand(KindTy K, SMLoc S, SMLoc E)
-    : Kind(K), StartLoc(S), EndLoc(E) {}
+  //ARMOperand(KindTy K, SMLoc S, SMLoc E)
+  //  : Kind(K), StartLoc(S), EndLoc(E) {}
   
   ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
     Kind = o.Kind;
     StartLoc = o.StartLoc;
     EndLoc = o.EndLoc;
     switch (Kind) {
+    case CondCode:
+      CC = o.CC;
+      break;
     case Token:
-    Tok = o.Tok;
+      Tok = o.Tok;
       break;
     case Register:
       Reg = o.Reg;
@@ -180,6 +197,11 @@
   /// getEndLoc - Get the location of the last token of this operand.
   SMLoc getEndLoc() const { return EndLoc; }
 
+  ARMCC::CondCodes getCondCode() const {
+    assert(Kind == CondCode && "Invalid access!");
+    return CC.Val;
+  }
+
   StringRef getToken() const {
     assert(Kind == Token && "Invalid access!");
     return StringRef(Tok.Data, Tok.Length);
@@ -195,15 +217,50 @@
     return Imm.Val;
   }
 
-  bool isToken() const {return Kind == Token; }
+  bool isCondCode() const { return Kind == CondCode; }
+
+  bool isImm() const { return Kind == Immediate; }
 
   bool isReg() const { return Kind == Register; }
 
+  bool isToken() const { return Kind == Token; }
+
+  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    // Add as immediates when possible.
+    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+    else
+      Inst.addOperand(MCOperand::CreateExpr(Expr));
+  }
+
+  void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+    // FIXME: What belongs here?
+    Inst.addOperand(MCOperand::CreateReg(0));
+  }
+
   void addRegOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::CreateReg(getReg()));
   }
 
+  void addImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    addExpr(Inst, getImm());
+  }
+
+  virtual void dump(raw_ostream &OS) const;
+
+  static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC,
+                             SMLoc S) {
+    Op.reset(new ARMOperand);
+    Op->Kind = CondCode;
+    Op->CC.Val = CC;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+  }
+
   static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str,
                           SMLoc S) {
     Op.reset(new ARMOperand);
@@ -263,6 +320,33 @@
 
 } // end anonymous namespace.
 
+void ARMOperand::dump(raw_ostream &OS) const {
+  switch (Kind) {
+  case CondCode:
+    OS << ARMCondCodeToString(getCondCode());
+    break;
+  case Immediate:
+    getImm()->print(OS);
+    break;
+  case Memory:
+    OS << "<memory>";
+    break;
+  case Register:
+    OS << "<register " << getReg() << ">";
+    break;
+  case Token:
+    OS << "'" << getToken() << "'";
+    break;
+  }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
 /// Try to parse a register name.  The token must be an Identifier when called,
 /// and if it is a register name a Reg operand is created, the token is eaten
 /// and false is returned.  Else true is returned and no token is eaten.
@@ -549,77 +633,6 @@
   return false;
 }
 
-/// A hack to allow some testing, to be replaced by a real table gen version.
-int ARMAsmParser::MatchRegisterName(StringRef Name) {
-  if (Name == "r0" || Name == "R0")
-    return 0;
-  else if (Name == "r1" || Name == "R1")
-    return 1;
-  else if (Name == "r2" || Name == "R2")
-    return 2;
-  else if (Name == "r3" || Name == "R3")
-    return 3;
-  else if (Name == "r3" || Name == "R3")
-    return 3;
-  else if (Name == "r4" || Name == "R4")
-    return 4;
-  else if (Name == "r5" || Name == "R5")
-    return 5;
-  else if (Name == "r6" || Name == "R6")
-    return 6;
-  else if (Name == "r7" || Name == "R7")
-    return 7;
-  else if (Name == "r8" || Name == "R8")
-    return 8;
-  else if (Name == "r9" || Name == "R9")
-    return 9;
-  else if (Name == "r10" || Name == "R10")
-    return 10;
-  else if (Name == "r11" || Name == "R11" || Name == "fp")
-    return 11;
-  else if (Name == "r12" || Name == "R12" || Name == "ip")
-    return 12;
-  else if (Name == "r13" || Name == "R13" || Name == "sp")
-    return 13;
-  else if (Name == "r14" || Name == "R14" || Name == "lr")
-      return 14;
-  else if (Name == "r15" || Name == "R15" || Name == "pc")
-    return 15;
-  return -1;
-}
-
-/// A hack to allow some testing, to be replaced by a real table gen version.
-bool ARMAsmParser::
-MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                 MCInst &Inst) {
-  ARMOperand &Op0 = *(ARMOperand*)Operands[0];
-  assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
-  StringRef Mnemonic = Op0.getToken();
-  if (Mnemonic == "add" ||
-      Mnemonic == "stmfd" ||
-      Mnemonic == "str" ||
-      Mnemonic == "ldmfd" ||
-      Mnemonic == "ldr" ||
-      Mnemonic == "mov" ||
-      Mnemonic == "sub" ||
-      Mnemonic == "bl" ||
-      Mnemonic == "push" ||
-      Mnemonic == "blx" ||
-      Mnemonic == "pop") {
-    // Hard-coded to a valid instruction, till we have a real matcher.
-    Inst = MCInst();
-    Inst.setOpcode(ARM::MOVr);
-    Inst.addOperand(MCOperand::CreateReg(2));
-    Inst.addOperand(MCOperand::CreateReg(2));
-    Inst.addOperand(MCOperand::CreateImm(0));
-    Inst.addOperand(MCOperand::CreateImm(0));
-    Inst.addOperand(MCOperand::CreateReg(0));
-    return false;
-  }
-
-  return true;
-}
-
 /// Parse an ARM instruction operand.  For now this parses the operand regardless
 /// of the mnemonic.
 bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
@@ -662,25 +675,81 @@
 bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   OwningPtr<ARMOperand> Op;
-  ARMOperand::CreateToken(Op, Name, NameLoc);
-  
+
+  // Create the leading tokens for the mnemonic, split by '.' characters.
+  size_t Start = 0, Next = Name.find('.');
+  StringRef Head = Name.slice(Start, Next);
+
+  // Determine the predicate, if any.
+  //
+  // FIXME: We need a way to check whether a prefix supports predication,
+  // otherwise we will end up with an ambiguity for instructions that happen to
+  // end with a predicate name.
+  unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
+    .Case("eq", ARMCC::EQ)
+    .Case("ne", ARMCC::NE)
+    .Case("hs", ARMCC::HS)
+    .Case("lo", ARMCC::LO)
+    .Case("mi", ARMCC::MI)
+    .Case("pl", ARMCC::PL)
+    .Case("vs", ARMCC::VS)
+    .Case("vc", ARMCC::VC)
+    .Case("hi", ARMCC::HI)
+    .Case("ls", ARMCC::LS)
+    .Case("ge", ARMCC::GE)
+    .Case("lt", ARMCC::LT)
+    .Case("gt", ARMCC::GT)
+    .Case("le", ARMCC::LE)
+    .Case("al", ARMCC::AL)
+    .Default(~0U);
+  if (CC != ~0U) {
+    Head = Head.slice(0, Head.size() - 2);
+  } else
+    CC = ARMCC::AL;
+
+  ARMOperand::CreateToken(Op, Head, NameLoc);
   Operands.push_back(Op.take());
 
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+  ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc);
+  Operands.push_back(Op.take());
+
+  // Add the remaining tokens in the mnemonic.
+  while (Next != StringRef::npos) {
+    Start = Next;
+    Next = Name.find('.', Start + 1);
+    Head = Name.slice(Start, Next);
 
+    ARMOperand::CreateToken(Op, Head, NameLoc);
+    Operands.push_back(Op.take());
+  }
+
+  // Read the remaining operands.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
     // Read the first operand.
     OwningPtr<ARMOperand> Op;
-    if (ParseOperand(Op)) return true;
+    if (ParseOperand(Op)) {
+      Parser.EatToEndOfStatement();
+      return true;
+    }
     Operands.push_back(Op.take());
 
     while (getLexer().is(AsmToken::Comma)) {
       Parser.Lex();  // Eat the comma.
 
       // Parse and remember the operand.
-      if (ParseOperand(Op)) return true;
+      if (ParseOperand(Op)) {
+        Parser.EatToEndOfStatement();
+        return true;
+      }
       Operands.push_back(Op.take());
     }
   }
+  
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    Parser.EatToEndOfStatement();
+    return TokError("unexpected token in argument list");
+  }
+  Parser.Lex(); // Consume the EndOfStatement
   return false;
 }
 
@@ -744,7 +813,6 @@
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
     return Error(L, "unexpected token in .syntax directive");
-  StringRef ATTRIBUTE_UNUSED SymbolName = Parser.getTok().getIdentifier();
   Parser.Lex(); // Consume the identifier token.
 
   if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -810,3 +878,7 @@
   RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
   LLVMInitializeARMAsmLexer();
 }
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "ARMGenAsmMatcher.inc"

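The two-character suffix probe in ParseInstruction above trips over exactly what its FIXME warns about: any mnemonic whose last two letters spell a predicate gets split. A standalone sketch of the rule and the ambiguity (simplified; the real table covers all of ARMCC, whose values the literals below mirror):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    using namespace llvm;

    // Strip a trailing condition-code suffix from Head, returning its
    // ARMCC encoding, or ~0U if none was recognized.
    static unsigned splitPredicate(StringRef &Head) {
      if (Head.size() < 2)
        return ~0U;
      unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size() - 2))
        .Case("eq", 0).Case("ne", 1).Case("hs", 2).Case("lo", 3)
        .Case("al", 14) // ... remaining codes elided ...
        .Default(~0U);
      if (CC != ~0U)
        Head = Head.slice(0, Head.size() - 2);
      return CC;
    }

    // splitPredicate("addeq"): Head becomes "add", CC == EQ  (intended)
    // splitPredicate("teq"):   Head becomes "t",   CC == EQ  (the ambiguity)
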
Modified: llvm/branches/wendling/eh/lib/Target/ARM/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -7,19 +7,23 @@
 tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
 tablegen(ARMGenCodeEmitter.inc -gen-emitter)
 tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(ARMGenFastISel.inc -gen-fast-isel)
 tablegen(ARMGenCallingConv.inc -gen-callingconv)
 tablegen(ARMGenSubtarget.inc -gen-subtarget)
 tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
-tablegen(ARMGenFastISel.inc -gen-fast-isel)
+tablegen(ARMGenDecoderTables.inc -gen-arm-decoder)
 
 add_llvm_target(ARMCodeGen
+  ARMAsmBackend.cpp
   ARMAsmPrinter.cpp
   ARMBaseInstrInfo.cpp
   ARMBaseRegisterInfo.cpp
   ARMCodeEmitter.cpp
   ARMConstantIslandPass.cpp
   ARMConstantPoolValue.cpp
+  ARMELFWriterInfo.cpp
   ARMExpandPseudoInsts.cpp
   ARMFastISel.cpp
   ARMGlobalMerge.cpp
@@ -27,6 +31,7 @@
   ARMISelLowering.cpp
   ARMInstrInfo.cpp
   ARMJITInfo.cpp
+  ARMMCCodeEmitter.cpp
   ARMLoadStoreOptimizer.cpp
   ARMMCAsmInfo.cpp
   ARMMCInstLower.cpp
@@ -36,7 +41,6 @@
   ARMTargetMachine.cpp
   ARMTargetObjectFile.cpp
   NEONMoveFix.cpp
-  NEONPreAllocPass.cpp
   Thumb1InstrInfo.cpp
   Thumb1RegisterInfo.cpp
   Thumb2HazardRecognizer.cpp
@@ -45,5 +49,3 @@
   Thumb2RegisterInfo.cpp
   Thumb2SizeReduction.cpp
   )
-
-target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassembler.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Tue Oct 26 19:48:03 2010
@@ -26,6 +26,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
+//#define DEBUG(X) do { X; } while (0)
+
 /// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from
 /// ARMDecoderEmitter.cpp TableGen backend.  It contains:
 ///
@@ -37,9 +39,9 @@
 /// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
 /// function for a Thumb instruction.
 ///
-#include "../ARMGenDecoderTables.inc"
+#include "ARMGenDecoderTables.inc"
 
-#include "../ARMGenEDInfo.inc"
+#include "ARMGenEDInfo.inc"
 
 using namespace llvm;
 
@@ -87,6 +89,12 @@
       return ARM::BFI;
   }
 
+  // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings
+  // A1 & A2.
+  // As a result, the decoder fails to decode USAT properly.
+  if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
+    return ARM::USAT;
+
   // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
   // As a result, the decoder fails to decode UMULL properly.
   if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
@@ -106,7 +114,7 @@
   // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
   // As a result, the decoder fails to decode SSAT properly.
   if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
-    return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr;
+    return ARM::SSAT;
 
   // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
   // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
@@ -291,7 +299,7 @@
 /// decodeInstruction(insn) is invoked on the original insn.
 ///
 /// Otherwise, decodeThumbInstruction is called with the original insn.
-static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) {
+static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
   if (IsThumb2) {
     uint16_t op1 = slice(insn, 28, 27);
     uint16_t op2 = slice(insn, 26, 20);
@@ -429,7 +437,7 @@
   // passed to decodeThumbInstruction().  For 16-bit Thumb instruction, the top
   // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to
   // the top half followed by the second halfword.
-  uint32_t insn = 0;
+  unsigned insn = 0;
   // Possible second halfword.
   uint16_t insn1 = 0;
 

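The fix-up tests above all key off slice(insn, From, To), i.e. Inst{From-To}. A worked example against the new STRBT/USAT disambiguation (slice reimplemented here for illustration; field widths below 32 assumed):

    #include <cassert>
    #include <cstdint>

    static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
      return (Bits >> To) & ((1u << (From - To + 1)) - 1);
    }

    int main() {
      // "usat r0, #8, r1", encoding A1: cond=1110, Inst{27-21}=0b0110111.
      uint32_t insn = 0xE6E80011;
      assert(slice(insn, 27, 21) == 0x37); // matches the check above...
      assert(slice(insn, 5, 4) == 1);      // ...so the opcode is forced to USAT
      return 0;
    }
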
Modified: llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp Tue Oct 26 19:48:03 2010
@@ -20,6 +20,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
+//#define DEBUG(X) do { X; } while (0)
+
 /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
 /// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s
 /// describing the operand info for each ARMInsts[i].
@@ -77,22 +79,9 @@
 }
 
 // Return the register enum based on RegClass and the raw register number.
-// For DRegPair, see comments below.
 // FIXME: Auto-gened?
-static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister,
-                                bool DRegPair = false) {
-
-  if (DRegPair && RegClassID == ARM::QPRRegClassID) {
-    // LLVM expects { Dd, Dd+1 } to form a super register; this is not specified
-    // in the ARM Architecture Manual as far as I understand it (A8.6.307).
-    // Therefore, we morph the RegClassID to be the sub register class and don't
-    // subsequently transform the RawRegister encoding when calculating RegNum.
-    //
-    // See also ARMinstPrinter::printOperand() wrt "dregpair" modifier part
-    // where this workaround is meant for.
-    RegClassID = ARM::DPRRegClassID;
-  }
-
+static unsigned
+getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
   // For this purpose, we can treat rGPR as if it were GPR.
   if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
 
@@ -454,12 +443,23 @@
 //
 // A8-11: DecodeImmShift()
 static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
-  // If type == 0b11 and imm5 == 0, we have an rrx, instead.
-  if (ShOp == ARM_AM::ror && ShImm == 0)
-    ShOp = ARM_AM::rrx;
-  // If (lsr or asr) and imm5 == 0, shift amount is 32.
-  if ((ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr) && ShImm == 0)
+  if (ShImm != 0)
+    return;
+  switch (ShOp) {
+  case ARM_AM::no_shift:
+  case ARM_AM::rrx:
+    break;
+  case ARM_AM::lsl:
+    ShOp = ARM_AM::no_shift;
+    break;
+  case ARM_AM::lsr:
+  case ARM_AM::asr:
     ShImm = 32;
+    break;
+  case ARM_AM::ror:
+    ShOp = ARM_AM::rrx;
+    break;
+  }
 }
 
 // getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
@@ -493,9 +493,6 @@
 static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO) {
 
-  if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7)
-    return true;
-
   assert(0 && "Unexpected pseudo instruction!");
   return false;
 }
@@ -908,34 +905,6 @@
   return true;
 }
 
-static inline bool SaturateOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  case ARM::SSATlsl: case ARM::SSATasr: case ARM::SSAT16:
-  case ARM::USATlsl: case ARM::USATasr: case ARM::USAT16:
-    return true;
-  default:
-    return false;
-  }
-}
-
-static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) {
-  switch (Opcode) {
-  case ARM::SSATlsl:
-  case ARM::SSATasr:
-    return slice(insn, 20, 16) + 1;
-  case ARM::SSAT16:
-    return slice(insn, 19, 16) + 1;
-  case ARM::USATlsl:
-  case ARM::USATasr:
-    return slice(insn, 20, 16);
-  case ARM::USAT16:
-    return slice(insn, 19, 16);
-  default:
-    assert(0 && "Invalid opcode passed in");
-    return 0;
-  }
-}
-
 // A major complication is the fact that some of the saturating add/subtract
 // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
 // They are QADD, QDADD, QDSUB, and QSUB.
@@ -961,34 +930,6 @@
   if (OpIdx >= NumOps)
     return false;
 
-  // SSAT/SSAT16/USAT/USAT16 has imm operand after Rd.
-  if (SaturateOpcode(Opcode)) {
-    MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn)));
-
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRm(insn))));
-
-    if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) {
-      OpIdx += 2;
-      return true;
-    }
-
-    // For SSAT operand reg (Rm) has been disassembled above.
-    // Now disassemble the shift amount.
-
-    // Inst{11-7} encodes the imm5 shift amount.
-    unsigned ShAmt = slice(insn, 11, 7);
-
-    // A8.6.183.  Possible ASR shift amount of 32...
-    if (Opcode == ARM::SSATasr && ShAmt == 0)
-      ShAmt = 32;
-
-    MI.addOperand(MCOperand::CreateImm(ShAmt));
-
-    OpIdx += 3;
-    return true;
-  }
-
   // Special-case handling of BFC/BFI/SBFX/UBFX.
   if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
     MI.addOperand(MCOperand::CreateReg(0));
@@ -1502,13 +1443,55 @@
       && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
     // Extract the 5-bit immediate field Inst{11-7}.
     unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
-    MI.addOperand(MCOperand::CreateImm(ShiftAmt));
+    ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
+    if (Opcode == ARM::PKHBT)
+      Opc = ARM_AM::lsl;
+    else if (Opcode == ARM::PKHTB)
+      Opc = ARM_AM::asr;
+    getImmShiftSE(Opc, ShiftAmt);
+    MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
     ++OpIdx;
   }
 
   return true;
 }
 
+/// DisassembleSatFrm - Disassemble saturate instructions:
+/// SSAT, SSAT16, USAT, and USAT16.
+static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+  const TargetInstrDesc &TID = ARMInsts[Opcode];
+  NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+  // Disassemble register def.
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+                                                     decodeRd(insn))));
+
+  unsigned Pos = slice(insn, 20, 16);
+  if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
+    Pos += 1;
+  MI.addOperand(MCOperand::CreateImm(Pos));
+
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+                                                     decodeRm(insn))));
+
+  if (NumOpsAdded == 4) {
+    ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
+    // Inst{11-7} encodes the imm5 shift amount.
+    unsigned ShAmt = slice(insn, 11, 7);
+    if (ShAmt == 0) {
+      // A8.6.183.  Possible ASR shift amount of 32...
+      if (Opc == ARM_AM::asr)
+        ShAmt = 32;
+      else
+        Opc = ARM_AM::no_shift;
+    }
+    MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+  }
+  return true;
+}
+
 // Extend instructions.
 // SXT* and UXT*: Rd [Rn] Rm [rot_imm].
 // The 2nd operand register is Rn and the 3rd operand register is Rm for the
@@ -1590,8 +1573,7 @@
 }
 
 // A7.5.1
-#if 0
-static uint64_t VFPExpandImm(unsigned char byte, unsigned N) {
+static APInt VFPExpandImm(unsigned char byte, unsigned N) {
   assert(N == 32 || N == 64);
 
   uint64_t Result;
@@ -1606,13 +1588,12 @@
     Result = (uint64_t)slice(byte, 7, 7) << 63 |
              (uint64_t)slice(byte, 5, 0) << 48;
     if (bit6)
-      Result |= 0xffL << 54;
+      Result |= 0xffULL << 54;
     else
-      Result |= 0x1L << 62;
+      Result |= 0x1ULL << 62;
   }
-  return Result;
+  return APInt(N, Result);
 }
-#endif
 
 // VFP Unary Format Instructions:
 //
@@ -1867,7 +1848,7 @@
 
   assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
 
-  bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false;
+  bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
   unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
 
   // Extract Dd/Sd for operand 0.
@@ -1890,7 +1871,7 @@
 
 // VFP Load/Store Multiple Instructions.
 // This is similar to the algorithm for LDM/STM in that operand 0 (the base) and
-// operand 1 (the AM5 mode imm) is followed by two predicate operands.  It is
+// operand 1 (the AM4 mode imm) is followed by two predicate operands.  It is
 // followed by a reglist of either DPR(s) or SPR(s).
 //
 // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
@@ -1914,16 +1895,14 @@
 
   MI.addOperand(MCOperand::CreateReg(Base));
 
-  // Next comes the AM5 Opcode.
+  // Next comes the AM4 Opcode.
   ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
   // Must be either "ia" or "db" submode.
   if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
-    DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n");
+    DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n");
     return false;
   }
-
-  unsigned char Imm8 = insn & 0xFF;
-  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8)));
+  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
 
   // Handling the two predicate operands before the reglist.
   int64_t CondVal = insn >> ARMII::CondShift;
@@ -1933,13 +1912,14 @@
   OpIdx += 4;
 
   bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD ||
-     Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false;
+                  Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD);
   unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
 
   // Extract Dd/Sd.
   unsigned RegD = decodeVFPRd(insn, isSPVFP);
 
   // Fill the variadic part of reglist.
+  unsigned char Imm8 = insn & 0xFF;
   unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
   for (unsigned i = 0; i < Regs; ++i) {
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
@@ -1990,10 +1970,14 @@
   // Extract/decode the f64/f32 immediate.
   if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
         && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
-    // The asm syntax specifies the before-expanded <imm>.
-    // Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
-    //                  Opcode == ARM::FCONSTD ? 64 : 32)
-    MI.addOperand(MCOperand::CreateImm(slice(insn,19,16)<<4 | slice(insn,3,0)));
+    // The asm syntax specifies the floating point value, not the 8-bit literal.
+    APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
+                             Opcode == ARM::FCONSTD ? 64 : 32);
+    APFloat immFP = APFloat(immRaw, true);
+    double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() :
+      immFP.convertToFloat();
+    MI.addOperand(MCOperand::CreateFPImm(imm));
+
     ++OpIdx;
   }
 
@@ -2206,22 +2190,6 @@
   return (insn >> 8) & 0xF;
 }
 
-static bool UseDRegPair(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-  case ARM::VLD1q8_UPD:
-  case ARM::VLD1q16_UPD:
-  case ARM::VLD1q32_UPD:
-  case ARM::VLD1q64_UPD:
-  case ARM::VST1q8_UPD:
-  case ARM::VST1q16_UPD:
-  case ARM::VST1q32_UPD:
-  case ARM::VST1q64_UPD:
-    return true;
-  }
-}
-
 // VLD*
 //   D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
 // VLD*LN*
@@ -2248,10 +2216,9 @@
 
   // We have homogeneous NEON registers for Load/Store.
   unsigned RegClass = 0;
-  bool DRegPair = UseDRegPair(Opcode);
 
   // Double-spaced registers have increments of 2.
-  unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1;
+  unsigned Inc = DblSpaced ? 2 : 1;
 
   unsigned Rn = decodeRn(insn);
   unsigned Rm = decodeRm(insn);
@@ -2297,7 +2264,7 @@
     RegClass = OpInfo[OpIdx].RegClass;
     while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
       MI.addOperand(MCOperand::CreateReg(
-                      getRegisterEnum(B, RegClass, Rd, DRegPair)));
+                      getRegisterEnum(B, RegClass, Rd)));
       Rd += Inc;
       ++OpIdx;
     }
@@ -2316,7 +2283,7 @@
 
     while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
       MI.addOperand(MCOperand::CreateReg(
-                      getRegisterEnum(B, RegClass, Rd, DRegPair)));
+                      getRegisterEnum(B, RegClass, Rd)));
       Rd += Inc;
       ++OpIdx;
     }
@@ -2776,8 +2743,8 @@
   return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
                                   N3V_VectorShift, B);
 }
-static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode,
+    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
   return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
                                   N3V_VectorExtract, B);
@@ -2956,7 +2923,7 @@
 // A8.6.49 ISB
 static inline bool MemBarrierInstr(uint32_t insn) {
   unsigned op7_4 = slice(insn, 7, 4);
-  if (slice(insn, 31, 20) == 0xf57 && (op7_4 >= 4 && op7_4 <= 6))
+  if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6))
     return true;
 
   return false;
@@ -3013,8 +2980,15 @@
 static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  if (MemBarrierInstr(insn))
+  if (MemBarrierInstr(insn)) {
+    // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care
+    // of within the generic ARMBasicMCBuilder::BuildIt() method.
+    //
+    // Inst{3-0} encodes the memory barrier option for the variants.
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+    NumOpsAdded = 1;
     return true;
+  }
 
   switch (Opcode) {
   case ARM::CLREX:
@@ -3024,13 +2998,17 @@
   case ARM::WFE:
   case ARM::WFI:
   case ARM::SEV:
-  case ARM::SETENDBE:
-  case ARM::SETENDLE:
     return true;
   default:
     break;
   }
 
+  if (Opcode == ARM::SETEND) {
+    NumOpsAdded = 1;
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9)));
+    return true;
+  }
+
   // CPS has a singleton $opt operand that contains the following information:
   // opt{4-0} = mode from Inst{4-0}
   // opt{5} = changemode from Inst{17}
@@ -3085,6 +3063,7 @@
   &DisassembleLdStMulFrm,
   &DisassembleLdStExFrm,
   &DisassembleArithMiscFrm,
+  &DisassembleSatFrm,
   &DisassembleExtFrm,
   &DisassembleVFPUnaryFrm,
   &DisassembleVFPBinaryFrm,

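The resurrected VFPExpandImm above (ARM ARM A7.5.1) lets the disassembler hand FCONSTS/FCONSTD the actual floating point value instead of the raw 8-bit literal. A worked check of the 64-bit branch, transcribed from the hunk above (sans APInt):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // 64-bit path of VFPExpandImm, as in the diff above.
    static uint64_t VFPExpandImm64(uint8_t Byte) {
      uint64_t Result = (uint64_t)((Byte >> 7) & 1) << 63 |
                        (uint64_t)(Byte & 0x3F) << 48;
      if ((Byte >> 6) & 1)
        Result |= 0xffULL << 54;
      else
        Result |= 0x1ULL << 62;
      return Result;
    }

    int main() {
      // imm8 = 0x70: sign 0, bit6 set, low six bits 110000
      //   -> 0x3FF0000000000000, the IEEE-754 double 1.0.
      uint64_t Bits = VFPExpandImm64(0x70);
      double D;
      std::memcpy(&D, &Bits, sizeof D);
      assert(D == 1.0);
      return 0;
    }
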
Modified: llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h Tue Oct 26 19:48:03 2010
@@ -54,36 +54,35 @@
   ENTRY(ARM_FORMAT_LDSTMULFRM,    10) \
   ENTRY(ARM_FORMAT_LDSTEXFRM,     11) \
   ENTRY(ARM_FORMAT_ARITHMISCFRM,  12) \
-  ENTRY(ARM_FORMAT_EXTFRM,        13) \
-  ENTRY(ARM_FORMAT_VFPUNARYFRM,   14) \
-  ENTRY(ARM_FORMAT_VFPBINARYFRM,  15) \
-  ENTRY(ARM_FORMAT_VFPCONV1FRM,   16) \
-  ENTRY(ARM_FORMAT_VFPCONV2FRM,   17) \
-  ENTRY(ARM_FORMAT_VFPCONV3FRM,   18) \
-  ENTRY(ARM_FORMAT_VFPCONV4FRM,   19) \
-  ENTRY(ARM_FORMAT_VFPCONV5FRM,   20) \
-  ENTRY(ARM_FORMAT_VFPLDSTFRM,    21) \
-  ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \
-  ENTRY(ARM_FORMAT_VFPMISCFRM,    23) \
-  ENTRY(ARM_FORMAT_THUMBFRM,      24) \
-  ENTRY(ARM_FORMAT_NEONFRM,       25) \
-  ENTRY(ARM_FORMAT_NEONGETLNFRM,  26) \
-  ENTRY(ARM_FORMAT_NEONSETLNFRM,  27) \
-  ENTRY(ARM_FORMAT_NEONDUPFRM,    28) \
-  ENTRY(ARM_FORMAT_MISCFRM,       29) \
-  ENTRY(ARM_FORMAT_THUMBMISCFRM,  30) \
-  ENTRY(ARM_FORMAT_NLdSt,         31) \
-  ENTRY(ARM_FORMAT_N1RegModImm,   32) \
-  ENTRY(ARM_FORMAT_N2Reg,         33) \
-  ENTRY(ARM_FORMAT_NVCVT,         34) \
-  ENTRY(ARM_FORMAT_NVecDupLn,     35) \
-  ENTRY(ARM_FORMAT_N2RegVecShL,   36) \
-  ENTRY(ARM_FORMAT_N2RegVecShR,   37) \
-  ENTRY(ARM_FORMAT_N3Reg,         38) \
-  ENTRY(ARM_FORMAT_N3RegVecSh,    39) \
-  ENTRY(ARM_FORMAT_NVecExtract,   40) \
-  ENTRY(ARM_FORMAT_NVecMulScalar, 41) \
-  ENTRY(ARM_FORMAT_NVTBL,         42)
+  ENTRY(ARM_FORMAT_SATFRM,        13) \
+  ENTRY(ARM_FORMAT_EXTFRM,        14) \
+  ENTRY(ARM_FORMAT_VFPUNARYFRM,   15) \
+  ENTRY(ARM_FORMAT_VFPBINARYFRM,  16) \
+  ENTRY(ARM_FORMAT_VFPCONV1FRM,   17) \
+  ENTRY(ARM_FORMAT_VFPCONV2FRM,   18) \
+  ENTRY(ARM_FORMAT_VFPCONV3FRM,   19) \
+  ENTRY(ARM_FORMAT_VFPCONV4FRM,   20) \
+  ENTRY(ARM_FORMAT_VFPCONV5FRM,   21) \
+  ENTRY(ARM_FORMAT_VFPLDSTFRM,    22) \
+  ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
+  ENTRY(ARM_FORMAT_VFPMISCFRM,    24) \
+  ENTRY(ARM_FORMAT_THUMBFRM,      25) \
+  ENTRY(ARM_FORMAT_MISCFRM,       26) \
+  ENTRY(ARM_FORMAT_NEONGETLNFRM,  27) \
+  ENTRY(ARM_FORMAT_NEONSETLNFRM,  28) \
+  ENTRY(ARM_FORMAT_NEONDUPFRM,    29) \
+  ENTRY(ARM_FORMAT_NLdSt,         30) \
+  ENTRY(ARM_FORMAT_N1RegModImm,   31) \
+  ENTRY(ARM_FORMAT_N2Reg,         32) \
+  ENTRY(ARM_FORMAT_NVCVT,         33) \
+  ENTRY(ARM_FORMAT_NVecDupLn,     34) \
+  ENTRY(ARM_FORMAT_N2RegVecShL,   35) \
+  ENTRY(ARM_FORMAT_N2RegVecShR,   36) \
+  ENTRY(ARM_FORMAT_N3Reg,         37) \
+  ENTRY(ARM_FORMAT_N3RegVecSh,    38) \
+  ENTRY(ARM_FORMAT_NVecExtract,   39) \
+  ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
+  ENTRY(ARM_FORMAT_NVTBL,         41)
 
 // ARM instruction format specifies the encoding used by the instruction.
 #define ENTRY(n, v) n = v,
@@ -127,8 +126,8 @@
 }
 
 /// Utility function for setting [From, To] bits to Val for a uint32_t.
-static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
-                            uint32_t Val) {
+static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
+                            unsigned Val) {
   assert(From < 32 && To < 32 && From >= To);
   uint32_t Mask = ((1 << (From - To + 1)) - 1);
   Bits &= ~(Mask << To);

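setSlice is the store-side counterpart of slice(): it overwrites Inst{From-To} with Val and leaves the other bits alone, which is what lets decodeThumbSideEffect-style code canonicalize an encoding in place. A minimal round-trip sketch (helper contracts as declared in this header):

    unsigned insn = 0xE0000011;
    setSlice(insn, 27, 21, 0x37);          // plant Inst{27-21}
    assert(slice(insn, 27, 21) == 0x37);
    assert(slice(insn, 3, 0) == 1);        // untouched bits survive
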
Modified: llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h Tue Oct 26 19:48:03 2010
@@ -220,7 +220,7 @@
   switch (bits2) {
   default: assert(0 && "No such value");
   case 0:
-    ShOp = ARM_AM::lsl;
+    ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
     return imm5;
   case 1:
     ShOp = ARM_AM::lsr;
@@ -959,22 +959,23 @@
 // corresponding to op.
 //
 // Table A6-1 16-bit Thumb instruction encoding (abridged)
-// op		Instruction or instruction class
-// ------	--------------------------------------------------------------------
-// 00xxxx	Shift (immediate), add, subtract, move, and compare on page A6-7
-// 010000	Data-processing on page A6-8
-// 010001	Special data instructions and branch and exchange on page A6-9
-// 01001x	Load from Literal Pool, see LDR (literal) on page A8-122
-// 0101xx	Load/store single data item on page A6-10
+// op    Instruction or instruction class
+// ------  --------------------------------------------------------------------
+// 00xxxx  Shift (immediate), add, subtract, move, and compare on page A6-7
+// 010000  Data-processing on page A6-8
+// 010001  Special data instructions and branch and exchange on page A6-9
+// 01001x  Load from Literal Pool, see LDR (literal) on page A8-122
+// 0101xx  Load/store single data item on page A6-10
 // 011xxx
 // 100xxx
-// 10100x	Generate PC-relative address, see ADR on page A8-32
-// 10101x	Generate SP-relative address, see ADD (SP plus immediate) on page A8-28
-// 1011xx	Miscellaneous 16-bit instructions on page A6-11
-// 11000x	Store multiple registers, see STM / STMIA / STMEA on page A8-374
-// 11001x	Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a
-// 1101xx	Conditional branch, and Supervisor Call on page A6-13
-// 11100x	Unconditional Branch, see B on page A8-44
+// 10100x  Generate PC-relative address, see ADR on page A8-32
+// 10101x  Generate SP-relative address, see ADD (SP plus immediate) on
+//         page A8-28
+// 1011xx  Miscellaneous 16-bit instructions on page A6-11
+// 11000x  Store multiple registers, see STM / STMIA / STMEA on page A8-374
+// 11001x  Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a
+// 1101xx  Conditional branch, and Supervisor Call on page A6-13
+// 11100x  Unconditional Branch, see B on page A8-44
 //
 static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -1389,14 +1390,7 @@
       unsigned imm5 = getShiftAmtBits(insn);
       ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
       unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
-
-      // PKHBT/PKHTB are special in that we need the decodeImmShift() call to
-      // decode the shift amount from raw imm5 and bits2, but we DO NOT need
-      // to encode the ShOp, as it's in the asm string already.
-      if (Opcode == ARM::t2PKHBT || Opcode == ARM::t2PKHTB)
-        MI.addOperand(MCOperand::CreateImm(ShAmt));
-      else
-        MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
+      MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
     }
     ++OpIdx;
   }
@@ -1408,7 +1402,8 @@
 //
 // Two register operands: Rs Rn ModImm
 // One register operands (Rs=0b1111 no explicit dest reg): Rn ModImm
-// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm - {t2MOVi, t2MVNi}
+// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm -
+// {t2MOVi, t2MVNi}
 //
 // ModImm = ThumbExpandImm(i:imm3:imm8)
 static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
@@ -1462,30 +1457,48 @@
 
 static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
   switch (Opcode) {
-  case ARM::t2SSATlsl: case ARM::t2SSATasr: case ARM::t2SSAT16:
-  case ARM::t2USATlsl: case ARM::t2USATasr: case ARM::t2USAT16:
+  case ARM::t2SSAT: case ARM::t2SSAT16:
+  case ARM::t2USAT: case ARM::t2USAT16:
     return true;
   default:
     return false;
   }
 }
 
-static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
-  switch (Opcode) {
-  case ARM::t2SSATlsl:
-  case ARM::t2SSATasr:
-    return slice(insn, 4, 0) + 1;
-  case ARM::t2SSAT16:
-    return slice(insn, 3, 0) + 1;
-  case ARM::t2USATlsl:
-  case ARM::t2USATasr:
-    return slice(insn, 4, 0);
-  case ARM::t2USAT16:
-    return slice(insn, 3, 0);
-  default:
-    assert(0 && "Unexpected opcode");
-    return 0;
+/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
+/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
+/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
+static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
+                                 unsigned &NumOpsAdded, BO B) {
+  const TargetInstrDesc &TID = ARMInsts[Opcode];
+  NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+  // Disassemble the register def.
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+                                                     decodeRs(insn))));
+
+  unsigned Pos = slice(insn, 4, 0);
+  if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
+    Pos += 1;
+  MI.addOperand(MCOperand::CreateImm(Pos));
+
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+                                                     decodeRn(insn))));
+
+  if (NumOpsAdded == 4) {
+    ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
+                            ARM_AM::asr : ARM_AM::lsl);
+    // Inst{14-12:7-6} encodes the imm5 shift amount.
+    unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
+    if (ShAmt == 0) {
+      if (Opc == ARM_AM::asr)
+        ShAmt = 32;
+      else
+        Opc = ARM_AM::no_shift;
+    }
+    MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
   }
+  return true;
 }
 
 // A6.3.3 Data-processing (plain binary immediate)
@@ -1499,11 +1512,6 @@
 // o t2SBFX (SBFX): Rs Rn lsb width
 // o t2UBFX (UBFX): Rs Rn lsb width
 // o t2BFI (BFI): Rs Rn lsb width
-//
-// [Signed|Unsigned] Saturate [16]
-//
-// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt
-// o t2SSAT16, t2USAT16: Rs sat_pos Rn
 static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
@@ -1528,30 +1536,6 @@
                                                      decodeRs(insn))));
   ++OpIdx;
 
-  // t2SSAT/t2SSAT16/t2USAT/t2USAT16 has imm operand after Rd.
-  if (Thumb2SaturateOpcode(Opcode)) {
-    MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn)));
-
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
-                                                       decodeRn(insn))));
-
-    if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) {
-      OpIdx += 2;
-      return true;
-    }
-
-    // For SSAT operand reg (Rn) has been disassembled above.
-    // Now disassemble the shift amount.
-
-    // Inst{14-12:7-6} encodes the imm5 shift amount.
-    unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
-
-    MI.addOperand(MCOperand::CreateImm(ShAmt));
-
-    OpIdx += 3;
-    return true;
-  }
-
   if (TwoReg) {
     assert(NumOps >= 3 && "Expect >= 3 operands");
     int Idx;
@@ -1629,8 +1613,8 @@
 // A8.6.26
 // t2BXJ -> Rn
 //
-// Miscellaneous control: t2Int_MemBarrierV7 (and its t2DMB variants),
-// t2Int_SyncBarrierV7 (and its t2DSB varianst), t2ISBsy, t2CLREX
+// Miscellaneous control: t2DMBsy (and its t2DMB variants),
+// t2DSBsy (and its t2DSB variants), t2ISBsy, t2CLREX
 //   -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
 //
 // Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
@@ -1853,13 +1837,15 @@
 //
 // t2LDRi12:   Rd Rn (+)imm12
 // t2LDRi8:    Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2LDRs:     Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg)
+// t2LDRs:     Rd Rn Rm ConstantShiftSpecifier (see also
+//             DisassembleThumb2DPSoReg)
 // t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
 // t2LDR_PRE:  Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
 //
 // t2STRi12:   Rd Rn (+)imm12
 // t2STRi8:    Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2STRs:     Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg)
+// t2STRs:     Rd Rn Rm ConstantShiftSpecifier (see also
+//             DisassembleThumb2DPSoReg)
 // t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
 // t2STR_PRE:  Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
 //
@@ -2099,25 +2085,29 @@
 // corresponding to (op1, op2, op).
 //
 // Table A6-9 32-bit Thumb instruction encoding
-// op1	op2		op	Instruction class, see
-// ---	-------	--	------------------------------------------------------------
-// 01	00xx0xx	-	Load/store multiple on page A6-23
-// 		00xx1xx	-	Load/store dual, load/store exclusive, table branch on page A6-24
-// 		01xxxxx	-	Data-processing (shifted register) on page A6-31
-// 		1xxxxxx	-	Coprocessor instructions on page A6-40
-// 10	x0xxxxx	0	Data-processing (modified immediate) on page A6-15
-// 		x1xxxxx	0	Data-processing (plain binary immediate) on page A6-19
-// 		-		1	Branches and miscellaneous control on page A6-20
-// 11	000xxx0	-	Store single data item on page A6-30
-// 		001xxx0	-	Advanced SIMD element or structure load/store instructions on page A7-27
-// 		00xx001 -	Load byte, memory hints on page A6-28
-// 		00xx011	-	Load halfword, memory hints on page A6-26
-// 		00xx101	-	Load word on page A6-25
-// 		00xx111	-	UNDEFINED
-// 		010xxxx	-	Data-processing (register) on page A6-33
-// 		0110xxx	-	Multiply, multiply accumulate, and absolute difference on page A6-38
-// 		0111xxx	-	Long multiply, long multiply accumulate, and divide on page A6-39
-// 		1xxxxxx	-	Coprocessor instructions on page A6-40
+// op1  op2    op  Instruction class, see
+// ---  -------  --  -----------------------------------------------------------
+// 01  00xx0xx  -  Load/store multiple on page A6-23
+//     00xx1xx  -  Load/store dual, load/store exclusive, table branch on
+//                 page A6-24
+//     01xxxxx  -  Data-processing (shifted register) on page A6-31
+//     1xxxxxx  -  Coprocessor instructions on page A6-40
+// 10  x0xxxxx  0  Data-processing (modified immediate) on page A6-15
+//     x1xxxxx  0  Data-processing (plain binary immediate) on page A6-19
+//     -        1  Branches and miscellaneous control on page A6-20
+// 11  000xxx0  -  Store single data item on page A6-30
+//     001xxx0  -  Advanced SIMD element or structure load/store instructions
+//                 on page A7-27
+//     00xx001  -  Load byte, memory hints on page A6-28
+//     00xx011  -  Load halfword, memory hints on page A6-26
+//     00xx101  -  Load word on page A6-25
+//     00xx111  -  UNDEFINED
+//     010xxxx  -  Data-processing (register) on page A6-33
+//     0110xxx  -  Multiply, multiply accumulate, and absolute difference on
+//                 page A6-38
+//     0111xxx  -  Long multiply, long multiply accumulate, and divide on
+//                 page A6-39
+//     1xxxxxx  -  Coprocessor instructions on page A6-40
 //
 static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
     MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
@@ -2163,22 +2153,20 @@
     break;
   case 2:
     if (op == 0) {
-      if (slice(op2, 5, 5) == 0) {
+      if (slice(op2, 5, 5) == 0)
         // Data-processing (modified immediate)
         return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
                                          B);
-      } else {
-        // Data-processing (plain binary immediate)
-        return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
-                                         B);
-      }
-    } else {
-      // Branches and miscellaneous control on page A6-20.
-      return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
-                                         B);
-    }
+      if (Thumb2SaturateOpcode(Opcode))
+        return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
 
-    break;
+      // Data-processing (plain binary immediate)
+      return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+                                       B);
+    }
+    // Branches and miscellaneous control on page A6-20.
+    return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+                                       B);
   case 3:
     switch (slice(op2, 6, 5)) {
     case 0:
@@ -2195,7 +2183,8 @@
         }
       } else {
         // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
-        return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded, B);
+        return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps,
+                                     NumOpsAdded, B);
       }
       break;
     case 1:

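The new DisassembleThumb2Sat routine above folds the old per-opcode
saturate decoding into one place: the sh bit Inst{21} selects ASR vs.
LSL, and the shift amount is the split imm5 field Inst{14-12:7-6},
where imm5 == 0 means ASR #32 or no shift. A standalone sketch of that
decode, with a local bit-slice helper standing in for the LLVM one:

  #include <cstdint>

  // Local stand-in for the disassembler's slice() helper: extracts
  // bits [Hi:Lo] of the instruction word.
  static unsigned slice(uint32_t Insn, unsigned Hi, unsigned Lo) {
    return (Insn >> Lo) & ((1u << (Hi - Lo + 1)) - 1);
  }

  enum ShiftOpc { no_shift, lsl, asr };

  // Decode the t2SSAT/t2USAT shift operand as in the hunk above.
  static void decodeSatShift(uint32_t Insn, ShiftOpc &Opc, unsigned &ShAmt) {
    Opc = slice(Insn, 21, 21) ? asr : lsl;
    // Inst{14-12:7-6} encodes the imm5 shift amount.
    ShAmt = slice(Insn, 14, 12) << 2 | slice(Insn, 7, 6);
    if (ShAmt == 0) {
      if (Opc == asr)
        ShAmt = 32;        // imm5 == 0 with sh == 1 means ASR #32.
      else
        Opc = no_shift;    // imm5 == 0 with sh == 0 means no shift.
    }
  }
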
Propchange: llvm/branches/wendling/eh/lib/Target/ARM/InstPrinter/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Oct 26 19:48:03 2010
@@ -0,0 +1,9 @@
+Debug
+Debug+Checks
+Debug+Coverage
+Debug+Coverage-Asserts
+Release
+Release-Asserts
+Release+Coverage
+Debug+Asserts
+Release+Asserts

Modified: llvm/branches/wendling/eh/lib/Target/ARM/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Makefile?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Makefile (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Makefile Tue Oct 26 19:48:03 2010
@@ -14,12 +14,12 @@
 # Make sure that tblgen is run, first thing.
 BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
                 ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
-                ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
+                ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
                 ARMGenDAGISel.inc ARMGenSubtarget.inc \
                 ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
                 ARMGenDecoderTables.inc ARMGenEDInfo.inc \
                 ARMGenFastISel.inc
 
-DIRS = AsmPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo
 
 include $(LEVEL)/Makefile.common

Modified: llvm/branches/wendling/eh/lib/Target/ARM/NEONMoveFix.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/NEONMoveFix.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/NEONMoveFix.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/NEONMoveFix.cpp Tue Oct 26 19:48:03 2010
@@ -24,7 +24,7 @@
 namespace {
   struct NEONMoveFixPass : public MachineFunctionPass {
     static char ID;
-    NEONMoveFixPass() : MachineFunctionPass(&ID) {}
+    NEONMoveFixPass() : MachineFunctionPass(ID) {}
 
     virtual bool runOnMachineFunction(MachineFunction &Fn);
 

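The MachineFunctionPass(&ID) -> MachineFunctionPass(ID) change here
(repeated in the passes below) tracks the updated pass API, which now
takes the ID char by reference rather than by address. The resulting
boilerplate, sketched with a hypothetical pass name:

  #include "llvm/CodeGen/MachineFunctionPass.h"
  using namespace llvm;

  namespace {
    struct ExamplePass : public MachineFunctionPass {
      static char ID;  // the address of ID uniquely identifies the pass
      ExamplePass() : MachineFunctionPass(ID) {}  // by reference, not &ID
      virtual bool runOnMachineFunction(MachineFunction &MF);
    };
    char ExamplePass::ID = 0;
  }
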
Removed: llvm/branches/wendling/eh/lib/Target/ARM/NEONPreAllocPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/NEONPreAllocPass.cpp?rev=117424&view=auto
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/NEONPreAllocPass.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/NEONPreAllocPass.cpp (removed)
@@ -1,495 +0,0 @@
-//===-- NEONPreAllocPass.cpp - Allocate adjacent NEON registers--*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "neon-prealloc"
-#include "ARM.h"
-#include "ARMInstrInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-using namespace llvm;
-
-namespace {
-  class NEONPreAllocPass : public MachineFunctionPass {
-    const TargetInstrInfo *TII;
-    MachineRegisterInfo *MRI;
-
-  public:
-    static char ID;
-    NEONPreAllocPass() : MachineFunctionPass(&ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &MF);
-
-    virtual const char *getPassName() const {
-      return "NEON register pre-allocation pass";
-    }
-
-  private:
-    bool FormsRegSequence(MachineInstr *MI,
-                          unsigned FirstOpnd, unsigned NumRegs,
-                          unsigned Offset, unsigned Stride) const;
-    bool PreAllocNEONRegisters(MachineBasicBlock &MBB);
-  };
-
-  char NEONPreAllocPass::ID = 0;
-}
-
-static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
-                             unsigned &Offset, unsigned &Stride) {
-  // Default to unit stride with no offset.
-  Stride = 1;
-  Offset = 0;
-
-  switch (Opcode) {
-  default:
-    break;
-
-  case ARM::VLD1q8:
-  case ARM::VLD1q16:
-  case ARM::VLD1q32:
-  case ARM::VLD1q64:
-  case ARM::VLD2d8:
-  case ARM::VLD2d16:
-  case ARM::VLD2d32:
-  case ARM::VLD2LNd8:
-  case ARM::VLD2LNd16:
-  case ARM::VLD2LNd32:
-    FirstOpnd = 0;
-    NumRegs = 2;
-    return true;
-
-  case ARM::VLD2q8:
-  case ARM::VLD2q16:
-  case ARM::VLD2q32:
-    FirstOpnd = 0;
-    NumRegs = 4;
-    return true;
-
-  case ARM::VLD2LNq16:
-  case ARM::VLD2LNq32:
-    FirstOpnd = 0;
-    NumRegs = 2;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD2LNq16odd:
-  case ARM::VLD2LNq32odd:
-    FirstOpnd = 0;
-    NumRegs = 2;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD3d8:
-  case ARM::VLD3d16:
-  case ARM::VLD3d32:
-  case ARM::VLD1d64T:
-  case ARM::VLD3LNd8:
-  case ARM::VLD3LNd16:
-  case ARM::VLD3LNd32:
-    FirstOpnd = 0;
-    NumRegs = 3;
-    return true;
-
-  case ARM::VLD3q8_UPD:
-  case ARM::VLD3q16_UPD:
-  case ARM::VLD3q32_UPD:
-    FirstOpnd = 0;
-    NumRegs = 3;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD3q8odd_UPD:
-  case ARM::VLD3q16odd_UPD:
-  case ARM::VLD3q32odd_UPD:
-    FirstOpnd = 0;
-    NumRegs = 3;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD3LNq16:
-  case ARM::VLD3LNq32:
-    FirstOpnd = 0;
-    NumRegs = 3;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD3LNq16odd:
-  case ARM::VLD3LNq32odd:
-    FirstOpnd = 0;
-    NumRegs = 3;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD4d8:
-  case ARM::VLD4d16:
-  case ARM::VLD4d32:
-  case ARM::VLD1d64Q:
-  case ARM::VLD4LNd8:
-  case ARM::VLD4LNd16:
-  case ARM::VLD4LNd32:
-    FirstOpnd = 0;
-    NumRegs = 4;
-    return true;
-
-  case ARM::VLD4q8_UPD:
-  case ARM::VLD4q16_UPD:
-  case ARM::VLD4q32_UPD:
-    FirstOpnd = 0;
-    NumRegs = 4;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD4q8odd_UPD:
-  case ARM::VLD4q16odd_UPD:
-  case ARM::VLD4q32odd_UPD:
-    FirstOpnd = 0;
-    NumRegs = 4;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD4LNq16:
-  case ARM::VLD4LNq32:
-    FirstOpnd = 0;
-    NumRegs = 4;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD4LNq16odd:
-  case ARM::VLD4LNq32odd:
-    FirstOpnd = 0;
-    NumRegs = 4;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VST1q8:
-  case ARM::VST1q16:
-  case ARM::VST1q32:
-  case ARM::VST1q64:
-  case ARM::VST2d8:
-  case ARM::VST2d16:
-  case ARM::VST2d32:
-  case ARM::VST2LNd8:
-  case ARM::VST2LNd16:
-  case ARM::VST2LNd32:
-    FirstOpnd = 2;
-    NumRegs = 2;
-    return true;
-
-  case ARM::VST2q8:
-  case ARM::VST2q16:
-  case ARM::VST2q32:
-    FirstOpnd = 2;
-    NumRegs = 4;
-    return true;
-
-  case ARM::VST2LNq16:
-  case ARM::VST2LNq32:
-    FirstOpnd = 2;
-    NumRegs = 2;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VST2LNq16odd:
-  case ARM::VST2LNq32odd:
-    FirstOpnd = 2;
-    NumRegs = 2;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VST3d8:
-  case ARM::VST3d16:
-  case ARM::VST3d32:
-  case ARM::VST1d64T:
-  case ARM::VST3LNd8:
-  case ARM::VST3LNd16:
-  case ARM::VST3LNd32:
-    FirstOpnd = 2;
-    NumRegs = 3;
-    return true;
-
-  case ARM::VST3q8_UPD:
-  case ARM::VST3q16_UPD:
-  case ARM::VST3q32_UPD:
-    FirstOpnd = 4;
-    NumRegs = 3;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VST3q8odd_UPD:
-  case ARM::VST3q16odd_UPD:
-  case ARM::VST3q32odd_UPD:
-    FirstOpnd = 4;
-    NumRegs = 3;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VST3LNq16:
-  case ARM::VST3LNq32:
-    FirstOpnd = 2;
-    NumRegs = 3;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VST3LNq16odd:
-  case ARM::VST3LNq32odd:
-    FirstOpnd = 2;
-    NumRegs = 3;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VST4d8:
-  case ARM::VST4d16:
-  case ARM::VST4d32:
-  case ARM::VST1d64Q:
-  case ARM::VST4LNd8:
-  case ARM::VST4LNd16:
-  case ARM::VST4LNd32:
-    FirstOpnd = 2;
-    NumRegs = 4;
-    return true;
-
-  case ARM::VST4q8_UPD:
-  case ARM::VST4q16_UPD:
-  case ARM::VST4q32_UPD:
-    FirstOpnd = 4;
-    NumRegs = 4;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VST4q8odd_UPD:
-  case ARM::VST4q16odd_UPD:
-  case ARM::VST4q32odd_UPD:
-    FirstOpnd = 4;
-    NumRegs = 4;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VST4LNq16:
-  case ARM::VST4LNq32:
-    FirstOpnd = 2;
-    NumRegs = 4;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VST4LNq16odd:
-  case ARM::VST4LNq32odd:
-    FirstOpnd = 2;
-    NumRegs = 4;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VTBL2:
-    FirstOpnd = 1;
-    NumRegs = 2;
-    return true;
-
-  case ARM::VTBL3:
-    FirstOpnd = 1;
-    NumRegs = 3;
-    return true;
-
-  case ARM::VTBL4:
-    FirstOpnd = 1;
-    NumRegs = 4;
-    return true;
-
-  case ARM::VTBX2:
-    FirstOpnd = 2;
-    NumRegs = 2;
-    return true;
-
-  case ARM::VTBX3:
-    FirstOpnd = 2;
-    NumRegs = 3;
-    return true;
-
-  case ARM::VTBX4:
-    FirstOpnd = 2;
-    NumRegs = 4;
-    return true;
-  }
-
-  return false;
-}
-
-bool
-NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
-                                   unsigned FirstOpnd, unsigned NumRegs,
-                                   unsigned Offset, unsigned Stride) const {
-  MachineOperand &FMO = MI->getOperand(FirstOpnd);
-  assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand");
-  unsigned VirtReg = FMO.getReg();
-  (void)VirtReg;
-  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-         "expected a virtual register");
-
-  unsigned LastSubIdx = 0;
-  if (FMO.isDef()) {
-    MachineInstr *RegSeq = 0;
-    for (unsigned R = 0; R < NumRegs; ++R) {
-      const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
-      assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
-      unsigned VirtReg = MO.getReg();
-      assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-             "expected a virtual register");
-      // Feeding into a REG_SEQUENCE.
-      if (!MRI->hasOneNonDBGUse(VirtReg))
-        return false;
-      MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
-      if (!UseMI->isRegSequence())
-        return false;
-      if (RegSeq && RegSeq != UseMI)
-        return false;
-      unsigned OpIdx = 1 + (Offset + R * Stride) * 2;
-      if (UseMI->getOperand(OpIdx).getReg() != VirtReg)
-        llvm_unreachable("Malformed REG_SEQUENCE instruction!");
-      unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm();
-      if (LastSubIdx) {
-        if (LastSubIdx != SubIdx-Stride)
-          return false;
-      } else {
-        // Must start from dsub_0 or qsub_0.
-        if (SubIdx != (ARM::dsub_0+Offset) &&
-            SubIdx != (ARM::qsub_0+Offset))
-          return false;
-      }
-      RegSeq = UseMI;
-      LastSubIdx = SubIdx;
-    }
-
-    // In the case of vld3, etc., make sure the trailing operand of
-    // REG_SEQUENCE is an undef.
-    if (NumRegs == 3) {
-      unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2;
-      const MachineOperand &MO = RegSeq->getOperand(OpIdx);
-      unsigned VirtReg = MO.getReg();
-      MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
-      if (!DefMI || !DefMI->isImplicitDef())
-        return false;
-    }
-    return true;
-  }
-
-  unsigned LastSrcReg = 0;
-  SmallVector<unsigned, 4> SubIds;
-  for (unsigned R = 0; R < NumRegs; ++R) {
-    const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
-    assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
-    unsigned VirtReg = MO.getReg();
-    assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-           "expected a virtual register");
-    // Extracting from a Q or QQ register.
-    MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
-    if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg())
-      return false;
-    VirtReg = DefMI->getOperand(1).getReg();
-    if (LastSrcReg && LastSrcReg != VirtReg)
-      return false;
-    LastSrcReg = VirtReg;
-    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
-    if (RC != ARM::QPRRegisterClass &&
-        RC != ARM::QQPRRegisterClass &&
-        RC != ARM::QQQQPRRegisterClass)
-      return false;
-    unsigned SubIdx = DefMI->getOperand(1).getSubReg();
-    if (LastSubIdx) {
-      if (LastSubIdx != SubIdx-Stride)
-        return false;
-    } else {
-      // Must start from dsub_0 or qsub_0.
-      if (SubIdx != (ARM::dsub_0+Offset) &&
-          SubIdx != (ARM::qsub_0+Offset))
-        return false;
-    }
-    SubIds.push_back(SubIdx);
-    LastSubIdx = SubIdx;
-  }
-
-  // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is
-  // currently required for correctness. e.g.
-  //  %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
-  //  %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
-  //  %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
-  //  VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
-  // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5
-  // respectively.
-  // We need to change how we model uses of REG_SEQUENCE.
-  for (unsigned R = 0; R < NumRegs; ++R) {
-    MachineOperand &MO = MI->getOperand(FirstOpnd + R);
-    unsigned OldReg = MO.getReg();
-    MachineInstr *DefMI = MRI->getVRegDef(OldReg);
-    assert(DefMI->isCopy());
-    MO.setReg(LastSrcReg);
-    MO.setSubReg(SubIds[R]);
-    MO.setIsKill(false);
-    // Delete the EXTRACT_SUBREG if its result is now dead.
-    if (MRI->use_empty(OldReg))
-      DefMI->eraseFromParent();
-  }
-
-  return true;
-}
-
-bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
-  bool Modified = false;
-
-  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-  for (; MBBI != E; ++MBBI) {
-    MachineInstr *MI = &*MBBI;
-    unsigned FirstOpnd, NumRegs, Offset, Stride;
-    if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
-      continue;
-    if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
-      continue;
-    llvm_unreachable("expected a REG_SEQUENCE");
-  }
-
-  return Modified;
-}
-
-bool NEONPreAllocPass::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getTarget().getInstrInfo();
-  MRI = &MF.getRegInfo();
-
-  bool Modified = false;
-  for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
-       ++MFI) {
-    MachineBasicBlock &MBB = *MFI;
-    Modified |= PreAllocNEONRegisters(MBB);
-  }
-
-  return Modified;
-}
-
-/// createNEONPreAllocPass - returns an instance of the NEON register
-/// pre-allocation pass.
-FunctionPass *llvm::createNEONPreAllocPass() {
-  return new NEONPreAllocPass();
-}

Modified: llvm/branches/wendling/eh/lib/Target/ARM/Thumb1InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Thumb1InstrInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Thumb1InstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Thumb1InstrInfo.cpp Tue Oct 26 19:48:03 2010
@@ -71,8 +71,9 @@
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                              MachineMemOperand::MOStore, 0,
+      MF.getMachineMemOperand(
+                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                              MachineMemOperand::MOStore,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill))
@@ -99,8 +100,9 @@
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                              MachineMemOperand::MOLoad, 0,
+      MF.getMachineMemOperand(
+                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                              MachineMemOperand::MOLoad,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)

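Both hunks above move to the MachinePointerInfo-based
getMachineMemOperand() overload, dropping the separate offset argument
(the old literal 0). Pulled out as a reference sketch, with FI the
spill slot's frame index (the restore path is identical with MOLoad):

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
        MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
        MachineMemOperand::MOStore,
        MFI.getObjectSize(FI),
        MFI.getObjectAlignment(FI));
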
Modified: llvm/branches/wendling/eh/lib/Target/ARM/Thumb1InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Thumb1InstrInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Thumb1InstrInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Thumb1InstrInfo.h Tue Oct 26 19:48:03 2010
@@ -51,14 +51,14 @@
                    unsigned DestReg, unsigned SrcReg,
                    bool KillSrc) const;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                           MachineBasicBlock::iterator MBBI,
+                           unsigned SrcReg, bool isKill, int FrameIndex,
                            const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const;
 
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                    MachineBasicBlock::iterator MBBI,
-                                    unsigned DestReg, int FrameIndex,
+                            MachineBasicBlock::iterator MBBI,
+                            unsigned DestReg, int FrameIndex,
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const;
 

Modified: llvm/branches/wendling/eh/lib/Target/ARM/Thumb1RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Thumb1RegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Thumb1RegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Thumb1RegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -92,7 +92,7 @@
                               unsigned DestReg, unsigned BaseReg,
                               int NumBytes, bool CanChangeCC,
                               const TargetInstrInfo &TII,
-                              const Thumb1RegisterInfo& MRI,
+                              const ARMBaseRegisterInfo& MRI,
                               DebugLoc dl) {
     MachineFunction &MF = *MBB.getParent();
     bool isHigh = !isARMLowRegister(DestReg) ||
@@ -162,13 +162,12 @@
 
 /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
 /// a destreg = basereg + immediate in Thumb code.
-static
-void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator &MBBI,
-                               unsigned DestReg, unsigned BaseReg,
-                               int NumBytes, const TargetInstrInfo &TII,
-                               const Thumb1RegisterInfo& MRI,
-                               DebugLoc dl) {
+void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator &MBBI,
+                                     unsigned DestReg, unsigned BaseReg,
+                                     int NumBytes, const TargetInstrInfo &TII,
+                                     const ARMBaseRegisterInfo& MRI,
+                                     DebugLoc dl) {
   bool isSub = NumBytes < 0;
   unsigned Bytes = (unsigned)NumBytes;
   if (isSub) Bytes = -NumBytes;
@@ -363,107 +362,19 @@
     MI.RemoveOperand(Op);
 }
 
-int Thumb1RegisterInfo::
-rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
-                  unsigned FrameReg, int Offset,
-                  unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const
-{
-  // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo
-  // version then can pull out Thumb1 specific parts here
-  return 0;
-}
-
-/// saveScavengerRegister - Spill the register so it can be used by the
-/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
-                                          MachineBasicBlock::iterator I,
-                                          MachineBasicBlock::iterator &UseMI,
-                                          const TargetRegisterClass *RC,
-                                          unsigned Reg) const {
-  // Thumb1 can't use the emergency spill slot on the stack because
-  // ldr/str immediate offsets must be positive, and if we're referencing
-  // off the frame pointer (if, for example, there are alloca() calls in
-  // the function, the offset will be negative. Use R12 instead since that's
-  // a call clobbered register that we know won't be used in Thumb1 mode.
-  DebugLoc DL;
-  BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
-    addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
-
-  // The UseMI is where we would like to restore the register. If there's
-  // interference with R12 before then, however, we'll need to restore it
-  // before that instead and adjust the UseMI.
-  bool done = false;
-  for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
-    if (II->isDebugValue())
-      continue;
-    // If this instruction affects R12, adjust our restore point.
-    for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
-      const MachineOperand &MO = II->getOperand(i);
-      if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
-          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-        continue;
-      if (MO.getReg() == ARM::R12) {
-        UseMI = II;
-        done = true;
-        break;
-      }
-    }
-  }
-  // Restore the register from R12
-  BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
-    addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
-
-  return true;
-}
-
-unsigned
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                        int SPAdj, FrameIndexValue *Value,
-                                        RegScavenger *RS) const{
-  unsigned VReg = 0;
-  unsigned i = 0;
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+                  unsigned FrameReg, int &Offset,
+                  const ARMBaseInstrInfo &TII) const {
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
-  MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   DebugLoc dl = MI.getDebugLoc();
-
-  while (!MI.getOperand(i).isFI()) {
-    ++i;
-    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
-  }
-
-  unsigned FrameReg = ARM::SP;
-  int FrameIndex = MI.getOperand(i).getIndex();
-  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-               MF.getFrameInfo()->getStackSize() + SPAdj;
-
-  if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
-    Offset -= AFI->getGPRCalleeSavedArea1Offset();
-  else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
-    Offset -= AFI->getGPRCalleeSavedArea2Offset();
-  else if (MF.getFrameInfo()->hasVarSizedObjects()) {
-    assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
-    // There are alloca()'s in this function, must reference off the frame
-    // pointer instead.
-    FrameReg = getFrameRegister(MF);
-    Offset -= AFI->getFramePtrSpillOffset();
-  }
-
-  // Special handling of dbg_value instructions.
-  if (MI.isDebugValue()) {
-    MI.getOperand(i).  ChangeToRegister(FrameReg, false /*isDef*/);
-    MI.getOperand(i+1).ChangeToImmediate(Offset);
-    return 0;
-  }
-
   unsigned Opcode = MI.getOpcode();
   const TargetInstrDesc &Desc = MI.getDesc();
   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
 
   if (Opcode == ARM::tADDrSPi) {
-    Offset += MI.getOperand(i+1).getImm();
+    Offset += MI.getOperand(FrameRegIdx+1).getImm();
 
     // Can't use tADDrSPi if it's based off the frame pointer.
     unsigned NumBits = 0;
@@ -483,12 +394,13 @@
     if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
       // Turn it into a move.
       MI.setDesc(TII.get(ARM::tMOVgpr2tgpr));
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
       // Remove offset and remaining explicit predicate operands.
-      do MI.RemoveOperand(i+1);
-      while (MI.getNumOperands() > i+1 &&
-             (!MI.getOperand(i+1).isReg() || !MI.getOperand(i+1).isImm()));
-      return 0;
+      do MI.RemoveOperand(FrameRegIdx+1);
+      while (MI.getNumOperands() > FrameRegIdx+1 &&
+             (!MI.getOperand(FrameRegIdx+1).isReg() ||
+              !MI.getOperand(FrameRegIdx+1).isImm()));
+      return true;
     }
 
     // Common case: small offset, fits into instruction.
@@ -496,15 +408,15 @@
     if (((Offset / Scale) & ~Mask) == 0) {
       // Replace the FrameIndex with sp / fp
       if (Opcode == ARM::tADDi3) {
-        removeOperands(MI, i);
+        removeOperands(MI, FrameRegIdx);
         MachineInstrBuilder MIB(&MI);
         AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
                        .addImm(Offset / Scale));
       } else {
-        MI.getOperand(i).ChangeToRegister(FrameReg, false);
-        MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+        MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
       }
-      return 0;
+      return true;
     }
 
     unsigned DestReg = MI.getOperand(0).getReg();
@@ -516,7 +428,7 @@
       emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
                                 *this, dl);
       MBB.erase(II);
-      return 0;
+      return true;
     }
 
     if (Offset > 0) {
@@ -524,12 +436,12 @@
       // r0 = add sp, 255*4
       // r0 = add r0, (imm - 255*4)
       if (Opcode == ARM::tADDi3) {
-        removeOperands(MI, i);
+        removeOperands(MI, FrameRegIdx);
         MachineInstrBuilder MIB(&MI);
         AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
       } else {
-        MI.getOperand(i).ChangeToRegister(FrameReg, false);
-        MI.getOperand(i+1).ChangeToImmediate(Mask);
+        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+        MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
       }
       Offset = (Offset - Mask * Scale);
       MachineBasicBlock::iterator NII = llvm::next(II);
@@ -542,14 +454,14 @@
       emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
 
       MI.setDesc(TII.get(ARM::tADDhirr));
-      MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
-      MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+      MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+      MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
       if (Opcode == ARM::tADDi3) {
         MachineInstrBuilder MIB(&MI);
         AddDefaultPred(MIB);
       }
     }
-    return 0;
+    return true;
   } else {
     unsigned ImmIdx = 0;
     int InstrOffs = 0;
@@ -557,7 +469,7 @@
     unsigned Scale = 1;
     switch (AddrMode) {
     case ARMII::AddrModeT1_s: {
-      ImmIdx = i+1;
+      ImmIdx = FrameRegIdx+1;
       InstrOffs = MI.getOperand(ImmIdx).getImm();
       NumBits = (FrameReg == ARM::SP) ? 8 : 5;
       Scale = 4;
@@ -577,9 +489,9 @@
     unsigned Mask = (1 << NumBits) - 1;
     if ((unsigned)Offset <= Mask * Scale) {
       // Replace the FrameIndex with sp
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
       ImmOp.ChangeToImmediate(ImmedOffset);
-      return 0;
+      return true;
     }
 
     bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
@@ -600,12 +512,124 @@
       Offset &= ~(Mask*Scale);
     }
   }
+  return Offset == 0;
+}
+
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+                                      unsigned BaseReg, int64_t Offset) const {
+  MachineInstr &MI = *I;
+  int Off = Offset; // ARM doesn't need the general 64-bit offsets
+  unsigned i = 0;
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+  bool Done = false;
+  Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+  assert(Done && "Unable to resolve frame index!");
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator I,
+                                          MachineBasicBlock::iterator &UseMI,
+                                          const TargetRegisterClass *RC,
+                                          unsigned Reg) const {
+  // Thumb1 can't use the emergency spill slot on the stack because
+  // ldr/str immediate offsets must be positive, and if we're referencing
+  // off the frame pointer (if, for example, there are alloca() calls in
+  // the function, the offset will be negative. Use R12 instead since that's
+  // a call clobbered register that we know won't be used in Thumb1 mode.
+  DebugLoc DL;
+  BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
+    addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
+
+  // The UseMI is where we would like to restore the register. If there's
+  // interference with R12 before then, however, we'll need to restore it
+  // before that instead and adjust the UseMI.
+  bool done = false;
+  for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+    if (II->isDebugValue())
+      continue;
+    // If this instruction affects R12, adjust our restore point.
+    for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = II->getOperand(i);
+      if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        continue;
+      if (MO.getReg() == ARM::R12) {
+        UseMI = II;
+        done = true;
+        break;
+      }
+    }
+  }
+  // Restore the register from R12
+  BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
+    addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
+
+  return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                        int SPAdj, RegScavenger *RS) const {
+  unsigned VReg = 0;
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc dl = MI.getDebugLoc();
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  unsigned FrameReg = ARM::SP;
+  int FrameIndex = MI.getOperand(i).getIndex();
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+               MF.getFrameInfo()->getStackSize() + SPAdj;
+
+  if (AFI->isGPRCalleeSavedAreaFrame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedAreaOffset();
+  else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+    assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
+    // There are alloca()'s in this function, must reference off the frame
+    // pointer or base pointer instead.
+    if (!hasBasePointer(MF)) {
+      FrameReg = getFrameRegister(MF);
+      Offset -= AFI->getFramePtrSpillOffset();
+    } else
+      FrameReg = BasePtr;
+  }
+
+  // Special handling of dbg_value instructions.
+  if (MI.isDebugValue()) {
+    MI.getOperand(i).  ChangeToRegister(FrameReg, false /*isDef*/);
+    MI.getOperand(i+1).ChangeToImmediate(Offset);
+    return;
+  }
+
+  // Modify MI as necessary to handle as much of 'Offset' as possible
+  assert(AFI->isThumbFunction() &&
+         "This eliminateFrameIndex only supports Thumb1!");
+  if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+    return;
 
   // If we get here, the immediate doesn't fit into the instruction.  We folded
   // as much as possible above, handle the rest, providing a register that is
   // SP+LargeImm.
   assert(Offset && "This code isn't needed if offset already handled!");
 
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = MI.getDesc();
+
   // Remove predicate first.
   int PIdx = MI.findFirstPredOperandIdx();
   if (PIdx != -1)
@@ -637,11 +661,7 @@
       MI.addOperand(MachineOperand::CreateReg(0, false));
   } else if (Desc.mayStore()) {
       VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
-      assert (Value && "Frame index virtual allocated, but Value arg is NULL!");
       bool UseRR = false;
-      bool TrackVReg = true;
-      Value->first = FrameReg; // use the frame register as a kind indicator
-      Value->second = Offset;
 
       if (Opcode == ARM::tSpill) {
         if (FrameReg == ARM::SP)
@@ -650,7 +670,6 @@
         else {
           emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
           UseRR = true;
-          TrackVReg = false;
         }
       } else
         emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
@@ -661,8 +680,6 @@
         MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
       else // tSTR has an extra register operand.
         MI.addOperand(MachineOperand::CreateReg(0, false));
-      if (!ReuseFrameIndexVals || !TrackVReg)
-        VReg = 0;
   } else
     assert(false && "Unexpected opcode!");
 
@@ -671,7 +688,6 @@
     MachineInstrBuilder MIB(&MI);
     AddDefaultPred(MIB);
   }
-  return VReg;
 }
 
 void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
@@ -690,7 +706,7 @@
 
   // Determine the sizes of each callee-save spill areas and record which frame
   // belongs to which callee-save spill areas.
-  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+  unsigned GPRCSSize = 0, DPRCSSize = 0;
   int FramePtrSpillFI = 0;
 
   if (VARegSaveSize)
@@ -710,25 +726,15 @@
     case ARM::R5:
     case ARM::R6:
     case ARM::R7:
-    case ARM::LR:
-      if (Reg == FramePtr)
-        FramePtrSpillFI = FI;
-      AFI->addGPRCalleeSavedArea1Frame(FI);
-      GPRCS1Size += 4;
-      break;
     case ARM::R8:
     case ARM::R9:
     case ARM::R10:
     case ARM::R11:
+    case ARM::LR:
       if (Reg == FramePtr)
         FramePtrSpillFI = FI;
-      if (STI.isTargetDarwin()) {
-        AFI->addGPRCalleeSavedArea2Frame(FI);
-        GPRCS2Size += 4;
-      } else {
-        AFI->addGPRCalleeSavedArea1Frame(FI);
-        GPRCS1Size += 4;
-      }
+      AFI->addGPRCalleeSavedAreaFrame(FI);
+      GPRCSSize += 4;
       break;
     default:
       AFI->addDPRCalleeSavedAreaFrame(FI);
@@ -742,20 +748,18 @@
       dl = MBBI->getDebugLoc();
   }
 
-  // Darwin ABI requires FP to point to the stack slot that contains the
-  // previous FP.
-  if (STI.isTargetDarwin() || hasFP(MF)) {
+  // Adjust FP so it points to the stack slot that contains the previous FP.
+  if (hasFP(MF)) {
     BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
       .addFrameIndex(FramePtrSpillFI).addImm(0);
+    AFI->setShouldRestoreSPFromFP(true);
   }
 
   // Determine starting offsets of spill areas.
-  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
-  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
-  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+  unsigned DPRCSOffset  = NumBytes - (GPRCSSize + DPRCSSize);
+  unsigned GPRCSOffset = DPRCSOffset + DPRCSSize;
   AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
-  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
-  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+  AFI->setGPRCalleeSavedAreaOffset(GPRCSOffset);
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 
   NumBytes = DPRCSOffset;
@@ -764,14 +768,19 @@
     emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
   }
 
-  if (STI.isTargetELF() && hasFP(MF)) {
+  if (STI.isTargetELF() && hasFP(MF))
     MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
                              AFI->getFramePtrSpillOffset());
-  }
 
-  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
-  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setGPRCalleeSavedAreaSize(GPRCSSize);
   AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+  // If we need a base pointer, set it up here. It's whatever the value
+  // of the stack pointer is at this point. Any variable size objects
+  // will be allocated after this, so we can still use the base pointer
+  // to reference locals.
+  if (hasBasePointer(MF))
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
 }
 
 static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -824,11 +833,10 @@
     }
 
     // Move SP to start of FP callee save spill area.
-    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
-                 AFI->getGPRCalleeSavedArea2Size() +
+    NumBytes -= (AFI->getGPRCalleeSavedAreaSize() +
                  AFI->getDPRCalleeSavedAreaSize());
 
-    if (hasFP(MF)) {
+    if (AFI->shouldRestoreSPFromFP()) {
       NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
       // Reset SP based on frame pointer only if the stack frame extends beyond
       // frame pointer stack slot or target is ELF and the function has FP.

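The net effect of the Thumb1RegisterInfo.cpp restructuring is that
frame-index rewriting becomes a reusable primitive: rewriteFrameIndex()
folds as much of the offset into the instruction as its encoding
allows, updates Offset with the remainder, and returns true when
nothing is left. Its two callers, reduced to a sketch:

  // resolveFrameIndex: a base-register access must resolve completely.
  bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
  assert(Done && "Unable to resolve frame index!");

  // eliminateFrameIndex: a partial rewrite is fine; when something
  // remains, fall through and materialize SP + LargeImm in a scratch
  // register.
  if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
    return;
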
Modified: llvm/branches/wendling/eh/lib/Target/ARM/Thumb1RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Thumb1RegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Thumb1RegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Thumb1RegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -44,21 +44,21 @@
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
 
-  // rewrite MI to access 'Offset' bytes from the FP. Return the offset that
-  // could not be handled directly in MI.
-  int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
-                        unsigned FrameReg, int Offset,
-                        unsigned MOVOpc, unsigned ADDriOpc,
-                        unsigned SUBriOpc) const;
-
+  // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+  // however much remains to be handled. Return 'true' if no further
+  // work is required.
+  bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+                         unsigned FrameReg, int &Offset,
+                         const ARMBaseInstrInfo &TII) const;
+  void resolveFrameIndex(MachineBasicBlock::iterator I,
+                         unsigned BaseReg, int64_t Offset) const;
   bool saveScavengerRegister(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              MachineBasicBlock::iterator &UseMI,
                              const TargetRegisterClass *RC,
                              unsigned Reg) const;
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   void emitPrologue(MachineFunction &MF) const;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;

Modified: llvm/branches/wendling/eh/lib/Target/ARM/Thumb2HazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Thumb2HazardRecognizer.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Thumb2HazardRecognizer.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Thumb2HazardRecognizer.h Tue Oct 26 19:48:03 2010
@@ -26,7 +26,7 @@
   MachineInstr *ITBlockMIs[4];
 
 public:
-  Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
+  Thumb2HazardRecognizer(const InstrItineraryData *ItinData) :
     PostRAHazardRecognizer(ItinData) {}
 
   virtual HazardType getHazardType(SUnit *SU);

Modified: llvm/branches/wendling/eh/lib/Target/ARM/Thumb2ITBlockPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Thumb2ITBlockPass.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Thumb2ITBlockPass.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Thumb2ITBlockPass.cpp Tue Oct 26 19:48:03 2010
@@ -27,7 +27,7 @@
 
   public:
     static char ID;
-    Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
+    Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
 
     const Thumb2InstrInfo *TII;
     const TargetRegisterInfo *TRI;

Modified: llvm/branches/wendling/eh/lib/Target/ARM/Thumb2InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Thumb2InstrInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Thumb2InstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Thumb2InstrInfo.cpp Tue Oct 26 19:48:03 2010
@@ -28,15 +28,10 @@
 
 using namespace llvm;
 
-static cl::opt<unsigned>
-IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden,
-           cl::desc("Thumb2 if-conversion limit (default 3)"),
-           cl::init(3));
-
-static cl::opt<unsigned>
-IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden,
-                  cl::desc("Thumb2 diamond if-conversion limit (default 3)"),
-                  cl::init(3));
+static cl::opt<bool>
+OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
+           cl::desc("Use old-style Thumb2 if-conversion heuristics"),
+           cl::init(false));
 
 Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
   : ARMBaseInstrInfo(STI), RI(*this, STI) {
@@ -47,6 +42,33 @@
   return 0;
 }
 
+bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+                                          unsigned NumInstrs,
+                                          float Prediction,
+                                          float Confidence) const {
+  if (!OldT2IfCvt)
+    return ARMBaseInstrInfo::isProfitableToIfCvt(MBB, NumInstrs,
+                                                 Prediction, Confidence);
+  return NumInstrs && NumInstrs <= 3;
+}
+
+bool Thumb2InstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+                    MachineBasicBlock &FMBB, unsigned NumF,
+                    float Prediction, float Confidence) const {
+  if (!OldT2IfCvt)
+    return ARMBaseInstrInfo::isProfitableToIfCvt(TMBB, NumT,
+                                                 FMBB, NumF,
+                                                 Prediction, Confidence);
+
+  // FIXME: Catch optimization such as:
+  //        r0 = movne
+  //        r0 = moveq
+  return NumT && NumF &&
+    NumT <= 3 && NumF <= 3;
+}
+
+
 void
 Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
                                          MachineBasicBlock *NewDest) const {
@@ -105,21 +127,6 @@
   return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
 }
 
-bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
-                                          unsigned NumInstrs) const {
-  return NumInstrs && NumInstrs <= IfCvtLimit;
-}
-  
-bool Thumb2InstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
-                    MachineBasicBlock &FMBB, unsigned NumF) const {
-  // FIXME: Catch optimization such as:
-  //        r0 = movne
-  //        r0 = moveq
-  return NumT && NumF &&
-    NumT <= (IfCvtDiamondLimit) && NumF <= (IfCvtDiamondLimit);
-}
-
 void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
@@ -155,8 +162,9 @@
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                              MachineMemOperand::MOStore, 0,
+      MF.getMachineMemOperand(
+                      MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                              MachineMemOperand::MOStore,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
@@ -181,8 +189,9 @@
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                              MachineMemOperand::MOLoad, 0,
+      MF.getMachineMemOperand(
+                      MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                              MachineMemOperand::MOLoad,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
@@ -194,7 +203,7 @@
 }
 
 ScheduleHazardRecognizer *Thumb2InstrInfo::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const {
   return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II);
 }
 

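The isProfitableToIfCvt() overloads above now receive a branch
Prediction and a Confidence in that prediction, and unless
-old-thumb2-ifcvt is given they defer to the ARMBaseInstrInfo
heuristics instead of a fixed instruction-count limit. Purely as an
illustration of the kind of comparison such a heuristic can make; this
is a hypothetical model, not the ARMBaseInstrInfo implementation:

  // Predicated (if-converted) code executes both sides; a branch
  // executes the expected side plus a misprediction penalty discounted
  // by how confident we are in the prediction.
  static bool worthIfConverting(unsigned NumT, unsigned NumF,
                                float Prediction, float Confidence,
                                unsigned MispredictPenalty) {
    float BranchCost = Prediction * NumT + (1.0f - Prediction) * NumF +
                       (1.0f - Confidence) * MispredictPenalty;
    float ConvertedCost = float(NumT + NumF);
    return ConvertedCost <= BranchCost;
  }
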
Modified: llvm/branches/wendling/eh/lib/Target/ARM/Thumb2InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Thumb2InstrInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Thumb2InstrInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Thumb2InstrInfo.h Tue Oct 26 19:48:03 2010
@@ -38,10 +38,11 @@
   bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI) const;
 
-  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const;
-  
+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
+                           float Prediction, float Confidence) const;
   bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
-                           MachineBasicBlock &FMBB, unsigned NumFInstrs) const;
+                           MachineBasicBlock &FMBB, unsigned NumFInstrs,
+                           float Prediction, float Confidence) const;
 
   void copyPhysReg(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, DebugLoc DL,
@@ -72,7 +73,7 @@
   const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
 
   ScheduleHazardRecognizer *
-  CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const;
 };
 
 /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical

Modified: llvm/branches/wendling/eh/lib/Target/ARM/Thumb2SizeReduction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/Thumb2SizeReduction.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/Thumb2SizeReduction.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/Thumb2SizeReduction.cpp Tue Oct 26 19:48:03 2010
@@ -173,7 +173,7 @@
   char Thumb2SizeReduce::ID = 0;
 }
 
-Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
   for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
     unsigned FromOpc = ReduceTable[i].WideOpc;
     if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -217,8 +217,8 @@
     /// Old opcode has an optional def of CPSR.
     if (HasCC)
       return true;
-    // If both old opcode does not implicit CPSR def, then it's not ok since
-    // these new opcodes CPSR def is not meant to be thrown away. e.g. CMP.
+    // If old opcode does not implicitly define CPSR, then it's not ok since
+    // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
     if (!HasImplicitCPSRDef(MI->getDesc()))
       return false;
     HasCC = true;
@@ -315,6 +315,18 @@
     ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
     if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
       return false;
+    // For the non-writeback version (this one), the base register must be
+    // one of the registers being loaded.
+    bool isOK = false;
+    for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+      if (MI->getOperand(i).getReg() == BaseReg) {
+        isOK = true;
+        break;
+      }
+    }
+    if (!isOK)
+      return false;
+
     OpNum = 0;
     isLdStMul = true;
     break;

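The new check encodes an encoding constraint of 16-bit Thumb: LDMIA without writeback is only available when the base register is also one of the registers being loaded, so the size reduction must bail out otherwise. The same test, restated as a standalone helper for clarity (a rephrasing of the hunk above, not code from the patch):

  // Returns true if BaseReg appears in the instruction's register list
  // (operands 4 and up for this load-multiple form, as in the hunk above).
  static bool baseRegIsLoaded(const MachineInstr &MI, unsigned BaseReg) {
    for (unsigned i = 4, e = MI.getNumOperands(); i != e; ++i)
      if (MI.getOperand(i).getReg() == BaseReg)
        return true;
    return false;
  }
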
Modified: llvm/branches/wendling/eh/lib/Target/Alpha/AlphaBranchSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/AlphaBranchSelector.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/AlphaBranchSelector.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/AlphaBranchSelector.cpp Tue Oct 26 19:48:03 2010
@@ -22,7 +22,7 @@
 namespace {
   struct AlphaBSel : public MachineFunctionPass {
     static char ID;
-    AlphaBSel() : MachineFunctionPass(&ID) {}
+    AlphaBSel() : MachineFunctionPass(ID) {}
 
     virtual bool runOnMachineFunction(MachineFunction &Fn);
 

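The `&ID` to `ID` change here, repeated throughout the rest of this patch, tracks a base-class API change: MachineFunctionPass (and ModulePass/FunctionPass below) now take the pass identifier by reference rather than by address. The new idiom, sketched with a hypothetical pass:

  namespace {
    struct ExamplePass : public MachineFunctionPass { // hypothetical pass
      static char ID;
      ExamplePass() : MachineFunctionPass(ID) {}      // was: (&ID)
      virtual bool runOnMachineFunction(MachineFunction &MF) {
        return false; // makes no change
      }
    };
    char ExamplePass::ID = 0; // the ID's address still identifies the pass
  }
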
Modified: llvm/branches/wendling/eh/lib/Target/Alpha/AlphaCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/AlphaCodeEmitter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/AlphaCodeEmitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/AlphaCodeEmitter.cpp Tue Oct 26 19:48:03 2010
@@ -34,19 +34,19 @@
   public:
     static char ID;
 
-    AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(&ID),
+    AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(ID),
     MCE(mce) {}
 
     /// getBinaryCodeForInstr - This function, generated by the
     /// CodeEmitterGenerator using TableGen, produces the binary encoding for
     /// machine instructions.
 
-    unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
 
     /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
 
     unsigned getMachineOpValue(const MachineInstr &MI,
-                               const MachineOperand &MO);
+                               const MachineOperand &MO) const;
     
     bool runOnMachineFunction(MachineFunction &MF);
     
@@ -143,7 +143,7 @@
 }
 
 unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
-                                             const MachineOperand &MO) {
+                                             const MachineOperand &MO) const {
 
   unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
                    // or things that get fixed up later by the JIT.

Modified: llvm/branches/wendling/eh/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/AlphaISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/AlphaISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/AlphaISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -113,8 +113,8 @@
     static uint64_t getNearPower2(uint64_t x) {
       if (!x) return 0;
       unsigned at = CountLeadingZeros_64(x);
-      uint64_t complow = 1 << (63 - at);
-      uint64_t comphigh = 1 << (64 - at);
+      uint64_t complow = 1ULL << (63 - at);
+      uint64_t comphigh = 1ULL << (64 - at);
       //cerr << x << ":" << complow << ":" << comphigh << "\n";
       if (abs64(complow - x) <= abs64(comphigh - x))
         return complow;
@@ -130,19 +130,6 @@
         return (x - y) == r;
     }
 
-    static bool isFPZ(SDValue N) {
-      ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
-      return (CN && (CN->getValueAPF().isZero()));
-    }
-    static bool isFPZn(SDValue N) {
-      ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
-      return (CN && CN->getValueAPF().isNegZero());
-    }
-    static bool isFPZp(SDValue N) {
-      ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
-      return (CN && CN->getValueAPF().isPosZero());
-    }
-
   public:
     explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
       : SelectionDAGISel(TM)

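The getNearPower2 fix above is worth dwelling on: `1` is a 32-bit int, so `1 << (63 - at)` shifts past the width of the type, which is undefined behavior and in practice loses the high bits. Suffixing the literal makes the shift 64-bit. A self-contained illustration:

  #include <cassert>
  #include <cstdint>

  int main() {
    unsigned at = 1;                        // e.g. one leading zero
    // uint64_t bad = 1 << (63 - at);       // UB: int shifted by 62 bits
    uint64_t complow = 1ULL << (63 - at);   // OK: 64-bit shift
    assert(complow == 0x4000000000000000ULL);
    return 0;
  }
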
Modified: llvm/branches/wendling/eh/lib/Target/Alpha/AlphaISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/AlphaISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/AlphaISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/AlphaISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -284,8 +284,7 @@
                                    DAG.getIntPtrConstant(VA.getLocMemOffset()));
 
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
-                                         PseudoSourceValue::getStack(), 0,
-                                         false, false, 0));
+                                         MachinePointerInfo(),false, false, 0));
     }
   }
 
@@ -431,7 +430,7 @@
       // Create the SelectionDAG nodes corresponding to a load
       //from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
-      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                            false, false, 0);
     }
     InVals.push_back(ArgVal);
@@ -448,7 +447,7 @@
       int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
       if (i == 0) FuncInfo->setVarArgsBase(FI);
       SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
-      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
                                 false, false, 0));
 
       if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
@@ -456,7 +455,7 @@
       argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
       FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true);
       SDFI = DAG.getFrameIndex(FI, MVT::i64);
-      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
                                 false, false, 0));
     }
 
@@ -537,12 +536,14 @@
   const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0,
+  SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP,
+                             MachinePointerInfo(VAListS),
                              false, false, 0);
   SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
                               DAG.getConstant(8, MVT::i64));
   SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Base.getValue(1),
-                                  Tmp, NULL, 0, MVT::i32, false, false, 0);
+                                  Tmp, MachinePointerInfo(),
+                                  MVT::i32, false, false, 0);
   DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
   if (N->getValueType(0).isFloatingPoint())
   {
@@ -556,7 +557,8 @@
 
   SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
                                     DAG.getConstant(8, MVT::i64));
-  Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0,
+  Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp,
+                            MachinePointerInfo(),
                             MVT::i32, false, false, 0);
 }
 
@@ -707,9 +709,10 @@
     SDValue Result;
     if (Op.getValueType() == MVT::i32)
       Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Chain, DataPtr,
-                              NULL, 0, MVT::i32, false, false, 0);
+                              MachinePointerInfo(), MVT::i32, false, false, 0);
     else
-      Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0,
+      Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr,
+                           MachinePointerInfo(),
                            false, false, 0);
     return Result;
   }
@@ -720,17 +723,20 @@
     const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
     const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
 
-    SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0,
+    SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP,
+                              MachinePointerInfo(SrcS),
                               false, false, 0);
-    SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0,
+    SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, 
+                                  MachinePointerInfo(DestS),
                                   false, false, 0);
     SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
                                DAG.getConstant(8, MVT::i64));
     Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Result,
-                         NP, NULL,0, MVT::i32, false, false, 0);
+                         NP, MachinePointerInfo(), MVT::i32, false, false, 0);
     SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
                                 DAG.getConstant(8, MVT::i64));
-    return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32,
+    return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD,
+                             MachinePointerInfo(), MVT::i32,
                              false, false, 0);
   }
   case ISD::VASTART: {
@@ -743,14 +749,15 @@
 
     // vastart stores the address of the VarArgsBase and VarArgsOffset
     SDValue FR  = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);
-    SDValue S1  = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0,
-                               false, false, 0);
+    SDValue S1  = DAG.getStore(Chain, dl, FR, VAListP,
+                               MachinePointerInfo(VAListS), false, false, 0);
     SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
                                 DAG.getConstant(8, MVT::i64));
     return DAG.getTruncStore(S1, dl,
                              DAG.getConstant(FuncInfo->getVarArgsOffset(),
                                              MVT::i64),
-                             SA2, NULL, 0, MVT::i32, false, false, 0);
+                             SA2, MachinePointerInfo(),
+                             MVT::i32, false, false, 0);
   }
   case ISD::RETURNADDR:
     return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64);
@@ -771,7 +778,8 @@
 
   SDValue Chain, DataPtr;
   LowerVAARG(N, Chain, DataPtr, DAG);
-  SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0,
+  SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, 
+                            MachinePointerInfo(),
                             false, false, 0);
   Results.push_back(Res);
   Results.push_back(SDValue(Res.getNode(), 1));

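The bulk of the lowering changes above are one mechanical migration: the (const Value*, offset) pair that described a memory location to getLoad/getStore/getExtLoad/getTruncStore is replaced by a MachinePointerInfo. Two shapes recur, shown here as a fragment (assuming the usual SelectionDAG lowering context, with DAG, dl, and Chain in scope):

  // When the IR-level pointer is known, wrap it:
  SDValue L = DAG.getLoad(MVT::i64, dl, Chain, VAListP,
                          MachinePointerInfo(VAListS),  // was: VAListS, 0
                          false, false, 0);
  // When it is not, a default-constructed info means "unknown location":
  SDValue S = DAG.getStore(Chain, dl, Val, Ptr,
                           MachinePointerInfo(),        // was: NULL, 0
                           false, false, 0);
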
Modified: llvm/branches/wendling/eh/lib/Target/Alpha/AlphaLLRP.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/AlphaLLRP.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/AlphaLLRP.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/AlphaLLRP.cpp Tue Oct 26 19:48:03 2010
@@ -39,7 +39,7 @@
 
     static char ID;
     AlphaLLRPPass(AlphaTargetMachine &tm) 
-      : MachineFunctionPass(&ID), TM(tm) { }
+      : MachineFunctionPass(ID), TM(tm) { }
 
     virtual const char *getPassName() const {
       return "Alpha NOP inserter";

Modified: llvm/branches/wendling/eh/lib/Target/Alpha/AlphaRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/AlphaRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/AlphaRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/AlphaRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -137,10 +137,9 @@
 //variable locals
 //<- SP
 
-unsigned
+void
 AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                       int SPAdj, FrameIndexValue *Value,
-                                       RegScavenger *RS) const {
+                                       int SPAdj, RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   unsigned i = 0;
@@ -185,7 +184,6 @@
   } else {
     MI.getOperand(i).ChangeToImmediate(Offset);
   }
-  return 0;
 }
 
 

Modified: llvm/branches/wendling/eh/lib/Target/Alpha/AlphaRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/AlphaRegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/AlphaRegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/AlphaRegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -38,9 +38,8 @@
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
 
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 

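eliminateFrameIndex loses both its unsigned return value and the FrameIndexValue out-parameter across every target in this patch; the override shrinks to the shape below (sketched with a hypothetical target, body elided):

  void MyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                           int SPAdj,
                                           RegScavenger *RS) const {
    // Rewrite the frame-index operand of *II into a register + offset,
    // then simply return; there is no longer a value to propagate back.
  }
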
Modified: llvm/branches/wendling/eh/lib/Target/Alpha/AlphaSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/AlphaSchedule.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/AlphaSchedule.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/AlphaSchedule.td Tue Oct 26 19:48:03 2010
@@ -50,11 +50,11 @@
 def s_itof  : InstrItinClass;
 def s_pseudo : InstrItinClass;
 
-//Table 2­4 Instruction Class Latency in Cycles
+//Table 2-4 Instruction Class Latency in Cycles
 //modified some
 
 def Alpha21264Itineraries : ProcessorItineraries<
-  [L0, L1, FST0, FST1, U0, U1, FA, FM], [
+  [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [
   InstrItinData<s_ild    , [InstrStage<3, [L0, L1]>]>,
   InstrItinData<s_fld    , [InstrStage<4, [L0, L1]>]>,
   InstrItinData<s_ist    , [InstrStage<0, [L0, L1]>]>,

Modified: llvm/branches/wendling/eh/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp Tue Oct 26 19:48:03 2010
@@ -53,8 +53,6 @@
 
     void printOp(const MachineOperand &MO, raw_ostream &O);
     void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printBaseOffsetPair(const MachineInstr *MI, int i, raw_ostream &O,
-                             bool brackets=true);
     virtual void EmitFunctionBodyStart();
     virtual void EmitFunctionBodyEnd(); 
     void EmitStartOfAsmFile(Module &M);

Modified: llvm/branches/wendling/eh/lib/Target/Alpha/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Alpha/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Alpha/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/Alpha/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -25,5 +25,3 @@
   AlphaTargetMachine.cpp
   AlphaSelectionDAGInfo.cpp
   )
-
-target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -51,8 +51,7 @@
 
   private:
     SDNode *Select(SDNode *N);
-    bool SelectADDRspii(SDNode *Op, SDValue Addr,
-                        SDValue &Base, SDValue &Offset);
+    bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
 
     // Walk the DAG after instruction selection, fixing register class issues.
     void FixRegisterClasses(SelectionDAG &DAG);
@@ -94,8 +93,7 @@
   return SelectCode(N);
 }
 
-bool BlackfinDAGToDAGISel::SelectADDRspii(SDNode *Op,
-                                          SDValue Addr,
+bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr,
                                           SDValue &Base,
                                           SDValue &Offset) {
   FrameIndexSDNode *FIN = 0;

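Address-selection callbacks used by ComplexPatterns drop the root-node parameter; only the address operand is needed. A sketch of the reduced hook, with a typical frame-index match assumed for the body (consistent with the FrameIndexSDNode handling visible in this hunk, but not copied from it):

  bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      Offset = CurDAG->getTargetConstant(0, MVT::i32);
      return true;
    }
    return false;
  }
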
Modified: llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -207,7 +207,8 @@
       unsigned ObjSize = VA.getLocVT().getStoreSize();
       int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
-      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                   MachinePointerInfo(),
                                    false, false, 0));
     }
   }
@@ -332,8 +333,7 @@
       SDValue OffsetN = DAG.getIntPtrConstant(Offset);
       OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
-                                         PseudoSourceValue::getStack(),
-                                         Offset, false, false, 0));
+                                         MachinePointerInfo(),false, false, 0));
     }
   }
 

Modified: llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -190,10 +190,9 @@
   return Reg;
 }
 
-unsigned
+void
 BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                          int SPAdj, FrameIndexValue *Value,
-                                          RegScavenger *RS) const {
+                                          int SPAdj, RegScavenger *RS) const {
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
@@ -230,20 +229,20 @@
       MI.setDesc(TII.get(isStore
                          ? BF::STORE32p_uimm6m4
                          : BF::LOAD32p_uimm6m4));
-      return 0;
+      return;
     }
     if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
       MI.setDesc(TII.get(isStore
                          ? BF::STORE32fp_nimm7m4
                          : BF::LOAD32fp_nimm7m4));
       MI.getOperand(FIPos+1).setImm(-Offset);
-      return 0;
+      return;
     }
     if (isInt<18>(Offset)) {
       MI.setDesc(TII.get(isStore
                          ? BF::STORE32p_imm18m4
                          : BF::LOAD32p_imm18m4));
-      return 0;
+      return;
     }
     // Use RegScavenger to calculate proper offset...
     MI.dump();
@@ -328,7 +327,6 @@
     llvm_unreachable("Cannot eliminate frame index");
     break;
   }
-  return 0;
 }
 
 void BlackfinRegisterInfo::
@@ -344,10 +342,6 @@
   }
 }
 
-void BlackfinRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-}
-
 // Emit a prologue that sets up a stack frame.
 // On function entry, R0-R2 and P0 may hold arguments.
 // R3, P1, and P2 may be used as scratch registers

Modified: llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinRegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinRegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/Blackfin/BlackfinRegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -51,15 +51,12 @@
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I) const;
 
-    unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                 int SPAdj, FrameIndexValue *Value = NULL,
-                                 RegScavenger *RS = NULL) const;
+    void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                             int SPAdj, RegScavenger *RS = NULL) const;
 
     void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                               RegScavenger *RS) const;
 
-    void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
     void emitPrologue(MachineFunction &MF) const;
     void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 

Modified: llvm/branches/wendling/eh/lib/Target/CBackend/CBackend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CBackend/CBackend.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CBackend/CBackend.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/CBackend/CBackend.cpp Tue Oct 26 19:48:03 2010
@@ -50,9 +50,13 @@
 #include "llvm/System/Host.h"
 #include "llvm/Config/config.h"
 #include <algorithm>
+// Some ms header decided to define setjmp as _setjmp, undo this for this file.
+#ifdef _MSC_VER
+#undef setjmp
+#endif
 using namespace llvm;
 
-extern "C" void LLVMInitializeCBackendTarget() { 
+extern "C" void LLVMInitializeCBackendTarget() {
   // Register the target.
   RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
 }
@@ -72,8 +76,10 @@
   class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass {
   public:
     static char ID;
-    CBackendNameAllUsedStructsAndMergeFunctions() 
-      : ModulePass(&ID) {}
+    CBackendNameAllUsedStructsAndMergeFunctions()
+        : ModulePass(ID) {
+          initializeFindUsedTypesPass(*PassRegistry::getPassRegistry());
+        }
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<FindUsedTypes>();
     }
@@ -110,9 +116,10 @@
   public:
     static char ID;
     explicit CWriter(formatted_raw_ostream &o)
-      : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), 
+      : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
         TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
         NextAnonValueNumber(0) {
+      initializeLoopInfoPass(*PassRegistry::getPassRegistry());
       FPCounter = 0;
     }
 
@@ -183,7 +190,7 @@
         Out << ")";
       }
     }
-    
+
     void writeOperand(Value *Operand, bool Static = false);
     void writeInstComputationInline(Instruction &I);
     void writeOperandInternal(Value *Operand, bool Static = false);
@@ -199,7 +206,6 @@
 
     void lowerIntrinsics(Function &F);
 
-    void printModule(Module *M);
     void printModuleTypes(const TypeSymbolTable &ST);
     void printContainedStructs(const Type *Ty, std::set<const Type *> &);
     void printFloatingPointConstants(Function &F);
@@ -225,7 +231,7 @@
         return ByValParams.count(A);
       return isa<GlobalVariable>(V) || isDirectAlloca(V);
     }
-    
+
     // isInlinableInst - Attempt to inline instructions into their uses to build
     // trees as much as possible.  To do this, we have to consistently decide
     // what is acceptable to inline, so that variable declarations don't get
@@ -234,7 +240,7 @@
     static bool isInlinableInst(const Instruction &I) {
       // Always inline cmp instructions, even if they are shared by multiple
       // expressions.  GCC generates horrible code if we don't.
-      if (isa<CmpInst>(I)) 
+      if (isa<CmpInst>(I))
         return true;
 
       // Must be an expression, must be used exactly once.  If it is dead, we
@@ -271,14 +277,14 @@
         return 0;
       return AI;
     }
-    
+
     // isInlineAsm - Check if the instruction is a call to an inline asm chunk
     static bool isInlineAsm(const Instruction& I) {
       if (const CallInst *CI = dyn_cast<CallInst>(&I))
         return isa<InlineAsm>(CI->getCalledValue());
       return false;
     }
-    
+
     // Instruction visitation functions
     friend class InstVisitor<CWriter>;
 
@@ -311,7 +317,7 @@
     void visitStoreInst (StoreInst  &I);
     void visitGetElementPtrInst(GetElementPtrInst &I);
     void visitVAArgInst (VAArgInst &I);
-    
+
     void visitInsertElementInst(InsertElementInst &I);
     void visitExtractElementInst(ExtractElementInst &I);
     void visitShuffleVectorInst(ShuffleVectorInst &SVI);
@@ -347,7 +353,7 @@
 
 static std::string CBEMangle(const std::string &S) {
   std::string Result;
-  
+
   for (unsigned i = 0, e = S.size(); i != e; ++i)
     if (isalnum(S[i]) || S[i] == '_') {
       Result += S[i];
@@ -376,7 +382,7 @@
   for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end();
        TI != TE; ) {
     TypeSymbolTable::iterator I = TI++;
-    
+
     // If this isn't a struct or array type, remove it from our set of types
     // to name. This simplifies emission later.
     if (!I->second->isStructTy() && !I->second->isOpaqueTy() &&
@@ -404,8 +410,8 @@
         ++RenameCounter;
       Changed = true;
     }
-      
-      
+
+
   // Loop over all external functions and globals.  If we have two with
   // identical names, merge them.
   // FIXME: This code should disappear when we don't allow values with the same
@@ -441,7 +447,7 @@
       }
     }
   }
-  
+
   return Changed;
 }
 
@@ -480,20 +486,20 @@
     FunctionInnards << "void";
   }
   FunctionInnards << ')';
-  printType(Out, RetTy, 
+  printType(Out, RetTy,
       /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
 }
 
 raw_ostream &
 CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
                          const std::string &NameSoFar) {
-  assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && 
+  assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
          "Invalid type for printSimpleType");
   switch (Ty->getTypeID()) {
   case Type::VoidTyID:   return Out << "void " << NameSoFar;
   case Type::IntegerTyID: {
     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
-    if (NumBits == 1) 
+    if (NumBits == 1)
       return Out << "bool " << NameSoFar;
     else if (NumBits <= 8)
       return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
@@ -503,7 +509,7 @@
       return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
     else if (NumBits <= 64)
       return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
-    else { 
+    else {
       assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
       return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
     }
@@ -515,14 +521,18 @@
   case Type::X86_FP80TyID:
   case Type::PPC_FP128TyID:
   case Type::FP128TyID:  return Out << "long double " << NameSoFar;
-      
+
+  case Type::X86_MMXTyID:
+    return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned,
+                     " __attribute__((vector_size(64))) " + NameSoFar);
+
   case Type::VectorTyID: {
     const VectorType *VTy = cast<VectorType>(Ty);
     return printSimpleType(Out, VTy->getElementType(), isSigned,
                      " __attribute__((vector_size(" +
                      utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
   }
-    
+
   default:
 #ifndef NDEBUG
     errs() << "Unknown primitive type: " << *Ty << "\n";
@@ -576,7 +586,7 @@
       FunctionInnards << "void";
     }
     FunctionInnards << ')';
-    printType(Out, FTy->getReturnType(), 
+    printType(Out, FTy->getReturnType(),
       /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
     return Out;
   }
@@ -760,7 +770,7 @@
 }
 
 /// Print out the casting for a cast operation. This does the double casting
-/// necessary for conversion to the destination type, if necessary. 
+/// necessary for conversion to the destination type, if necessary.
 /// @brief Print a cast
 void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
   // Print the destination type cast
@@ -783,7 +793,7 @@
       printSimpleType(Out, DstTy, false);
       Out << ')';
       break;
-    case Instruction::SExt: 
+    case Instruction::SExt:
     case Instruction::FPToSI: // For these, make sure we get a signed dest
       Out << '(';
       printSimpleType(Out, DstTy, true);
@@ -804,7 +814,7 @@
     case Instruction::SIToFP:
     case Instruction::SExt:
       Out << '(';
-      printSimpleType(Out, SrcTy, true); 
+      printSimpleType(Out, SrcTy, true);
       Out << ')';
       break;
     case Instruction::IntToPtr:
@@ -896,7 +906,7 @@
     case Instruction::AShr:
     {
       Out << '(';
-      bool NeedsClosingParens = printConstExprCast(CE, Static); 
+      bool NeedsClosingParens = printConstExprCast(CE, Static);
       printConstantWithCast(CE->getOperand(0), CE->getOpcode());
       switch (CE->getOpcode()) {
       case Instruction::Add:
@@ -906,10 +916,10 @@
       case Instruction::Mul:
       case Instruction::FMul: Out << " * "; break;
       case Instruction::URem:
-      case Instruction::SRem: 
+      case Instruction::SRem:
       case Instruction::FRem: Out << " % "; break;
-      case Instruction::UDiv: 
-      case Instruction::SDiv: 
+      case Instruction::UDiv:
+      case Instruction::SDiv:
       case Instruction::FDiv: Out << " / "; break;
       case Instruction::And: Out << " & "; break;
       case Instruction::Or:  Out << " | "; break;
@@ -921,7 +931,7 @@
         switch (CE->getPredicate()) {
           case ICmpInst::ICMP_EQ: Out << " == "; break;
           case ICmpInst::ICMP_NE: Out << " != "; break;
-          case ICmpInst::ICMP_SLT: 
+          case ICmpInst::ICMP_SLT:
           case ICmpInst::ICMP_ULT: Out << " < "; break;
           case ICmpInst::ICMP_SLE:
           case ICmpInst::ICMP_ULE: Out << " <= "; break;
@@ -941,8 +951,8 @@
       return;
     }
     case Instruction::FCmp: {
-      Out << '('; 
-      bool NeedsClosingParens = printConstExprCast(CE, Static); 
+      Out << '(';
+      bool NeedsClosingParens = printConstExprCast(CE, Static);
       if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
         Out << "0";
       else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
@@ -1007,18 +1017,18 @@
     else {
       Out << "((";
       printSimpleType(Out, Ty, false) << ')';
-      if (CI->isMinValue(true)) 
+      if (CI->isMinValue(true))
         Out << CI->getZExtValue() << 'u';
       else
         Out << CI->getSExtValue();
       Out << ')';
     }
     return;
-  } 
+  }
 
   switch (CPV->getType()->getTypeID()) {
   case Type::FloatTyID:
-  case Type::DoubleTyID: 
+  case Type::DoubleTyID:
   case Type::X86_FP80TyID:
   case Type::PPC_FP128TyID:
   case Type::FP128TyID: {
@@ -1028,8 +1038,8 @@
       // Because of FP precision problems we must load from a stack allocated
       // value that holds the value in hex.
       Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
-                       "float" : 
-                       FPC->getType() == Type::getDoubleTy(CPV->getContext()) ? 
+                       "float" :
+                       FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
                        "double" :
                        "long double")
           << "*)&FPConstant" << I->second << ')';
@@ -1048,7 +1058,7 @@
         Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
         V = Tmp.convertToDouble();
       }
-      
+
       if (IsNAN(V)) {
         // The value is NaN
 
@@ -1212,10 +1222,10 @@
     // We need to cast integer arithmetic so that it is always performed
     // as unsigned, to avoid undefined behavior on overflow.
   case Instruction::LShr:
-  case Instruction::URem: 
+  case Instruction::URem:
   case Instruction::UDiv: NeedsExplicitCast = true; break;
   case Instruction::AShr:
-  case Instruction::SRem: 
+  case Instruction::SRem:
   case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
   case Instruction::SExt:
     Ty = CE->getType();
@@ -1268,7 +1278,7 @@
   switch (Opcode) {
     default:
       // for most instructions, it doesn't matter
-      break; 
+      break;
     case Instruction::Add:
     case Instruction::Sub:
     case Instruction::Mul:
@@ -1295,27 +1305,34 @@
     Out << ")";
     printConstant(CPV, false);
     Out << ")";
-  } else 
+  } else
     printConstant(CPV, false);
 }
 
 std::string CWriter::GetValueName(const Value *Operand) {
+
+  // Resolve potential alias.
+  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
+    if (const Value *V = GA->resolveAliasedGlobal(false))
+      Operand = V;
+  }
+
   // Mangle globals with the standard mangler interface for LLC compatibility.
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
     SmallString<128> Str;
     Mang->getNameWithPrefix(Str, GV, false);
     return CBEMangle(Str.str().str());
   }
-    
+
   std::string Name = Operand->getName();
-    
+
   if (Name.empty()) { // Assign unique names to local temporaries.
     unsigned &No = AnonValueNumbers[Operand];
     if (No == 0)
       No = ++NextAnonValueNumber;
     Name = "tmp__" + utostr(No);
   }
-    
+
   std::string VarName;
   VarName.reserve(Name.capacity());
 
@@ -1342,7 +1359,7 @@
   // Validate this.
   const Type *Ty = I.getType();
   if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
-        Ty!=Type::getInt8Ty(I.getContext()) && 
+        Ty!=Type::getInt8Ty(I.getContext()) &&
         Ty!=Type::getInt16Ty(I.getContext()) &&
         Ty!=Type::getInt32Ty(I.getContext()) &&
         Ty!=Type::getInt64Ty(I.getContext()))) {
@@ -1358,12 +1375,12 @@
   if (I.getType() == Type::getInt1Ty(I.getContext()) &&
       !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
     NeedBoolTrunc = true;
-  
+
   if (NeedBoolTrunc)
     Out << "((";
-  
+
   visit(I);
-  
+
   if (NeedBoolTrunc)
     Out << ")&1)";
 }
@@ -1398,9 +1415,9 @@
     Out << ')';
 }
 
-// Some instructions need to have their result value casted back to the 
-// original types because their operands were casted to the expected type. 
-// This function takes care of detecting that case and printing the cast 
+// Some instructions need to have their result value casted back to the
+// original types because their operands were casted to the expected type.
+// This function takes care of detecting that case and printing the cast
 // for the Instruction.
 bool CWriter::writeInstructionCast(const Instruction &I) {
   const Type *Ty = I.getOperand(0)->getType();
@@ -1411,15 +1428,15 @@
     // We need to cast integer arithmetic so that it is always performed
     // as unsigned, to avoid undefined behavior on overflow.
   case Instruction::LShr:
-  case Instruction::URem: 
-  case Instruction::UDiv: 
+  case Instruction::URem:
+  case Instruction::UDiv:
     Out << "((";
     printSimpleType(Out, Ty, false);
     Out << ")(";
     return true;
   case Instruction::AShr:
-  case Instruction::SRem: 
-  case Instruction::SDiv: 
+  case Instruction::SRem:
+  case Instruction::SDiv:
     Out << "((";
     printSimpleType(Out, Ty, true);
     Out << ")(";
@@ -1431,7 +1448,7 @@
 
 // Write the operand with a cast to another type based on the Opcode being used.
 // This will be used in cases where an instruction has specific type
-// requirements (usually signedness) for its operands. 
+// requirements (usually signedness) for its operands.
 void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
 
   // Extract the operand's type, we'll need it.
@@ -1449,7 +1466,7 @@
   switch (Opcode) {
     default:
       // for most instructions, it doesn't matter
-      break; 
+      break;
     case Instruction::Add:
     case Instruction::Sub:
     case Instruction::Mul:
@@ -1478,14 +1495,14 @@
     Out << ")";
     writeOperand(Operand);
     Out << ")";
-  } else 
+  } else
     writeOperand(Operand);
 }
 
-// Write the operand with a cast to another type based on the icmp predicate 
-// being used. 
+// Write the operand with a cast to another type based on the icmp predicate
+// being used.
 void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
-  // This has to do a cast to ensure the operand has the right signedness. 
+  // This has to do a cast to ensure the operand has the right signedness.
   // Also, if the operand is a pointer, we make sure to cast to an integer when
   // doing the comparison both for signedness and so that the C compiler doesn't
   // optimize things like "p < NULL" to false (p may contain an integer value
@@ -1498,7 +1515,7 @@
     writeOperand(Operand);
     return;
   }
-  
+
   // Should this be a signed comparison?  If so, convert to signed.
   bool castIsSigned = Cmp.isSigned();
 
@@ -1506,7 +1523,7 @@
   const Type* OpTy = Operand->getType();
   if (OpTy->isPointerTy())
     OpTy = TD->getIntPtrType(Operand->getContext());
-  
+
   Out << "((";
   printSimpleType(Out, OpTy, castIsSigned);
   Out << ")";
@@ -1573,7 +1590,7 @@
   Out << "#if defined(__GNUC__)\n"
       << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
       << "#endif\n\n";
-    
+
   // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
   // From the GCC documentation:
   //
@@ -1629,7 +1646,7 @@
       << "#define __ATTRIBUTE_DTOR__\n"
       << "#define LLVM_ASM(X)\n"
       << "#endif\n\n";
-  
+
   Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
       << "#define __builtin_stack_save() 0   /* not implemented */\n"
       << "#define __builtin_stack_restore(X) /* noop */\n"
@@ -1652,11 +1669,11 @@
 static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
   ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
   if (!InitList) return;
-  
+
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
       if (CS->getNumOperands() != 2) return;  // Not array of 2-element structs.
-      
+
       if (CS->getOperand(1)->isNullValue())
         return;  // Found a null terminator, exit printing.
       Constant *FP = CS->getOperand(1);
@@ -1684,12 +1701,12 @@
     else if (GV->getName() == "llvm.global_dtors")
       return GlobalDtors;
   }
-  
+
   // Otherwise, if it is other metadata, don't print it.  This catches things
   // like debug information.
   if (GV->getSection() == "llvm.metadata")
     return NotPrinted;
-  
+
   return NotSpecial;
 }
 
@@ -1720,7 +1737,7 @@
 
 bool CWriter::doInitialization(Module &M) {
   FunctionPass::doInitialization(M);
-  
+
   // Initialize
   TheModule = &M;
 
@@ -1732,11 +1749,11 @@
   std::string Triple = TheModule->getTargetTriple();
   if (Triple.empty())
     Triple = llvm::sys::getHostTriple();
-  
+
   std::string E;
   if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
     TAsm = Match->createAsmInfo(Triple);
-#endif    
+#endif
   TAsm = new CBEMCAsmInfo();
   TCtx = new MCContext(*TAsm);
   Mang = new Mangler(*TCtx, *TD);
@@ -1756,7 +1773,7 @@
       break;
     }
   }
-  
+
   // get declaration for alloca
   Out << "/* Provide Declarations */\n";
   Out << "#include <stdarg.h>\n";      // Varargs support
@@ -1813,7 +1830,7 @@
     for (Module::global_iterator I = M.global_begin(), E = M.global_end();
          I != E; ++I) {
 
-      if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() || 
+      if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
           I->hasCommonLinkage())
         Out << "extern ";
       else if (I->hasDLLImportLinkage())
@@ -1838,7 +1855,7 @@
   Out << "double fmod(double, double);\n";   // Support for FP rem
   Out << "float fmodf(float, float);\n";
   Out << "long double fmodl(long double, long double);\n";
-  
+
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     // Don't print declarations for intrinsic functions.
     if (!I->isIntrinsic() && I->getName() != "setjmp" &&
@@ -1846,7 +1863,7 @@
       if (I->hasExternalWeakLinkage())
         Out << "extern ";
       printFunctionSignature(I, true);
-      if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) 
+      if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
         Out << " __ATTRIBUTE_WEAK__";
       if (I->hasExternalWeakLinkage())
         Out << " __EXTERNAL_WEAK__";
@@ -1856,10 +1873,10 @@
         Out << " __ATTRIBUTE_DTOR__";
       if (I->hasHiddenVisibility())
         Out << " __HIDDEN__";
-      
+
       if (I->hasName() && I->getName()[0] == 1)
         Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
-          
+
       Out << ";\n";
     }
   }
@@ -1883,7 +1900,7 @@
         if (I->isThreadLocal())
           Out << "__thread ";
 
-        printType(Out, I->getType()->getElementType(), false, 
+        printType(Out, I->getType()->getElementType(), false,
                   GetValueName(I));
 
         if (I->hasLinkOnceLinkage())
@@ -1903,7 +1920,7 @@
   // Output the global variable definitions and contents...
   if (!M.global_empty()) {
     Out << "\n\n/* Global Variable Definitions and Initialization */\n";
-    for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 
+    for (Module::global_iterator I = M.global_begin(), E = M.global_end();
          I != E; ++I)
       if (!I->isDeclaration()) {
         // Ignore special globals, such as debug info.
@@ -1921,7 +1938,7 @@
         if (I->isThreadLocal())
           Out << "__thread ";
 
-        printType(Out, I->getType()->getElementType(), false, 
+        printType(Out, I->getType()->getElementType(), false,
                   GetValueName(I));
         if (I->hasLinkOnceLinkage())
           Out << " __attribute__((common))";
@@ -1932,7 +1949,7 @@
 
         if (I->hasHiddenVisibility())
           Out << " __HIDDEN__";
-        
+
         // If the initializer is not null, emit the initializer.  If it is null,
         // we try to avoid emitting large amounts of zeros.  The problem with
         // this, however, occurs when the variable has weak linkage.  In this
@@ -1966,7 +1983,7 @@
   if (!M.empty())
     Out << "\n\n/* Function Bodies */\n";
 
-  // Emit some helper functions for dealing with FCMP instruction's 
+  // Emit some helper functions for dealing with FCMP instruction's
   // predicates
   Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
   Out << "return X == X && Y == Y; }\n";
@@ -2021,7 +2038,7 @@
       printFloatingPointConstants(CE->getOperand(i));
     return;
   }
-    
+
   // Otherwise, check for a FP constant that we need to print.
   const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
   if (FPC == 0 ||
@@ -2032,7 +2049,7 @@
     return;
 
   FPConstantMap[FPC] = FPCounter;  // Number the FP constants
-  
+
   if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) {
     double Val = FPC->getValueAPF().convertToDouble();
     uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
@@ -2051,7 +2068,7 @@
     APInt api = FPC->getValueAPF().bitcastToAPInt();
     const uint64_t *p = api.getRawData();
     Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
-    << " = { 0x" << utohexstr(p[0]) 
+    << " = { 0x" << utohexstr(p[0])
     << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
     << "}; /* Long double constant */\n";
   } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
@@ -2062,7 +2079,7 @@
     << " = { 0x"
     << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
     << "}; /* Long double constant */\n";
-    
+
   } else {
     llvm_unreachable("Unknown float type!");
   }
@@ -2134,12 +2151,12 @@
   // Don't walk through pointers.
   if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
     return;
-  
+
   // Print all contained types first.
   for (Type::subtype_iterator I = Ty->subtype_begin(),
        E = Ty->subtype_end(); I != E; ++I)
     printContainedStructs(*I, StructPrinted);
-  
+
   if (Ty->isStructTy() || Ty->isArrayTy()) {
     // Check to see if we have already printed this struct.
     if (StructPrinted.insert(Ty).second) {
@@ -2154,10 +2171,10 @@
 void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
   /// isStructReturn - Should this function actually return a struct by-value?
   bool isStructReturn = F->hasStructRetAttr();
-  
+
   if (F->hasLocalLinkage()) Out << "static ";
   if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
-  if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";  
+  if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
   switch (F->getCallingConv()) {
    case CallingConv::X86_StdCall:
     Out << "__attribute__((stdcall)) ";
@@ -2171,7 +2188,7 @@
    default:
     break;
   }
-  
+
   // Loop over the arguments, printing them...
   const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
   const AttrListPtr &PAL = F->getAttributes();
@@ -2187,7 +2204,7 @@
     if (!F->arg_empty()) {
       Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       unsigned Idx = 1;
-      
+
       // If this is a struct-return function, don't print the hidden
       // struct-return argument.
       if (isStructReturn) {
@@ -2195,7 +2212,7 @@
         ++I;
         ++Idx;
       }
-      
+
       std::string ArgName;
       for (; I != E; ++I) {
         if (PrintedArg) FunctionInnards << ", ";
@@ -2219,7 +2236,7 @@
     // Loop over the arguments, printing them.
     FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
     unsigned Idx = 1;
-    
+
     // If this is a struct-return function, don't print the hidden
     // struct-return argument.
     if (isStructReturn) {
@@ -2227,7 +2244,7 @@
       ++I;
       ++Idx;
     }
-    
+
     for (; I != E; ++I) {
       if (PrintedArg) FunctionInnards << ", ";
       const Type *ArgTy = *I;
@@ -2256,7 +2273,7 @@
     FunctionInnards << "void"; // ret() -> ret(void) in C.
   }
   FunctionInnards << ')';
-  
+
   // Get the return type for the function.
   const Type *RetTy;
   if (!isStructReturn)
@@ -2265,9 +2282,9 @@
     // If this is a struct-return function, print the struct-return type.
     RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
   }
-    
+
   // Print out the return type and the signature built above.
-  printType(Out, RetTy, 
+  printType(Out, RetTy,
             /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
             FunctionInnards.str());
 }
@@ -2287,7 +2304,7 @@
 
   printFunctionSignature(&F, false);
   Out << " {\n";
-  
+
   // If this is a struct return function, handle the result with magic.
   if (isStructReturn) {
     const Type *StructTy =
@@ -2297,13 +2314,13 @@
     Out << ";  /* Struct return temporary */\n";
 
     Out << "  ";
-    printType(Out, F.arg_begin()->getType(), false, 
+    printType(Out, F.arg_begin()->getType(), false,
               GetValueName(F.arg_begin()));
     Out << " = &StructReturn;\n";
   }
 
   bool PrintedVar = false;
-  
+
   // print local variable information for the function
   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
     if (const AllocaInst *AI = isDirectAlloca(&*I)) {
@@ -2311,7 +2328,7 @@
       printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
       Out << ";    /* Address-exposed local */\n";
       PrintedVar = true;
-    } else if (I->getType() != Type::getVoidTy(F.getContext()) && 
+    } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
                !isInlinableInst(*I)) {
       Out << "  ";
       printType(Out, I->getType(), false, GetValueName(&*I));
@@ -2327,7 +2344,7 @@
     }
     // We need a temporary for the BitCast to use so it can pluck a value out
     // of a union to do the BitCast. This is separate from the need for a
-    // variable to hold the result of the BitCast. 
+    // variable to hold the result of the BitCast.
     if (isFPIntBitCast(*I)) {
       Out << "  llvmBitCastUnion " << GetValueName(&*I)
           << "__BITCAST_TEMPORARY;\n";
@@ -2415,7 +2432,7 @@
     Out << "  return StructReturn;\n";
     return;
   }
-  
+
   // Don't output a void return if this is the last basic block in the function
   if (I.getNumOperands() == 0 &&
       &*--I.getParent()->getParent()->end() == I.getParent() &&
@@ -2572,7 +2589,7 @@
   // We must cast the results of binary operations which might be promoted.
   bool needsCast = false;
   if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
-      (I.getType() == Type::getInt16Ty(I.getContext())) 
+      (I.getType() == Type::getInt16Ty(I.getContext()))
       || (I.getType() == Type::getFloatTy(I.getContext()))) {
     needsCast = true;
     Out << "((";
@@ -2624,7 +2641,7 @@
     case Instruction::SRem:
     case Instruction::FRem: Out << " % "; break;
     case Instruction::UDiv:
-    case Instruction::SDiv: 
+    case Instruction::SDiv:
     case Instruction::FDiv: Out << " / "; break;
     case Instruction::And:  Out << " & "; break;
     case Instruction::Or:   Out << " | "; break;
@@ -2632,7 +2649,7 @@
     case Instruction::Shl : Out << " << "; break;
     case Instruction::LShr:
     case Instruction::AShr: Out << " >> "; break;
-    default: 
+    default:
 #ifndef NDEBUG
        errs() << "Invalid operator type!" << I;
 #endif
@@ -2675,7 +2692,7 @@
   case ICmpInst::ICMP_SGT: Out << " > "; break;
   default:
 #ifndef NDEBUG
-    errs() << "Invalid icmp predicate!" << I; 
+    errs() << "Invalid icmp predicate!" << I;
 #endif
     llvm_unreachable(0);
   }
@@ -2748,7 +2765,7 @@
   if (isFPIntBitCast(I)) {
     Out << '(';
     // These int<->float and long<->double casts need to be handled specially
-    Out << GetValueName(&I) << "__BITCAST_TEMPORARY." 
+    Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
         << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
     writeOperand(I.getOperand(0));
     Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
@@ -2756,7 +2773,7 @@
     Out << ')';
     return;
   }
-  
+
   Out << '(';
   printCast(I.getOpcode(), SrcTy, DstTy);
 
@@ -2764,15 +2781,15 @@
   if (SrcTy == Type::getInt1Ty(I.getContext()) &&
       I.getOpcode() == Instruction::SExt)
     Out << "0-";
-  
+
   writeOperand(I.getOperand(0));
-    
-  if (DstTy == Type::getInt1Ty(I.getContext()) && 
+
+  if (DstTy == Type::getInt1Ty(I.getContext()) &&
       (I.getOpcode() == Instruction::Trunc ||
        I.getOpcode() == Instruction::FPToUI ||
        I.getOpcode() == Instruction::FPToSI ||
        I.getOpcode() == Instruction::PtrToInt)) {
-    // Make sure we really get a trunc to bool by anding the operand with 1 
+    // Make sure we really get a trunc to bool by anding the operand with 1
     Out << "&1u";
   }
   Out << ')';
@@ -2829,7 +2846,7 @@
 #undef GET_GCC_BUILTIN_NAME
             // If we handle it, don't lower it.
             if (BuiltinName[0]) break;
-            
+
             // All other intrinsic calls we must lower.
             Instruction *Before = 0;
             if (CI != &BB->front())
@@ -2852,7 +2869,7 @@
             break;
           }
 
-  // We may have collected some prototypes to emit in the loop above. 
+  // We may have collected some prototypes to emit in the loop above.
   // Emit them now, before the function that uses them is emitted. But,
   // be careful not to emit them twice.
   std::vector<Function*>::iterator I = prototypesToGen.begin();
@@ -2892,9 +2909,9 @@
     writeOperandDeref(I.getArgOperand(0));
     Out << " = ";
   }
-  
+
   if (I.isTailCall()) Out << " /*tail*/ ";
-  
+
   if (!WroteCallee) {
     // If this is an indirect call to a struct return function, we need to cast
     // the pointer. Ditto for indirect calls with byval arguments.
@@ -2918,7 +2935,7 @@
           NeedsCast = true;
           Callee = RF;
         }
-  
+
     if (NeedsCast) {
       // Ok, just cast the pointer type.
       Out << "((";
@@ -2951,14 +2968,14 @@
     ++AI;
     ++ArgNo;
   }
-      
+
 
   for (; AI != AE; ++AI, ++ArgNo) {
     if (PrintedArg) Out << ", ";
     if (ArgNo < NumDeclaredParams &&
         (*AI)->getType() != FTy->getParamType(ArgNo)) {
       Out << '(';
-      printType(Out, FTy->getParamType(ArgNo), 
+      printType(Out, FTy->getParamType(ArgNo),
             /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
       Out << ')';
     }
@@ -2987,7 +3004,7 @@
 #include "llvm/Intrinsics.gen"
 #undef GET_GCC_BUILTIN_NAME
     assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
-    
+
     Out << BuiltinName;
     WroteCallee = true;
     return false;
@@ -2997,7 +3014,7 @@
     return true;
   case Intrinsic::vastart:
     Out << "0; ";
-      
+
     Out << "va_start(*(va_list*)";
     writeOperand(I.getArgOperand(0));
     Out << ", ";
@@ -3075,7 +3092,7 @@
   case Intrinsic::x86_sse2_cmp_pd:
     Out << '(';
     printType(Out, I.getType());
-    Out << ')';  
+    Out << ')';
     // Multiple GCC builtins multiplex onto this intrinsic.
     switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
     default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
@@ -3096,7 +3113,7 @@
       Out << 's';
     else
       Out << 'd';
-      
+
     Out << "(";
     writeOperand(I.getArgOperand(0));
     Out << ", ";
@@ -3106,7 +3123,7 @@
   case Intrinsic::ppc_altivec_lvsl:
     Out << '(';
     printType(Out, I.getType());
-    Out << ')';  
+    Out << ')';
     Out << "__builtin_altivec_lvsl(0, (void*)";
     writeOperand(I.getArgOperand(0));
     Out << ")";
@@ -3126,13 +3143,13 @@
   std::string Triple = TheModule->getTargetTriple();
   if (Triple.empty())
     Triple = llvm::sys::getHostTriple();
-  
+
   std::string E;
   if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
     TargetAsm = Match->createAsmInfo(Triple);
   else
     return c.Codes[0];
-  
+
   const char *const *table = TargetAsm->getAsmCBE();
 
   // Search the translation table if it exists.
@@ -3158,7 +3175,7 @@
       if (asmstr[i + 1] == '{') {
         std::string::size_type a = asmstr.find_first_of(':', i + 1);
         std::string::size_type b = asmstr.find_first_of('}', i + 1);
-        std::string n = "%" + 
+        std::string n = "%" +
           asmstr.substr(a + 1, b - a - 1) +
           asmstr.substr(i + 2, a - i - 2);
         asmstr.replace(i, b - i + 1, n);
@@ -3168,7 +3185,7 @@
     }
     else if (asmstr[i] == '%')//grr
       { asmstr.replace(i, 1, "%%"); ++i;}
-  
+
   return asmstr;
 }
 
@@ -3177,7 +3194,7 @@
 void CWriter::visitInlineAsm(CallInst &CI) {
   InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
   std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints();
-  
+
   std::vector<std::pair<Value*, int> > ResultVals;
   if (CI.getType() == Type::getVoidTy(CI.getContext()))
     ;
@@ -3187,27 +3204,27 @@
   } else {
     ResultVals.push_back(std::make_pair(&CI, -1));
   }
-  
+
   // Fix up the asm string for gcc and emit it.
   Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
   Out << "        :";
 
   unsigned ValueCount = 0;
   bool IsFirst = true;
-  
+
   // Convert over all the output constraints.
   for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
        E = Constraints.end(); I != E; ++I) {
-    
+
     if (I->Type != InlineAsm::isOutput) {
       ++ValueCount;
       continue;  // Ignore non-output constraints.
     }
-    
+
     assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
     std::string C = InterpretASMConstraint(*I);
     if (C.empty()) continue;
-    
+
     if (!IsFirst) {
       Out << ", ";
       IsFirst = false;
@@ -3216,7 +3233,7 @@
     // Unpack the dest.
     Value *DestVal;
     int DestValNo = -1;
-    
+
     if (ValueCount < ResultVals.size()) {
       DestVal = ResultVals[ValueCount].first;
       DestValNo = ResultVals[ValueCount].second;
@@ -3225,15 +3242,15 @@
 
     if (I->isEarlyClobber)
       C = "&"+C;
-      
+
     Out << "\"=" << C << "\"(" << GetValueName(DestVal);
     if (DestValNo != -1)
       Out << ".field" << DestValNo; // Multiple retvals.
     Out << ")";
     ++ValueCount;
   }
-  
-  
+
+
   // Convert over all the input constraints.
   Out << "\n        :";
   IsFirst = true;
@@ -3244,19 +3261,19 @@
       ++ValueCount;
       continue;  // Ignore non-input constraints.
     }
-    
+
     assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
     std::string C = InterpretASMConstraint(*I);
     if (C.empty()) continue;
-    
+
     if (!IsFirst) {
       Out << ", ";
       IsFirst = false;
     }
-    
+
     assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
     Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
-    
+
     Out << "\"" << C << "\"(";
     if (!I->isIndirect)
       writeOperand(SrcVal);
@@ -3264,7 +3281,7 @@
       writeOperandDeref(SrcVal);
     Out << ")";
   }
-  
+
   // Convert over the clobber constraints.
   IsFirst = true;
   for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
@@ -3275,15 +3292,15 @@
     assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
     std::string C = InterpretASMConstraint(*I);
     if (C.empty()) continue;
-    
+
     if (!IsFirst) {
       Out << ", ";
       IsFirst = false;
     }
-    
+
     Out << '\"' << C << '"';
   }
-  
+
   Out << ")";
 }
 
@@ -3302,13 +3319,13 @@
 
 void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
                                  gep_type_iterator E, bool Static) {
-  
+
   // If there are no indices, just print out the pointer.
   if (I == E) {
     writeOperand(Ptr);
     return;
   }
-    
+
   // Find out if the last index is into a vector.  If so, we have to print this
   // specially.  Since vectors can't have elements of indexable type, only the
   // last index could possibly be of a vector element.
@@ -3317,9 +3334,9 @@
     for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
       LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
   }
-  
+
   Out << "(";
-  
+
   // If the last index is into a vector, we can't print it as &a[i][j] because
   // we can't index into a vector with j in GCC.  Instead, emit this as
   // (((float*)&a[i])+j)
@@ -3328,7 +3345,7 @@
     printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
     Out << ")(";
   }
-  
+
   Out << '&';
 
   // If the first index is 0 (very typical) we can do a number of
@@ -3438,7 +3455,7 @@
   if (BitMask) {
     Out << ") & ";
     printConstant(BitMask, false);
-    Out << ")"; 
+    Out << ")";
   }
 }
 
@@ -3471,7 +3488,7 @@
 void CWriter::visitExtractElementInst(ExtractElementInst &I) {
   // We know that our operand is not inlined.
   Out << "((";
-  const Type *EltTy = 
+  const Type *EltTy =
     cast<VectorType>(I.getOperand(0)->getType())->getElementType();
   printType(Out, PointerType::getUnqual(EltTy));
   Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";

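For reference, visitInlineAsm above rewrites an LLVM inline-asm call into
GCC extended-asm form: gccifyAsm converts ${N:m} operand references into
GCC's %mN spelling and doubles bare '%' characters, then the constraint
loops emit outputs ("=C"(dest)), inputs, and clobbers, in that order. A
minimal sketch of the emitted C shape, assuming a single
register-constrained output and input (the mov mnemonic and AT&T x86
syntax are illustrative assumptions, not part of this patch):

    /* Hypothetical CBackend output for something like:
         %y = call i32 asm "mov $1, $0", "=r,r"(i32 %x)
       Compiles with GCC/Clang on x86. */
    int demo(int x) {
      int y;
      __asm__ volatile ("mov %1, %0"   /* asm string after gccifyAsm */
                        : "=r"(y)      /* output constraints */
                        : "r"(x));     /* inputs; clobbers would follow */
      return y;
    }
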
Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp Tue Oct 26 19:48:03 2010
@@ -46,10 +46,6 @@
       return "STI CBEA SPU Assembly Printer";
     }
 
-    SPUTargetMachine &getTM() {
-      return static_cast<SPUTargetMachine&>(TM);
-    }
-
     /// printInstruction - This method is automatically generated by tablegen
     /// from the instruction set description.
     void printInstruction(const MachineInstr *MI, raw_ostream &OS);
@@ -64,15 +60,6 @@
     }
     void printOp(const MachineOperand &MO, raw_ostream &OS);
 
-    /// printRegister - Print register according to target requirements.
-    ///
-    void printRegister(const MachineOperand &MO, bool R0AsZero, raw_ostream &O){
-      unsigned RegNo = MO.getReg();
-      assert(TargetRegisterInfo::isPhysicalRegister(RegNo) &&
-             "Not physreg??");
-      O << getRegisterName(RegNo);
-    }
-
     void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
       const MachineOperand &MO = MI->getOperand(OpNo);
       if (MO.isReg()) {
@@ -93,17 +80,6 @@
 
 
     void
-    printS7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      int value = MI->getOperand(OpNo).getImm();
-      value = (value << (32 - 7)) >> (32 - 7);
-
-      assert((value >= -(1 << 8) && value <= (1 << 7) - 1)
-             && "Invalid s7 argument");
-      O << value;
-    }
-
-    void
     printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
     {
       unsigned int value = MI->getOperand(OpNo).getImm();
@@ -134,12 +110,6 @@
     }
 
     void
-    printU32ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      O << (unsigned)MI->getOperand(OpNo).getImm();
-    }
-
-    void
     printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
       // When used as the base register, r0 reads constant zero rather than
       // the value contained in the register.  For this reason, the darwin
@@ -221,13 +191,6 @@
       printOp(MI->getOperand(OpNo), O);
     }
 
-    void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      // HBR operands are generated in front of branches, hence, the
-      // program counter plus the target.
-      O << ".+";
-      printOp(MI->getOperand(OpNo), O);
-    }
-
     void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
       if (MI->getOperand(OpNo).isImm()) {
         printS16ImmOperand(MI, OpNo, O);

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -23,5 +23,3 @@
   SPUTargetMachine.cpp
   SPUSelectionDAGInfo.cpp
   )
-
-target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPU64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPU64InstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPU64InstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPU64InstrInfo.td Tue Oct 26 19:48:03 2010
@@ -54,8 +54,8 @@
 // The i64 seteq fragment that does the scalar->vector conversion and
 // comparison:
 def CEQr64compare:
-    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
-                                           (ORv2i64_i64 R64C:$rB))), 0xb)>;
+    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+                                           (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
 
 // The i64 seteq fragment that does the vector comparison
 def CEQv2i64compare:
@@ -67,12 +67,14 @@
 // v2i64 seteq (equality): the setcc result is v4i32
 multiclass CompareEqual64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
-  def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
+  def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CEQr64compare.Fragment), R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
 }
 
 defm I64EQ: CompareEqual64;
@@ -89,10 +91,12 @@
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 
 def CLGTr64ugt:
-    CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+    CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
+                        (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
 
 def CLGTr64eq:
-    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+    CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
+                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
     
 def CLGTr64compare:
     CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
@@ -112,12 +116,14 @@
 
 multiclass CompareLogicalGreaterThan64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
   def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
 }
 
 defm I64LGT: CompareLogicalGreaterThan64;
@@ -144,12 +150,14 @@
 
 multiclass CompareLogicalGreaterEqual64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
   def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                           (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                           (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
 }
 
 defm I64LGE: CompareLogicalGreaterEqual64;
@@ -168,10 +176,12 @@
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 
 def CGTr64sgt:
-    CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+    CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
+                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
 
 def CGTr64eq:
-    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+    CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
+                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
     
 def CGTr64compare:
     CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
@@ -191,12 +201,14 @@
 
 multiclass CompareGreaterThan64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
   def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                             (FSMv4i32 CGTr64compare.Fragment), R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
 }
 
 defm I64GT: CompareLogicalGreaterThan64;
@@ -223,12 +235,12 @@
 
 multiclass CompareGreaterEqual64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
   def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
 }
 
 defm I64GE: CompareGreaterEqual64;
@@ -255,9 +267,9 @@
     v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
 
 def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
-           (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
-                                  (ORv2i64_i64 R64C:$rB),
-                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;
+           (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+                                  (COPY_TO_REGCLASS R64C:$rB, VECREG),
+                                  (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
 
 def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)),
@@ -275,11 +287,12 @@
     CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
 
 def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
-           (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
-                                  (ORv2i64_i64 R64C:$rB),
-                                  v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
-                                               (ORv2i64_i64 R64C:$rB)>.Fragment,
-                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;
+           (COPY_TO_REGCLASS 
+               v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+                         (COPY_TO_REGCLASS R64C:$rB, VECREG),
+                         v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+                                      (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
+                                  (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
 
 def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)),
@@ -374,9 +387,9 @@
               rCGmask>;
 
 def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
-          (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
-                                 (ORv2i64_i64 R64C:$rB),
-                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;
+          (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+                                 (COPY_TO_REGCLASS R64C:$rB, VECREG),
+                                 (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
 
 def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)),

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPUCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPUCallingConv.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPUCallingConv.td (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPUCallingConv.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===//
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
 // 
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,16 +19,17 @@
 // Return Value Calling Convention
 //===----------------------------------------------------------------------===//
 
-// Return-value convention for Cell SPU: Everything can be passed back via $3:
+// Return-value convention for Cell SPU: return values are passed in registers R3-R74.
 def RetCC_SPU : CallingConv<[
-  CCIfType<[i8],       CCAssignToReg<[R3]>>,
-  CCIfType<[i16],      CCAssignToReg<[R3]>>,
-  CCIfType<[i32],      CCAssignToReg<[R3]>>,
-  CCIfType<[i64],      CCAssignToReg<[R3]>>,
-  CCIfType<[i128],     CCAssignToReg<[R3]>>,
-  CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
-  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>,
-  CCIfType<[v2i32],                                    CCAssignToReg<[R3]>>
+  CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
+  CCAssignToReg<[R3,   R4,  R5,  R6,  R7,  R8,  R9, R10, R11,
+                 R12, R13, R14, R15, R16, R17, R18, R19, R20,
+                 R21, R22, R23, R24, R25, R26, R27, R28, R29,
+                 R30, R31, R32, R33, R34, R35, R36, R37, R38,
+                 R39, R40, R41, R42, R43, R44, R45, R46, R47,
+                 R48, R49, R50, R51, R52, R53, R54, R55, R56,
+                 R57, R58, R59, R60, R61, R62, R63, R64, R65,
+                 R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
 ]>;
 
 
@@ -45,8 +46,7 @@
                            R39, R40, R41, R42, R43, R44, R45, R46, R47,
                            R48, R49, R50, R51, R52, R53, R54, R55, R56,
                            R57, R58, R59, R60, R61, R62, R63, R64, R65,
-                           R66, R67, R68, R69, R70, R71, R72, R73, R74,
-                           R75, R76, R77, R78, R79]>>,
+                           R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
   // 8-byte aligned if there are no more registers to hold them.
   CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,

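The return-value side of this table is consumed when call results are
copied back out of registers; the SPUISelLowering.cpp hunk further down
replaces a hand-written switch over R3/R4 with exactly this generic
walk. A condensed sketch of that consumer (not a drop-in function; the
CCState constructor shape and the CCC_SPU table name follow this
2010-era patch):

    // Sketch: how register assignments made by a CallingConv<> table are
    // consumed when lowering a call's results (mirrors the loop added to
    // SPUTargetLowering::LowerCall below).
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
                      RVLocs, *DAG.getContext());
    CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);

    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      // Each location is one of the registers listed above (R3, R4, ...).
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                       VA.getLocVT(), InFlag);
      Chain  = Val.getValue(1);   // thread the chain
      InFlag = Val.getValue(2);   // glue orders the register copies
      InVals.push_back(Val);
    }
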
Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -41,13 +41,6 @@
 namespace {
   //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
   bool
-  isI64IntS10Immediate(ConstantSDNode *CN)
-  {
-    return isInt<10>(CN->getSExtValue());
-  }
-
-  //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
-  bool
   isI32IntS10Immediate(ConstantSDNode *CN)
   {
     return isInt<10>(CN->getSExtValue());
@@ -67,14 +60,6 @@
     return isInt<10>(CN->getSExtValue());
   }
 
-  //! SDNode predicate for i16 sign-extended, 10-bit immediate values
-  bool
-  isI16IntS10Immediate(SDNode *N)
-  {
-    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
-    return (CN != 0 && isI16IntS10Immediate(CN));
-  }
-
   //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
   bool
   isI16IntU10Immediate(ConstantSDNode *CN)
@@ -82,14 +67,6 @@
     return isUInt<10>((short) CN->getZExtValue());
   }
 
-  //! SDNode predicate for i16 sign-extended, 10-bit immediate values
-  bool
-  isI16IntU10Immediate(SDNode *N)
-  {
-    return (N->getOpcode() == ISD::Constant
-            && isI16IntU10Immediate(cast<ConstantSDNode>(N)));
-  }
-
   //! ConstantSDNode predicate for signed 16-bit values
   /*!
     \arg CN The constant SelectionDAG node holding the value
@@ -119,14 +96,6 @@
     return false;
   }
 
-  //! SDNode predicate for signed 16-bit values.
-  bool
-  isIntS16Immediate(SDNode *N, short &Imm)
-  {
-    return (N->getOpcode() == ISD::Constant
-            && isIntS16Immediate(cast<ConstantSDNode>(N), Imm));
-  }
-
   //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
   static bool
   isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
@@ -142,65 +111,6 @@
     return false;
   }
 
-  bool
-  isHighLow(const SDValue &Op)
-  {
-    return (Op.getOpcode() == SPUISD::IndirectAddr
-            && ((Op.getOperand(0).getOpcode() == SPUISD::Hi
-                 && Op.getOperand(1).getOpcode() == SPUISD::Lo)
-                || (Op.getOperand(0).getOpcode() == SPUISD::Lo
-                    && Op.getOperand(1).getOpcode() == SPUISD::Hi)));
-  }
-
-  //===------------------------------------------------------------------===//
-  //! EVT to "useful stuff" mapping structure:
-
-  struct valtype_map_s {
-    EVT VT;
-    unsigned ldresult_ins;      /// LDRESULT instruction (0 = undefined)
-    bool ldresult_imm;          /// LDRESULT instruction requires immediate?
-    unsigned lrinst;            /// LR instruction
-  };
-
-  const valtype_map_s valtype_map[] = {
-    { MVT::i8,    SPU::ORBIr8,  true,  SPU::LRr8 },
-    { MVT::i16,   SPU::ORHIr16, true,  SPU::LRr16 },
-    { MVT::i32,   SPU::ORIr32,  true,  SPU::LRr32 },
-    { MVT::i64,   SPU::ORr64,   false, SPU::LRr64 },
-    { MVT::f32,   SPU::ORf32,   false, SPU::LRf32 },
-    { MVT::f64,   SPU::ORf64,   false, SPU::LRf64 },
-    // vector types... (sigh!)
-    { MVT::v16i8, 0,            false, SPU::LRv16i8 },
-    { MVT::v8i16, 0,            false, SPU::LRv8i16 },
-    { MVT::v4i32, 0,            false, SPU::LRv4i32 },
-    { MVT::v2i64, 0,            false, SPU::LRv2i64 },
-    { MVT::v4f32, 0,            false, SPU::LRv4f32 },
-    { MVT::v2f64, 0,            false, SPU::LRv2f64 }
-  };
-
-  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
-
-  const valtype_map_s *getValueTypeMapEntry(EVT VT)
-  {
-    const valtype_map_s *retval = 0;
-    for (size_t i = 0; i < n_valtype_map; ++i) {
-      if (valtype_map[i].VT == VT) {
-        retval = valtype_map + i;
-        break;
-      }
-    }
-
-
-#ifndef NDEBUG
-    if (retval == 0) {
-      report_fatal_error("SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns"
-                         "NULL for " + Twine(VT.getEVTString()));
-    }
-#endif
-
-    return retval;
-  }
-
   //! Generate the carry-generate shuffle mask.
   SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
     SmallVector<SDValue, 16 > ShufBytes;
@@ -262,16 +172,10 @@
       return CurDAG->getTargetConstant(Imm, MVT::i32);
     }
 
-    /// getI64Imm - Return a target constant with the specified value, of type
-    /// i64.
-    inline SDValue getI64Imm(uint64_t Imm) {
-      return CurDAG->getTargetConstant(Imm, MVT::i64);
-    }
-
     /// getSmallIPtrImm - Return a target constant of pointer type.
     inline SDValue getSmallIPtrImm(unsigned Imm) {
       return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
-      }
+    }
 
     SDNode *emitBuildVector(SDNode *bvNode) {
       EVT vecVT = bvNode->getValueType(0);
@@ -312,7 +216,7 @@
       
       HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
                                          CurDAG->getEntryNode(), CGPoolOffset,
-                                         PseudoSourceValue::getConstantPool(),0,
+                                         MachinePointerInfo::getConstantPool(),
                                          false, false, Alignment));
       CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
       if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
@@ -404,6 +308,9 @@
       assert(II && "No InstrInfo?");
       return new SPUHazardRecognizer(*II);
     }
+    
+  private:
+    SDValue getRC( MVT );  
 
     // Include the pieces autogenerated from the target description.
 #include "SPUGenDAGISel.inc"
@@ -607,7 +514,8 @@
     return true;
   } else if (Opc == ISD::Register 
            ||Opc == ISD::CopyFromReg 
-           ||Opc == ISD::UNDEF) {
+           ||Opc == ISD::UNDEF
+           ||Opc == ISD::Constant) {
     unsigned OpOpc = Op->getOpcode();
 
     if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
@@ -665,6 +573,43 @@
   return false;
 }
 
+/*!
+ Utility function for use with COPY_TO_REGCLASS instructions. Returns an
+ SDValue suitable as the last operand of a
+ CurDAG->getMachineNode(COPY_TO_REGCLASS, ...) call.
+ \arg VT the value type for which we want a register class
+*/
+SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
+  switch( VT.SimpleTy ) {
+  case MVT::i8:
+    return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32); 
+    break; 
+  case MVT::i16:
+    return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32); 
+    break; 
+  case MVT::i32:
+    return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32); 
+    break; 
+  case MVT::f32:
+    return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32); 
+    break; 
+  case MVT::i64:
+    return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32); 
+    break;
+  case MVT::v16i8:
+  case MVT::v8i16:
+  case MVT::v4i32:
+  case MVT::v4f32:
+  case MVT::v2i64:
+  case MVT::v2f64:
+    return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32); 
+    break;
+  default:
+    assert( false && "add a new case here" );
+  }
+  return SDValue();
+}
+
 //! Convert the operand from a target-independent to a target-specific node
 /*!
  */
@@ -819,8 +764,8 @@
 
         if (shift_amt >= 32) {
           SDNode *hi32 =
-                  CurDAG->getMachineNode(SPU::ORr32_r64, dl, OpVT,
-                                         Op0.getOperand(0));
+                  CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+                                         Op0.getOperand(0), getRC(MVT::i32));
 
           shift_amt -= 32;
           if (shift_amt > 0) {
@@ -902,23 +847,12 @@
     SDValue Arg = N->getOperand(0);
     SDValue Chain = N->getOperand(1);
     SDNode *Result;
-    const valtype_map_s *vtm = getValueTypeMapEntry(VT);
-
-    if (vtm->ldresult_ins == 0) {
-      report_fatal_error("LDRESULT for unsupported type: " +
-                         Twine(VT.getEVTString()));
-    }
-
-    Opc = vtm->ldresult_ins;
-    if (vtm->ldresult_imm) {
-      SDValue Zero = CurDAG->getTargetConstant(0, VT);
-
-      Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain);
-    } else {
-      Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain);
-    }
-
+   
+    Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
+                                    MVT::Other, Arg,
+                                    getRC( VT.getSimpleVT()), Chain);
     return Result;
+     
   } else if (Opc == SPUISD::IndirectAddr) {
     // Look at the operands: SelectCode() will catch the cases that aren't
     // specifically handled here.
@@ -987,7 +921,8 @@
   SDValue SelMaskVal;
   DebugLoc dl = N->getDebugLoc();
 
-  VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+  VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+                                  Op0, getRC(MVT::v2i64) );
   SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
   SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
   ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
@@ -1031,7 +966,8 @@
                              SDValue(Shift, 0), SDValue(Bits, 0));
   }
 
-  return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+  return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, 
+                                OpVT, SDValue(Shift, 0), getRC(MVT::i64));
 }
 
 /*!
@@ -1052,7 +988,8 @@
   SDNode *VecOp0, *Shift = 0;
   DebugLoc dl = N->getDebugLoc();
 
-  VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+  VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+                                  Op0, getRC(MVT::v2i64) );
 
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
     unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
@@ -1098,7 +1035,8 @@
                              SDValue(Shift, 0), SDValue(Bits, 0));
   }
 
-  return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+  return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, 
+                                OpVT, SDValue(Shift, 0), getRC(MVT::i64));
 }
 
 /*!
@@ -1119,14 +1057,16 @@
   DebugLoc dl = N->getDebugLoc();
 
   SDNode *VecOp0 =
-    CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, N->getOperand(0));
+    CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, 
+                           VecVT, N->getOperand(0), getRC(MVT::v2i64));
 
   SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
   SDNode *SignRot =
     CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
                            SDValue(VecOp0, 0), SignRotAmt);
   SDNode *UpperHalfSign =
-    CurDAG->getMachineNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0));
+    CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, 
+                           MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
 
   SDNode *UpperHalfSignMask =
     CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
@@ -1173,7 +1113,8 @@
                              SDValue(Shift, 0), SDValue(NegShift, 0));
   }
 
-  return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+  return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, 
+                                OpVT, SDValue(Shift, 0), getRC(MVT::i64));
 }
 
 /*!
@@ -1200,8 +1141,9 @@
     SDValue Op0 = i64vec.getOperand(0);
 
     ReplaceUses(i64vec, Op0);
-    return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
-                                  SDValue(emitBuildVector(Op0.getNode()), 0));
+    return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+                                  SDValue(emitBuildVector(Op0.getNode()), 0),
+                                  getRC(MVT::i64));
   } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
     SDValue lhs = i64vec.getOperand(0);
     SDValue rhs = i64vec.getOperand(1);
@@ -1242,10 +1184,12 @@
     SDNode *SN = SelectCode(Dummy.getValue().getNode());
     if (SN == 0) SN = Dummy.getValue().getNode();
     
-    return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0));
+    return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, 
+                                  OpVT, SDValue(SN, 0), getRC(MVT::i64));
   } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
-    return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
-                                  SDValue(emitBuildVector(i64vec.getNode()), 0));
+    return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+                                  SDValue(emitBuildVector(i64vec.getNode()), 0),
+                                  getRC(MVT::i64));
   } else {
     report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
                       "condition");

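A note on the recurring idiom above: the deleted ORi64_v2i64 /
ORv2i64_i64 pseudo-instructions were self-OR no-ops that existed only to
coerce a value between its scalar and vector register views (see the
removed ORCvtForm comment in SPUInstrInfo.td below). Their replacement,
TargetOpcode::COPY_TO_REGCLASS, carries the destination register-class
id as an extra operand, which is what the new getRC() helper supplies. A
minimal sketch of the round trip, using only calls that appear in this
patch (VecVT, Op0, and dl stand for the usual selector locals):

    // i64 -> v2i64 view: constrain the value into VECREG.
    SDNode *VecOp0 =
      CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
                             Op0, getRC(MVT::v2i64));
    // ... do the 128-bit-wide work (shifts, shuffles, ...) ...
    // v2i64 -> i64 view: constrain back into R64C.
    SDNode *Result =
      CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, MVT::i64,
                             SDValue(VecOp0, 0), getRC(MVT::i64));
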
Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -426,9 +426,6 @@
   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 
-  // "Odd size" vector classes that we're willing to support:
-  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
-
   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
@@ -665,7 +662,7 @@
 
     // Re-emit as a v16i8 vector load
     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
-                         LN->getSrcValue(), LN->getSrcValueOffset(),
+                         LN->getPointerInfo(),
                          LN->isVolatile(), LN->isNonTemporal(), 16);
 
     // Update the chain
@@ -751,7 +748,6 @@
 
     if (alignment == 16) {
       ConstantSDNode *CN;
-
       // Special cases for a known aligned load to simplify the base pointer
       // and insertion byte:
       if (basePtr.getOpcode() == ISD::ADD
@@ -775,6 +771,9 @@
         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                     basePtr,
                                     DAG.getConstant(0, PtrVT));
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                    basePtr,
+                                    DAG.getConstant(0, PtrVT));
       }
     } else {
       // Unaligned load: must be more pessimistic about addressing modes:
@@ -811,9 +810,9 @@
                                   DAG.getConstant(0, PtrVT));
     }
 
-    // Re-emit as a v16i8 vector load
-    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
-                               SN->getSrcValue(), SN->getSrcValueOffset(),
+    // Load the vector covering the memory we are about to store into.
+    alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+                               SN->getPointerInfo(),
                                SN->isVolatile(), SN->isNonTemporal(), 16);
 
     // Update the chain
@@ -843,10 +842,10 @@
       }
 #endif
 
-    SDValue insertEltOp =
-            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
-    SDValue vectorizeOp =
-            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+    SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+                                      insertEltOffs);
+    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, 
+                                      theValue);
 
     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                          vectorizeOp, alignLoadVec,
@@ -854,7 +853,7 @@
                                      MVT::v4i32, insertEltOp));
 
     result = DAG.getStore(the_chain, dl, result, basePtr,
-                          LN->getSrcValue(), LN->getSrcValueOffset(),
+                          LN->getPointerInfo(),
                           LN->isVolatile(), LN->isNonTemporal(),
                           LN->getAlignment());
 
@@ -1081,7 +1080,8 @@
       // or we're forced to do vararg
       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+                           false, false, 0);
       ArgOffset += StackSlotSize;
     }
 
@@ -1120,7 +1120,7 @@
       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
       unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
-      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
+      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                    false, false, 0);
       Chain = Store.getOperand(0);
       MemOps.push_back(Store);
@@ -1220,7 +1220,8 @@
       if (ArgRegIdx != NumArgRegs) {
         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
       } else {
-        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                           MachinePointerInfo(),
                                            false, false, 0));
         ArgOffset += StackSlotSize;
       }
@@ -1325,41 +1326,23 @@
   if (Ins.empty())
     return Chain;
 
+  // Now handle the return value(s)
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
+                    RVLocs, *DAG.getContext());
+  CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
   // If the call has results, copy the values out of the ret val registers.
-  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
-  default: llvm_unreachable("Unexpected ret value!");
-  case MVT::Other: break;
-  case MVT::i32:
-    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
-      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
-                                 MVT::i32, InFlag).getValue(1);
-      InVals.push_back(Chain.getValue(0));
-      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
-                                 Chain.getValue(2)).getValue(1);
-      InVals.push_back(Chain.getValue(0));
-    } else {
-      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
-                                 InFlag).getValue(1);
-      InVals.push_back(Chain.getValue(0));
-    }
-    break;
-  case MVT::i8:
-  case MVT::i16:
-  case MVT::i64:
-  case MVT::i128:
-  case MVT::f32:
-  case MVT::f64:
-  case MVT::v2f64:
-  case MVT::v2i64:
-  case MVT::v4f32:
-  case MVT::v4i32:
-  case MVT::v8i16:
-  case MVT::v16i8:
-    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
-                                   InFlag).getValue(1);
-    InVals.push_back(Chain.getValue(0));
-    break;
-  }
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign VA = RVLocs[i];
+    
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+                                     InFlag);
+    Chain = Val.getValue(1);
+    InFlag = Val.getValue(2);
+    InVals.push_back(Val);
+  }
 
   return Chain;
 }
@@ -1621,10 +1604,6 @@
     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
   }
-  case MVT::v2i32: {
-    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
-    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
-  }
   case MVT::v2i64: {
     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
   }
@@ -1748,18 +1727,19 @@
 
   // If we have a single element being moved from V1 to V2, this can be handled
   // using the C*[DX] compute mask instructions, but the vector elements have
-  // to be monotonically increasing with one exception element.
+  // to be monotonically increasing with one exception element, and the source
+  // slot of the element to move must be the same as the destination.
   EVT VecVT = V1.getValueType();
   EVT EltVT = VecVT.getVectorElementType();
   unsigned EltsFromV2 = 0;
-  unsigned V2Elt = 0;
+  unsigned V2EltOffset = 0;
   unsigned V2EltIdx0 = 0;
   unsigned CurrElt = 0;
   unsigned MaxElts = VecVT.getVectorNumElements();
   unsigned PrevElt = 0;
-  unsigned V0Elt = 0;
   bool monotonic = true;
   bool rotate = true;
+  int rotamt = 0;
   EVT maskVT;             // which of the c?d instructions to use
 
   if (EltVT == MVT::i8) {
@@ -1785,9 +1765,13 @@
 
     if (monotonic) {
       if (SrcElt >= V2EltIdx0) {
-        if (1 >= (++EltsFromV2)) {
-          V2Elt = (V2EltIdx0 - SrcElt) << 2;
-        }
+        // TODO: optimize for the monotonic case when several consecutive
+        // elements are taken from V2. Do we ever get such a case?
+        if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+          V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+        else
+          monotonic = false;
+        ++EltsFromV2;
       } else if (CurrElt != SrcElt) {
         monotonic = false;
       }
@@ -1799,14 +1783,13 @@
       if (PrevElt > 0 && SrcElt < MaxElts) {
         if ((PrevElt == SrcElt - 1)
             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+          rotamt = SrcElt - i;
           PrevElt = SrcElt;
-          if (SrcElt == 0)
-            V0Elt = i;
         } else {
           rotate = false;
         }
-      } else if (i == 0) {
-        // First time through, need to keep track of previous element
+      } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
+        // First time or after a "wrap around"
         PrevElt = SrcElt;
       } else {
         // This isn't a rotation, takes elements from vector 2
@@ -1823,7 +1806,7 @@
     // R1 ($sp) is used here only as it is guaranteed to have last bits zero
     SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                 DAG.getRegister(SPU::R1, PtrVT),
-                                DAG.getConstant(V2Elt, MVT::i32));
+                                DAG.getConstant(V2EltOffset, MVT::i32));
     SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, 
                                      maskVT, Pointer);
 
@@ -1831,8 +1814,9 @@
     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                        ShufMaskOp);
   } else if (rotate) {
-    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
-
+    if (rotamt < 0)
+      rotamt += MaxElts;
+    rotamt *= EltVT.getSizeInBits()/8;
     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                        V1, DAG.getConstant(rotamt, MVT::i16));
   } else {
@@ -1847,7 +1831,6 @@
       for (unsigned j = 0; j < BytesPerElement; ++j)
         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
     }
-
     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                     &ResultMask[0], ResultMask.size());
     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
@@ -1997,7 +1980,7 @@
     // Variable index: Rotate the requested element into slot 0, then replicate
     // slot 0 across the vector
     EVT VecVT = N.getValueType();
-    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+    if (!VecVT.isSimple() || !VecVT.isVector()) {
       report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
                         "vector type!");
     }
@@ -2072,21 +2055,25 @@
   SDValue IdxOp = Op.getOperand(2);
   DebugLoc dl = Op.getDebugLoc();
   EVT VT = Op.getValueType();
+  EVT eltVT = ValOp.getValueType();
 
   // use 0 when the lane to insert to is 'undef'
-  int64_t Idx=0;
+  int64_t Offset=0;
   if (IdxOp.getOpcode() != ISD::UNDEF) {
     ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
     assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
-    Idx = (CN->getSExtValue());
+    Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
   }
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   // Use $sp ($1) because it's always 16-byte aligned and it's available:
   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                 DAG.getRegister(SPU::R1, PtrVT),
-                                DAG.getConstant(Idx, PtrVT));
-  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+                                DAG.getConstant(Offset, PtrVT));
+  // widen the mask when dealing with half vectors
+  EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), 
+                                128 / VT.getVectorElementType().getSizeInBits());
+  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
 
   SDValue result =
     DAG.getNode(SPUISD::SHUFB, dl, VT,
@@ -2655,11 +2642,16 @@
                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
                  DAG.getConstant(31, MVT::i32));
 
+  // Reinterpret as an i128 (SHUFB requires it); this gets lowered away.
+  SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 
+                                        dl, Op0VT, Op0,
+                                        DAG.getTargetConstant(
+                                                  SPU::GPRCRegClass.getID(), 
+                                                  MVT::i32)), 0);
   // Shuffle bytes - Copy the sign bits into the upper 64 bits
   // and the input value into the lower 64 bits.
   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
-      DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
-
+        extended, sraVal, shufMask);
   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
 }
 
@@ -3101,3 +3093,29 @@
   // The SPU target isn't yet aware of offsets.
   return false;
 }
+
+// Can we compare to Imm without first materializing it in a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  // ceqi, cgti, etc. all take an s10 immediate operand.
+  return isInt<10>(Imm);
+}
+
+bool 
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM, 
+                                         const Type *) const {
+
+  // A-form: 18-bit absolute address.
+  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+    return true;
+
+  // D-form: reg + 14-bit offset.
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+    return true;
+
+  // X-form: reg + reg.
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
+    return true;
+
+  return false;
+}
+

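The three accepted forms correspond to the SPU load/store encodings used
throughout this patch (A-form absolute, D-form reg+offset, X-form
reg+reg). A self-contained mirror of the predicate, with a local
stand-in for TargetLowering::AddrMode so it runs outside LLVM (the
stand-in's field names are chosen here for illustration):

    #include <cassert>
    #include <cstdint>

    struct AddrMode {     // stand-in: only the fields the SPU override reads
      bool HasBaseGV;     // plays the role of 'BaseGV != 0'
      int64_t BaseOffs;
      bool HasBaseReg;
      int64_t Scale;
    };

    static bool isIntN(unsigned N, int64_t x) {  // same test as llvm::isInt<N>
      return x >= -(int64_t(1) << (N - 1)) && x < (int64_t(1) << (N - 1));
    }

    static bool isLegalSPUAddressingMode(const AddrMode &AM) {
      if (AM.HasBaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
        return true;                             // A-form: 18-bit absolute
      if (!AM.HasBaseGV && AM.HasBaseReg && AM.Scale == 0 &&
          isIntN(14, AM.BaseOffs))
        return true;                             // D-form: reg + 14-bit offset
      if (!AM.HasBaseGV && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
        return true;                             // X-form: reg + reg
      return false;
    }

    int main() {
      assert( isLegalSPUAddressingMode({false, 8191, true, 0}));  // D-form max
      assert(!isLegalSPUAddressingMode({false, 8192, true, 0}));  // out of s14
      assert( isLegalSPUAddressingMode({false,    0, true, 1}));  // X-form
      return 0;
    }
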
Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelLowering.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelLowering.h (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPUISelLowering.h Tue Oct 26 19:48:03 2010
@@ -170,6 +170,11 @@
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
+
+    virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+    virtual bool isLegalAddressingMode(const AddrMode &AM, 
+                                       const Type *Ty) const;
   };
 }
 

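On the implementation side above, isLegalICmpImmediate reduces to
isInt<10>(Imm): the ceqi/cgti family encodes a signed 10-bit immediate,
i.e. the range [-512, 511]. A quick standalone check of the boundary
values:

    #include <cassert>

    // Mirrors isInt<10>(Imm); the s10 range is [-512, 511].
    static bool isLegalICmpImm(long long Imm) {
      return Imm >= -512 && Imm <= 511;
    }

    int main() {
      assert( isLegalICmpImm(-512) && isLegalICmpImm(511));
      assert(!isLegalICmpImm(-513) && !isLegalICmpImm(512));
      return 0;
    }
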
Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPUInstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPUInstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPUInstrInfo.td Tue Oct 26 19:48:03 2010
@@ -62,8 +62,6 @@
     def v4f32: LoadDFormVec<v4f32>;
     def v2f64: LoadDFormVec<v2f64>;
 
-    def v2i32: LoadDFormVec<v2i32>;
-
     def r128:  LoadDForm<GPRC>;
     def r64:   LoadDForm<R64C>;
     def r32:   LoadDForm<R32C>;
@@ -96,8 +94,6 @@
     def v4f32: LoadAFormVec<v4f32>;
     def v2f64: LoadAFormVec<v2f64>;
 
-    def v2i32: LoadAFormVec<v2i32>;
-
     def r128:  LoadAForm<GPRC>;
     def r64:   LoadAForm<R64C>;
     def r32:   LoadAForm<R32C>;
@@ -130,8 +126,6 @@
     def v4f32: LoadXFormVec<v4f32>;
     def v2f64: LoadXFormVec<v2f64>;
 
-    def v2i32: LoadXFormVec<v2i32>;
-
     def r128:  LoadXForm<GPRC>;
     def r64:   LoadXForm<R64C>;
     def r32:   LoadXForm<R32C>;
@@ -180,8 +174,6 @@
   def v4f32: StoreDFormVec<v4f32>;
   def v2f64: StoreDFormVec<v2f64>;
 
-  def v2i32: StoreDFormVec<v2i32>;
-
   def r128:  StoreDForm<GPRC>;
   def r64:   StoreDForm<R64C>;
   def r32:   StoreDForm<R32C>;
@@ -212,8 +204,6 @@
   def v4f32: StoreAFormVec<v4f32>;
   def v2f64: StoreAFormVec<v2f64>;
 
-  def v2i32: StoreAFormVec<v2i32>;
-
   def r128:  StoreAForm<GPRC>;
   def r64:   StoreAForm<R64C>;
   def r32:   StoreAForm<R32C>;
@@ -246,8 +236,6 @@
   def v4f32: StoreXFormVec<v4f32>;
   def v2f64: StoreXFormVec<v2f64>;
 
-  def v2i32: StoreXFormVec<v2i32>;
-
   def r128:  StoreXForm<GPRC>;
   def r64:   StoreXForm<R64C>;
   def r32:   StoreXForm<R32C>;
@@ -607,7 +595,6 @@
 multiclass AddInstruction {
   def v4i32: AVecInst<v4i32>;
   def v16i8: AVecInst<v16i8>;
-  
   def r32:   ARegInst<R32C>;
 }
 
@@ -672,6 +659,7 @@
   "sf\t$rT, $rA, $rB", IntegerOp,
   [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
 
+
 def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
   "sf\t$rT, $rA, $rB", IntegerOp,
   [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
@@ -1397,56 +1385,6 @@
     ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
            [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
 
-// ORCvtForm: OR conversion form
-//
-// This is used to "convert" the preferred slot to its vector equivalent, as
-// well as convert a vector back to its preferred slot.
-//
-// These are effectively no-ops, but need to exist for proper type conversion
-// and type coercion.
-
-class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]>
-          : SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
-  bits<7> RA;
-  bits<7> RT;
-
-  let Pattern = pattern;
-
-  let Inst{0-10} = 0b10000010000;
-  let Inst{11-17} = RA;
-  let Inst{18-24} = RA;
-  let Inst{25-31} = RT;
-}
-
-class ORPromoteScalar<RegisterClass rclass>:
-    ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>;
-
-class ORExtractElt<RegisterClass rclass>:
-    ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
-
-/* class ORCvtRegGPRC<RegisterClass rclass>:
-    ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */
-
-/* class ORCvtGPRCReg<RegisterClass rclass>:
-    ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */
-    
-class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
-    ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>;
-    
-class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>:
-    ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>;
-
-class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
-    ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>;
-    
-class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>:
-    ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>;
-
-class ORCvtGPRCVec:
-    ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
-
-class ORCvtVecGPRC:
-    ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
 
 multiclass BitwiseOr
 {
@@ -1477,119 +1415,48 @@
 
   def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
                   [/* no pattern */]>;
-
-  // scalar->vector promotion, prefslot2vec:
-  def v16i8_i8:  ORPromoteScalar<R8C>;
-  def v8i16_i16: ORPromoteScalar<R16C>;
-  def v4i32_i32: ORPromoteScalar<R32C>;
-  def v2i64_i64: ORPromoteScalar<R64C>;
-  def v4f32_f32: ORPromoteScalar<R32FP>;
-  def v2f64_f64: ORPromoteScalar<R64FP>;
-
-  // vector->scalar demotion, vec2prefslot:
-  def i8_v16i8:  ORExtractElt<R8C>;
-  def i16_v8i16: ORExtractElt<R16C>;
-  def i32_v4i32: ORExtractElt<R32C>;
-  def i64_v2i64: ORExtractElt<R64C>;
-  def f32_v4f32: ORExtractElt<R32FP>;
-  def f64_v2f64: ORExtractElt<R64FP>;
-
-  // Conversion from vector to GPRC
-  def i128_vec:  ORCvtVecGPRC;
-
-  // Conversion from GPRC to vector
-  def vec_i128:  ORCvtGPRCVec;
-
-/*
-  // Conversion from register to GPRC
-  def i128_r64:  ORCvtRegGPRC<R64C>;
-  def i128_f64:  ORCvtRegGPRC<R64FP>;
-  def i128_r32:  ORCvtRegGPRC<R32C>;
-  def i128_f32:  ORCvtRegGPRC<R32FP>;
-  def i128_r16:  ORCvtRegGPRC<R16C>;
-  def i128_r8:   ORCvtRegGPRC<R8C>;
-
-  // Conversion from GPRC to register
-  def r64_i128:  ORCvtGPRCReg<R64C>;
-  def f64_i128:  ORCvtGPRCReg<R64FP>;
-  def r32_i128:  ORCvtGPRCReg<R32C>;
-  def f32_i128:  ORCvtGPRCReg<R32FP>;
-  def r16_i128:  ORCvtGPRCReg<R16C>;
-  def r8_i128:   ORCvtGPRCReg<R8C>;
-*/
-/*
-  // Conversion from register to R32C:
-  def r32_r16:   ORCvtFormRegR32<R16C>;
-  def r32_r8:    ORCvtFormRegR32<R8C>;
-  
-  // Conversion from R32C to register
-  def r32_r16:   ORCvtFormR32Reg<R16C>;
-  def r32_r8:    ORCvtFormR32Reg<R8C>;
-*/
-  
-  // Conversion from R64C to register:
-  def r32_r64:   ORCvtFormR64Reg<R32C>;
-  // def r16_r64:   ORCvtFormR64Reg<R16C>;
-  // def r8_r64:    ORCvtFormR64Reg<R8C>;
-  
-  // Conversion to R64C from register:
-  def r64_r32:   ORCvtFormRegR64<R32C>;
-  // def r64_r16:   ORCvtFormRegR64<R16C>;
-  // def r64_r8:    ORCvtFormRegR64<R8C>;
-
-  // bitconvert patterns:
-  def r32_f32:   ORCvtFormR32Reg<R32FP,
-                                 [(set R32FP:$rT, (bitconvert R32C:$rA))]>;
-  def f32_r32:   ORCvtFormRegR32<R32FP,
-                                 [(set R32C:$rT, (bitconvert R32FP:$rA))]>;
-
-  def r64_f64:   ORCvtFormR64Reg<R64FP,
-                                 [(set R64FP:$rT, (bitconvert R64C:$rA))]>;
-  def f64_r64:   ORCvtFormRegR64<R64FP,
-                                 [(set R64C:$rT, (bitconvert R64FP:$rA))]>;
 }
 
 defm OR : BitwiseOr;
 
-// scalar->vector promotion patterns (preferred slot to vector):
+//===----------------------------------------------------------------------===//
+// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
+//===----------------------------------------------------------------------===//
 def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
-          (ORv16i8_i8 R8C:$rA)>;
+          (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
 
 def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
-          (ORv8i16_i16 R16C:$rA)>;
+          (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
 
 def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
-          (ORv4i32_i32 R32C:$rA)>;
+          (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
 
 def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
-          (ORv2i64_i64 R64C:$rA)>;
+          (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
 
 def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
-          (ORv4f32_f32 R32FP:$rA)>;
+          (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
 
 def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
-          (ORv2f64_f64 R64FP:$rA)>;
-
-// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise
-// known as converting the vector back to its preferred slot
+          (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
+ 
+def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
 
-def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
-          (ORi8_v16i8 VECREG:$rA)>;
+def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
 
-def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
-          (ORi16_v8i16 VECREG:$rA)>;
+def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
 
-def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
-          (ORi32_v4i32 VECREG:$rA)>;
+def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
 
-def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
-          (ORi64_v2i64 VECREG:$rA)>;
+def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
 
-def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
-          (ORf32_v4f32 VECREG:$rA)>;
-
-def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
-          (ORf64_v2f64 VECREG:$rA)>;
+def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
 
 // Load Register: This is an assembler alias for a bitwise OR of a register
 // against itself. It's here because it brings some clarity to assembly
@@ -3894,6 +3761,79 @@
 
 defm FS : SFPSub;
 
+class FMInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b01100011010, OOL, IOL,
+      "fm\t$rT, $rA, $rB", SPrecFP,
+      pattern>;
+
+class FMVecInst<ValueType type>:
+    FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+           [(set (type VECREG:$rT),
+                 (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
+
+multiclass SFPMul
+{
+  def v4f32: FMVecInst<v4f32>;
+  def f32:   FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+                     [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; 
+}
+
+defm FM : SFPMul;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+    RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+      "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+      [(set (v4f32 VECREG:$rT),
+            (fadd (v4f32 VECREG:$rC),
+                  (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+    RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+      "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+      [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+    RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+      "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+      [(set (v4f32 VECREG:$rT),
+            (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+                  (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+    RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+      "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+      [(set R32FP:$rT,
+            (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Multiply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+//     = - (a * b - c)
+//     = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+    RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+      "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+      [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+    RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+      "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+      [(set (v4f32 VECREG:$rT),
+            (fsub (v4f32 VECREG:$rC),
+                  (fmul (v4f32 VECREG:$rA),
+                        (v4f32 VECREG:$rB))))]>;
+
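The fma/fms/fnms definitions above spell out their scalar semantics in the
comments; as a cross-check, here is a minimal standalone C++ sketch of those
semantics (the _ref names are illustrative, not from this patch):

  #include <cassert>

  // Scalar reference semantics of the SPU multiply-add family:
  // fma = c + a*b, fms = a*b - c, fnms = c - a*b.
  static float fma_ref (float a, float b, float c) { return c + a * b; }
  static float fms_ref (float a, float b, float c) { return a * b - c; }
  static float fnms_ref(float a, float b, float c) { return c - a * b; }

  int main() {
    // fnms is the negation of fms, matching the derivation in the comment.
    assert(fnms_ref(2.0f, 3.0f, 10.0f) == -fms_ref(2.0f, 3.0f, 10.0f));
    assert(fma_ref(2.0f, 3.0f, 10.0f) == 16.0f);
  }
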
 // Floating point reciprocal estimate
 
 class FRESTInst<dag OOL, dag IOL>:
@@ -4019,72 +3959,6 @@
 // status and control register read
 
 //--------------------------------------
-// Floating point multiply instructions
-//--------------------------------------
-
-def FMv4f32:
-    RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
-      "fm\t$rT, $rA, $rB", SPrecFP,
-      [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA),
-                                      (v4f32 VECREG:$rB)))]>;
-
-def FMf32 :
-    RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
-      "fm\t$rT, $rA, $rB", SPrecFP,
-      [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
-
-// Floating point multiply and add
-// e.g. d = c + (a * b)
-def FMAv4f32:
-    RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
-      "fma\t$rT, $rA, $rB, $rC", SPrecFP,
-      [(set (v4f32 VECREG:$rT),
-            (fadd (v4f32 VECREG:$rC),
-                  (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
-
-def FMAf32:
-    RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
-      "fma\t$rT, $rA, $rB, $rC", SPrecFP,
-      [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-// FP multiply and subtract
-// Subtracts value in rC from product
-// res = a * b - c
-def FMSv4f32 :
-    RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
-      "fms\t$rT, $rA, $rB, $rC", SPrecFP,
-      [(set (v4f32 VECREG:$rT),
-            (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
-                  (v4f32 VECREG:$rC)))]>;
-
-def FMSf32 :
-    RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
-      "fms\t$rT, $rA, $rB, $rC", SPrecFP,
-      [(set R32FP:$rT,
-            (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
-
-// Floating Negative Mulitply and Subtract
-// Subtracts product from value in rC
-// res = fneg(fms a b c)
-//     = - (a * b - c)
-//     = c - a * b
-// NOTE: subtraction order
-// fsub a b = a - b
-// fs a b = b - a?
-def FNMSf32 :
-    RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
-      "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
-      [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-def FNMSv4f32 :
-    RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
-      "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
-      [(set (v4f32 VECREG:$rT),
-            (fsub (v4f32 VECREG:$rC),
-                  (fmul (v4f32 VECREG:$rA),
-                        (v4f32 VECREG:$rB))))]>;
-
-//--------------------------------------
 // Floating Point Conversions
 // Signed conversions:
 def CSiFv4f32:
@@ -4381,30 +4255,43 @@
 def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
 
 def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 
 def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
-          (v16i8 (ORvec_i128 GPRC:$src))>;
+          (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
-          (v8i16 (ORvec_i128 GPRC:$src))>;
+          (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
-          (v4i32 (ORvec_i128 GPRC:$src))>;
+          (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
-          (v2i64 (ORvec_i128 GPRC:$src))>;
+          (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
-          (v4f32 (ORvec_i128 GPRC:$src))>;
+          (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
-          (v2f64 (ORvec_i128 GPRC:$src))>;
+          (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+
+def : Pat<(i32 (bitconvert R32FP:$rA)),
+          (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
+
+def : Pat<(f32 (bitconvert R32C:$rA)),
+          (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
+
+def : Pat<(i64 (bitconvert R64FP:$rA)),
+          (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
+
+def : Pat<(f64 (bitconvert R64C:$rA)),
+          (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
+
 
 //===----------------------------------------------------------------------===//
 // Instruction patterns:
@@ -4455,11 +4342,12 @@
 
 // zext 8->64: Zero extend bytes to double words
 def : Pat<(i64 (zext R8C:$rSrc)),
-          (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32
-                                    (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)),
+          (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
+                                    (COPY_TO_REGCLASS 
+                                       (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
                                     0x4),
                                   (ILv4i32 0x0),
-                                  (FSMBIv4i32 0x0f0f)))>;
+                                  (FSMBIv4i32 0x0f0f)), R64C)>;
 
 // anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
 def : Pat<(i16 (anyext R8C:$rSrc)),
@@ -4467,7 +4355,7 @@
 
 // anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
 def : Pat<(i32 (anyext R8C:$rSrc)),
-          (ORIi8i32 R8C:$rSrc, 0)>;
+          (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
 
 // sext 16->64: Sign extend halfword to double word
 def : Pat<(sext_inreg R64C:$rSrc, i16),
@@ -4491,7 +4379,7 @@
 
 // anyext 16->32: Extend 16->32 bits, irrespective of sign
 def : Pat<(i32 (anyext R16C:$rSrc)),
-          (ORIi16i32 R16C:$rSrc, 0)>;
+          (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
 
 //===----------------------------------------------------------------------===//
 // Truncates:
@@ -4500,61 +4388,61 @@
 //===----------------------------------------------------------------------===//
 
 def : Pat<(i8 (trunc GPRC:$src)),
-          (ORi8_v16i8
+          (COPY_TO_REGCLASS
             (SHUFBgprc GPRC:$src, GPRC:$src,
-                       (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>;
+                       (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
 
 def : Pat<(i8 (trunc R64C:$src)),
-          (ORi8_v16i8
+          (COPY_TO_REGCLASS
             (SHUFBv2i64_m32
-              (ORv2i64_i64 R64C:$src),
-              (ORv2i64_i64 R64C:$src),
-              (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>;
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
 
 def : Pat<(i8 (trunc R32C:$src)),
-          (ORi8_v16i8
+          (COPY_TO_REGCLASS
             (SHUFBv4i32_m32
-               (ORv4i32_i32 R32C:$src),
-               (ORv4i32_i32 R32C:$src),
-               (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+               (COPY_TO_REGCLASS R32C:$src, VECREG),
+               (COPY_TO_REGCLASS R32C:$src, VECREG),
+               (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
 
 def : Pat<(i8 (trunc R16C:$src)),
-          (ORi8_v16i8
+          (COPY_TO_REGCLASS
             (SHUFBv4i32_m32
-               (ORv8i16_i16 R16C:$src),
-               (ORv8i16_i16 R16C:$src),
-               (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+               (COPY_TO_REGCLASS R16C:$src, VECREG),
+               (COPY_TO_REGCLASS R16C:$src, VECREG),
+               (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
 
 def : Pat<(i16 (trunc GPRC:$src)),
-          (ORi16_v8i16
+          (COPY_TO_REGCLASS
             (SHUFBgprc GPRC:$src, GPRC:$src,
-                       (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>;
+                       (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
 
 def : Pat<(i16 (trunc R64C:$src)),
-          (ORi16_v8i16
+          (COPY_TO_REGCLASS
             (SHUFBv2i64_m32
-              (ORv2i64_i64 R64C:$src),
-              (ORv2i64_i64 R64C:$src),
-              (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>;
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
 
 def : Pat<(i16 (trunc R32C:$src)),
-          (ORi16_v8i16
+          (COPY_TO_REGCLASS
             (SHUFBv4i32_m32
-               (ORv4i32_i32 R32C:$src),
-               (ORv4i32_i32 R32C:$src),
-               (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>;
+               (COPY_TO_REGCLASS R32C:$src, VECREG),
+               (COPY_TO_REGCLASS R32C:$src, VECREG),
+               (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
 
 def : Pat<(i32 (trunc GPRC:$src)),
-          (ORi32_v4i32
+          (COPY_TO_REGCLASS
             (SHUFBgprc GPRC:$src, GPRC:$src,
-                       (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>;
+                       (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
 
 def : Pat<(i32 (trunc R64C:$src)),
-          (ORi32_v4i32
+          (COPY_TO_REGCLASS
             (SHUFBv2i64_m32
-              (ORv2i64_i64 R64C:$src),
-              (ORv2i64_i64 R64C:$src),
-              (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>;
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
 
 //===----------------------------------------------------------------------===//
 // Address generation: SPU, like PPC, has to split addresses into high and

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPUOperands.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPUOperands.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPUOperands.td (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPUOperands.td Tue Oct 26 19:48:03 2010
@@ -98,12 +98,6 @@
   return (N->getZExtValue() <= 0xff);
 }]>;
 
-// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign
-// extended field.  Used by RI10Form instructions like 'ldq'.
-def i64ImmSExt10  : PatLeaf<(imm), [{
-  return isI64IntS10Immediate(N);
-}]>;
-
 // i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
 // extended field.  Used by RI10Form instructions like 'ldq'.
 def i32ImmSExt10  : PatLeaf<(imm), [{
@@ -660,7 +654,11 @@
 // A-form   : abs     (256K LSA offset)
 // D-form(2): [r+I7]  (7-bit signed offset + reg)
 
-def dform_addr   : ComplexPattern<iPTR, 2, "SelectDFormAddr",     [], []>;
-def xform_addr   : ComplexPattern<iPTR, 2, "SelectXFormAddr",     [], []>;
-def aform_addr   : ComplexPattern<iPTR, 2, "SelectAFormAddr",     [], []>;
-def dform2_addr  : ComplexPattern<iPTR, 2, "SelectDForm2Addr",    [], []>;
+def dform_addr   : ComplexPattern<iPTR, 2, "SelectDFormAddr",
+                                  [], [SDNPWantRoot]>;
+def xform_addr   : ComplexPattern<iPTR, 2, "SelectXFormAddr",
+                                  [], [SDNPWantRoot]>;
+def aform_addr   : ComplexPattern<iPTR, 2, "SelectAFormAddr",
+                                  [], [SDNPWantRoot]>;
+def dform2_addr  : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
+                                  [], [SDNPWantRoot]>;
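
The i32ImmSExt10 predicate kept above asks whether an immediate survives a
10-bit sign-extended field, i.e. lies in [-512, 511]. A standalone sketch of
that check (fitsInSExt10 is a hypothetical helper, not part of this patch):

  #include <cassert>
  #include <cstdint>

  // True if v is representable in a 10-bit signed (sign-extended) field.
  static bool fitsInSExt10(int64_t v) {
    return v >= -512 && v <= 511;   // [-2^9, 2^9 - 1]
  }

  int main() {
    assert(fitsInSExt10(511)  && !fitsInSExt10(512));
    assert(fitsInSExt10(-512) && !fitsInSExt10(-513));
  }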

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -270,9 +270,8 @@
   MBB.erase(I);
 }
 
-unsigned
+void
 SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
-                                     FrameIndexValue *Value,
                                      RegScavenger *RS) const
 {
   unsigned i = 0;
@@ -328,7 +327,6 @@
   } else {
     MO.ChangeToImmediate(Offset);
   }
-  return 0;
 }
 
 /// determineFrameLayout - Determine the size of the frame and maximum call
@@ -587,6 +585,7 @@
     case SPU::LQDr32:    return SPU::LQXr32;
     case SPU::LQDr128:   return SPU::LQXr128;
     case SPU::LQDv16i8:  return SPU::LQXv16i8;
+    case SPU::LQDv4i32:  return SPU::LQXv4i32;
     case SPU::LQDv4f32:  return SPU::LQXv4f32;
     case SPU::STQDr32:   return SPU::STQXr32;
     case SPU::STQDr128:  return SPU::STQXr128;

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -63,9 +63,8 @@
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I) const;
     //! Convert frame indices into machine operands
-    unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
-                                 FrameIndexValue *Value = NULL,
-                                 RegScavenger *RS = NULL) const;
+    void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                             RegScavenger *RS = NULL) const;
     //! Determine the frame's layout
     void determineFrameLayout(MachineFunction &MF) const;
 

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPURegisterInfo.td Tue Oct 26 19:48:03 2010
@@ -394,7 +394,7 @@
 
 // The SPU's registers as vector registers:
 def VECREG : RegisterClass<"SPU",
-                           [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64],
+                           [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64],
                            128,
  [
    /* volatile register */

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPUSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPUSchedule.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPUSchedule.td (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPUSchedule.td Tue Oct 26 19:48:03 2010
@@ -36,7 +36,7 @@
 def ImmLoad      : InstrItinClass;              // EVEN_UNIT
 
 /* Note: The itinerary for the Cell SPU is somewhat contrived... */
-def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
   InstrItinData<LoadStore   , [InstrStage<6,  [ODD_UNIT]>]>,
   InstrItinData<BranchHints , [InstrStage<6,  [ODD_UNIT]>]>,
   InstrItinData<BranchResolv, [InstrStage<4,  [ODD_UNIT]>]>,

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPUSubtarget.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPUSubtarget.h (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPUSubtarget.h Tue Oct 26 19:48:03 2010
@@ -81,7 +81,7 @@
     /// properties of this subtarget.
     const char *getTargetDataString() const {
       return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
-             "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128"
+             "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
              "-s:128:128-n32:64";
     }
   };

Modified: llvm/branches/wendling/eh/lib/Target/CellSPU/SPUTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CellSPU/SPUTargetMachine.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CellSPU/SPUTargetMachine.h (original)
+++ llvm/branches/wendling/eh/lib/Target/CellSPU/SPUTargetMachine.h Tue Oct 26 19:48:03 2010
@@ -75,8 +75,8 @@
     return &DataLayout;
   }
 
-  virtual const InstrItineraryData getInstrItineraryData() const {
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return &InstrItins;
   }
   
   // Pass Pipeline Configuration
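
Since getInstrItineraryData now hands back a pointer to a long-lived table
rather than a copy, callers test and dereference it. A toy model of the new
accessor shape (ToyTM and ItinTable are stand-ins, not LLVM types):

  #include <cstdio>

  struct ItinTable {
    bool empty;
    bool isEmpty() const { return empty; }
  };

  struct ToyTM {
    ItinTable Itins;
    // Pointer-returning accessor, mirroring the change above.
    const ItinTable *getInstrItineraryData() const { return &Itins; }
  };

  int main() {
    ToyTM TM = { { true } };
    const ItinTable *I = TM.getInstrItineraryData();
    std::printf("%s\n", (I && !I->isEmpty()) ? "have itineraries" : "none");
  }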

Modified: llvm/branches/wendling/eh/lib/Target/CppBackend/CPPBackend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/CppBackend/CPPBackend.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/CppBackend/CPPBackend.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/CppBackend/CPPBackend.cpp Tue Oct 26 19:48:03 2010
@@ -104,7 +104,7 @@
   public:
     static char ID;
     explicit CppWriter(formatted_raw_ostream &o) :
-      ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+      ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
 
     virtual const char *getPassName() const { return "C++ backend"; }
 
@@ -288,6 +288,8 @@
     Out << "GlobalValue::LinkerPrivateLinkage"; break;
   case GlobalValue::LinkerPrivateWeakLinkage:
     Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+  case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+    Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break;
   case GlobalValue::AvailableExternallyLinkage:
     Out << "GlobalValue::AvailableExternallyLinkage "; break;
   case GlobalValue::LinkOnceAnyLinkage:
@@ -356,6 +358,7 @@
     case Type::FloatTyID:    return "Type::getFloatTy(mod->getContext())";
     case Type::DoubleTyID:   return "Type::getDoubleTy(mod->getContext())";
     case Type::LabelTyID:    return "Type::getLabelTy(mod->getContext())";
+    case Type::X86_MMXTyID:  return "Type::getX86_MMXTy(mod->getContext())";
     default:
       error("Invalid primitive type");
       break;
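
The CppWriter constructor reflects the LLVM API change from passing &ID to
passing ID when initializing a pass. A minimal module pass under the new
convention (a sketch against the in-tree headers of this era, not part of
this patch):

  #include "llvm/Pass.h"
  #include "llvm/Module.h"
  using namespace llvm;

  namespace {
    // Pass IDs are now taken by reference to the static char itself.
    struct ToyPass : public ModulePass {
      static char ID;
      ToyPass() : ModulePass(ID) {}
      virtual bool runOnModule(Module &M) { return false; }
    };
  }
  char ToyPass::ID = 0;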

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -5,7 +5,9 @@
 tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc)
 tablegen(MBlazeGenInstrNames.inc -gen-instr-enums)
 tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc)
+tablegen(MBlazeGenCodeEmitter.inc -gen-emitter)
 tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
+tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
 tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
 tablegen(MBlazeGenSubtarget.inc -gen-subtarget)
@@ -23,6 +25,9 @@
   MBlazeTargetObjectFile.cpp
   MBlazeIntrinsicInfo.cpp
   MBlazeSelectionDAGInfo.cpp
+  MBlazeAsmPrinter.cpp
+  MBlazeAsmBackend.cpp
+  MBlazeMCInstLower.cpp
+  MBlazeELFWriterInfo.cpp
+  MBlazeMCCodeEmitter.cpp
   )
-
-target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlaze.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlaze.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlaze.h (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlaze.h Tue Oct 26 19:48:03 2010
@@ -21,8 +21,16 @@
   class MBlazeTargetMachine;
   class FunctionPass;
   class MachineCodeEmitter;
+  class MCCodeEmitter;
+  class TargetAsmBackend;
   class formatted_raw_ostream;
 
+  MCCodeEmitter *createMBlazeMCCodeEmitter(const Target &,
+                                           TargetMachine &TM,
+                                           MCContext &Ctx);
+
+  TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
+
   FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
   FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
 

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlaze.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlaze.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlaze.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlaze.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===//
+//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -32,35 +32,35 @@
 //===----------------------------------------------------------------------===//
 
 def FeaturePipe3       : SubtargetFeature<"pipe3", "HasPipe3", "true",
-                                "Implements 3-stage pipeline.">;
+                                "Implements 3-stage pipeline">;
 def FeatureBarrel      : SubtargetFeature<"barrel", "HasBarrel", "true",
-                                "Implements barrel shifter.">;
+                                "Implements barrel shifter">;
 def FeatureDiv         : SubtargetFeature<"div", "HasDiv", "true",
-                                "Implements hardware divider.">;
+                                "Implements hardware divider">;
 def FeatureMul         : SubtargetFeature<"mul", "HasMul", "true",
-                                "Implements hardware multiplier.">;
+                                "Implements hardware multiplier">;
 def FeatureFSL         : SubtargetFeature<"fsl", "HasFSL", "true",
-                                "Implements FSL instructions.">;
+                                "Implements FSL instructions">;
 def FeatureEFSL        : SubtargetFeature<"efsl", "HasEFSL", "true",
-                                "Implements extended FSL instructions.">;
+                                "Implements extended FSL instructions">;
 def FeatureMSRSet      : SubtargetFeature<"msrset", "HasMSRSet", "true",
-                                "Implements MSR register set and clear.">;
+                                "Implements MSR register set and clear">;
 def FeatureException   : SubtargetFeature<"exception", "HasException", "true",
-                                "Implements hardware exception support.">;
+                                "Implements hardware exception support">;
 def FeaturePatCmp      : SubtargetFeature<"patcmp", "HasPatCmp", "true",
-                                "Implements pattern compare instruction.">;
+                                "Implements pattern compare instruction">;
 def FeatureFPU         : SubtargetFeature<"fpu", "HasFPU", "true",
-                                "Implements floating point unit.">;
+                                "Implements floating point unit">;
 def FeatureESR         : SubtargetFeature<"esr", "HasESR", "true",
                                 "Implements ESR and EAR registers">;
 def FeaturePVR         : SubtargetFeature<"pvr", "HasPVR", "true",
-                                "Implements processor version register.">;
+                                "Implements processor version register">;
 def FeatureMul64       : SubtargetFeature<"mul64", "HasMul64", "true",
                                 "Implements multiplier with 64-bit result">;
 def FeatureSqrt        : SubtargetFeature<"sqrt", "HasSqrt", "true",
-                                "Implements sqrt and floating point convert.">;
+                                "Implements sqrt and floating point convert">;
 def FeatureMMU         : SubtargetFeature<"mmu", "HasMMU", "true",
-                                "Implements memory management unit.">;
+                                "Implements memory management unit">;
 
 //===----------------------------------------------------------------------===//
 // MBlaze processors supported.
@@ -69,13 +69,26 @@
 class Proc<string Name, list<SubtargetFeature> Features>
  : Processor<Name, MBlazeGenericItineraries, Features>;
 
-
 def : Proc<"v400", []>;
 def : Proc<"v500", []>;
 def : Proc<"v600", []>;
 def : Proc<"v700", []>;
 def : Proc<"v710", []>;
 
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+def MBlazeAsmWriter : AsmWriter {
+  string AsmWriterClassName  = "InstPrinter";
+  bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
 def MBlaze : Target {
   let InstructionSet = MBlazeInstrInfo;
+  let AssemblyWriters = [MBlazeAsmWriter];
 }

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeCallingConv.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeCallingConv.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeCallingConv.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===//
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
 // 
 //                     The LLVM Compiler Infrastructure
 //
@@ -21,6 +21,6 @@
   // i32 are returned in registers R3, R4
   CCIfType<[i32], CCAssignToReg<[R3, R4]>>,
 
-  // f32 are returned in registers F3, F4
-  CCIfType<[f32], CCAssignToReg<[F3, F4]>>
+  // f32 are returned in registers R3, R4
+  CCIfType<[f32], CCAssignToReg<[R3, R4]>>
 ]>;
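
With f32 mapped onto R3/R4 alongside i32, a float return value is simply its
32-bit pattern carried in a general register. A host-side analogue of that
reinterpretation (standard C++, not target code):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // The bit pattern an f32 return would occupy in R3 under this convention.
  static uint32_t bitsOf(float f) {
    uint32_t u;
    std::memcpy(&u, &f, sizeof u);   // well-defined way to read the bits
    return u;
  }

  int main() {
    assert(bitsOf(1.0f) == 0x3f800000u);   // IEEE-754 single for 1.0
  }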

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp Tue Oct 26 19:48:03 2010
@@ -19,6 +19,10 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -32,7 +36,7 @@
 
     static char ID;
     Filler(TargetMachine &tm) 
-      : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
 
     virtual const char *getPassName() const {
       return "MBlaze Delay Slot Filler";
@@ -51,6 +55,91 @@
   char Filler::ID = 0;
 } // end of anonymous namespace
 
+static bool hasImmInstruction( MachineBasicBlock::iterator &candidate ) {
+    // Any instruction with an immediate mode operand greater than
+    // 16-bits requires an implicit IMM instruction.
+    unsigned numOper = candidate->getNumOperands();
+    for( unsigned op = 0; op < numOper; ++op ) {
+        if( candidate->getOperand(op).isImm() &&
+            (candidate->getOperand(op).getImm() & 0xFFFFFFFFFFFF0000LL) != 0 )
+            return true;
+
+        // FIXME: we could probably check to see if the FP value happens
+        //        to not need an IMM instruction. For now we just always
+        //        assume that FP values always do.
+        if( candidate->getOperand(op).isFPImm() )
+            return true;
+    }
+
+    return false;
+}
+
+static bool delayHasHazard( MachineBasicBlock::iterator &candidate,
+                            MachineBasicBlock::iterator &slot ) {
+
+    // Loop over all of the operands in the branch instruction
+    // and make sure that none of them are defined by the
+    // candidate instruction.
+    unsigned numOper = slot->getNumOperands();
+    for( unsigned op = 0; op < numOper; ++op ) {
+        if( !slot->getOperand(op).isReg() || 
+            !slot->getOperand(op).isUse() ||
+            slot->getOperand(op).isImplicit() )
+            continue;
+
+        unsigned cnumOper = candidate->getNumOperands();
+        for( unsigned cop = 0; cop < cnumOper; ++cop ) {
+            if( candidate->getOperand(cop).isReg() &&
+                candidate->getOperand(cop).isDef() &&
+                candidate->getOperand(cop).getReg() == 
+                slot->getOperand(op).getReg() )
+                return true;
+        }
+    }
+
+    // There are no hazards between the two instructions
+    return false;
+}
+
+static bool usedBeforeDelaySlot( MachineBasicBlock::iterator &candidate,
+                                 MachineBasicBlock::iterator &slot ) {
+  MachineBasicBlock::iterator I = candidate;
+  for (++I; I != slot; ++I) {
+        unsigned numOper = I->getNumOperands();
+        for( unsigned op = 0; op < numOper; ++op ) {
+            if( I->getOperand(op).isReg() &&
+                I->getOperand(op).isUse() ) {
+                unsigned reg = I->getOperand(op).getReg();
+                unsigned cops = candidate->getNumOperands();
+                for( unsigned cop = 0; cop < cops; ++cop ) {
+                    if( candidate->getOperand(cop).isReg() &&
+                        candidate->getOperand(cop).isDef() &&
+                        candidate->getOperand(cop).getReg() == reg )
+                        return true;
+                }
+            }
+        }
+  }
+
+  return false;
+}
+
+static MachineBasicBlock::iterator
+findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator &slot) {
+  MachineBasicBlock::iterator found = MBB.end();
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != slot; ++I) {
+      const TargetInstrDesc &desc = I->getDesc();
+      if( desc.hasDelaySlot() || desc.isBranch() ||
+          desc.mayLoad() || desc.mayStore() ||
+          hasImmInstruction(I) || delayHasHazard(I, slot) ||
+          usedBeforeDelaySlot(I, slot)) continue;
+
+      found = I;
+  }
+
+  return found;
+}
+
 /// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
 /// Currently, we fill delay slots with NOPs. We assume there is only one
 /// delay slot per delayed instruction.
@@ -59,10 +148,16 @@
   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
     if (I->getDesc().hasDelaySlot()) {
       MachineBasicBlock::iterator J = I;
+      MachineBasicBlock::iterator D = findDelayInstr(MBB,I);
+
       ++J;
-      BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP));
       ++FilledSlots;
       Changed = true;
+
+      if( D == MBB.end() )
+        BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+      else
+        MBB.splice( J, &MBB, D );
     }
   return Changed;
 }
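
The filler rejects a candidate that defines a register the branch reads, or
that carries an immediate needing the implicit IMM prefix. A toy model of
those two checks (simplified containers, not the pass's real structures):

  #include <algorithm>
  #include <cassert>
  #include <vector>

  struct ToyInst {
    std::vector<unsigned> uses;   // registers read
    std::vector<unsigned> defs;   // registers written
  };

  // Mirrors delayHasHazard(): the candidate may not define any register
  // the branch instruction reads.
  static bool hasHazard(const ToyInst &cand, const ToyInst &branch) {
    for (size_t i = 0; i < branch.uses.size(); ++i)
      if (std::find(cand.defs.begin(), cand.defs.end(),
                    branch.uses[i]) != cand.defs.end())
        return true;
    return false;
  }

  // Mirrors hasImmInstruction()'s mask test: any bits above the low 16
  // force an IMM prefix and keep the instruction out of the delay slot.
  static bool needsImmPrefix(long long v) {
    return (v & 0xFFFFFFFFFFFF0000LL) != 0;
  }

  int main() {
    ToyInst branch, defR5, defR6;
    branch.uses.push_back(5);
    defR5.defs.push_back(5);
    defR6.defs.push_back(6);
    assert(hasHazard(defR5, branch) && !hasHazard(defR6, branch));
    assert(needsImmPrefix(70000) && !needsImmPrefix(0x1234));
  }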

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -81,13 +81,9 @@
   SDNode *getGlobalBaseReg();
   SDNode *Select(SDNode *N);
 
-  // Complex Pattern.
-  bool SelectAddr(SDNode *Op, SDValue N,
-                  SDValue &Base, SDValue &Offset);
-
   // Address Selection
-  bool SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index);
-  bool SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base);
+  bool SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index);
+  bool SelectAddrRegImm(SDValue N, SDValue &Disp, SDValue &Base);
 
   // getI32Imm - Return a target constant with the specified value, of type i32.
   inline SDValue getI32Imm(unsigned Imm) {
@@ -122,7 +118,7 @@
 /// can be represented as an indexed [r+r] operation.  Returns false if it
 /// can be more efficiently represented with [r+imm].
 bool MBlazeDAGToDAGISel::
-SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) {
+SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index) {
   if (N.getOpcode() == ISD::FrameIndex) return false;
   if (N.getOpcode() == ISD::TargetExternalSymbol ||
       N.getOpcode() == ISD::TargetGlobalAddress)
@@ -149,9 +145,9 @@
 /// a signed 32-bit displacement [r+imm], and if it is not better
 /// represented as reg+reg.
 bool MBlazeDAGToDAGISel::
-SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) {
+SelectAddrRegImm(SDValue N, SDValue &Disp, SDValue &Base) {
   // If this can be more profitably realized as r+r, fail.
-  if (SelectAddrRegReg(Op, N, Disp, Base))
+  if (SelectAddrRegReg(N, Disp, Base))
     return false;
 
   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
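
Note the ordering in the hunk above: SelectAddrRegImm gives reg+reg selection
first refusal and only falls back to a base-plus-displacement form when [r+r]
is not profitable. A toy model of that policy (simplified, not the real
SelectionDAG types):

  #include <cstdio>

  // An address either has an index register ([r+r]) or a displacement.
  struct ToyAddr { bool hasIndexReg; int disp; };

  static bool selectRegReg(const ToyAddr &a) { return a.hasIndexReg; }

  static bool selectRegImm(const ToyAddr &a) {
    if (selectRegReg(a))   // more profitably realized as r+r: fail
      return false;
    return true;           // use base + signed displacement
  }

  int main() {
    ToyAddr indexed = { true, 0 }, offset = { false, 8 };
    std::printf("%d %d\n", selectRegImm(indexed), selectRegImm(offset)); // 0 1
  }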
@@ -190,56 +186,6 @@
   return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
 }
 
-/// ComplexPattern used on MBlazeInstrInfo
-/// Used on MBlaze Load/Store instructions
-bool MBlazeDAGToDAGISel::
-SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) {
-  // if Address is FI, get the TargetFrameIndex.
-  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
-    Offset = CurDAG->getTargetConstant(0, MVT::i32);
-    return true;
-  }
-
-  // on PIC code Load GA
-  if (TM.getRelocationModel() == Reloc::PIC_) {
-    if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
-        (Addr.getOpcode() == ISD::TargetConstantPool) ||
-        (Addr.getOpcode() == ISD::TargetJumpTable)){
-      Base   = CurDAG->getRegister(MBlaze::R15, MVT::i32);
-      Offset = Addr;
-      return true;
-    }
-  } else {
-    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
-        Addr.getOpcode() == ISD::TargetGlobalAddress))
-      return false;
-  }
-
-  // Operand is a result from an ADD.
-  if (Addr.getOpcode() == ISD::ADD) {
-    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
-      if (Predicate_immSExt16(CN)) {
-
-        // If the first operand is a FI, get the TargetFI Node
-        if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
-                                    (Addr.getOperand(0))) {
-          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
-        } else {
-          Base = Addr.getOperand(0);
-        }
-
-        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
-        return true;
-      }
-    }
-  }
-
-  Base   = Addr;
-  Offset = CurDAG->getTargetConstant(0, MVT::i32);
-  return true;
-}
-
 /// Select instructions not customized! Used for
 /// expanded, promoted and normal instructions
 SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -56,9 +56,9 @@
   setBooleanContents(ZeroOrOneBooleanContent);
 
   // Set up the register classes
-  addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass);
+  addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass);
   if (Subtarget->hasFPU()) {
-    addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass);
+    addRegisterClass(MVT::f32, MBlaze::GPRRegisterClass);
     setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
   }
 
@@ -86,6 +86,10 @@
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1,  Promote);
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1,  Promote);
 
+  // Sign extended loads must be expanded
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
   // MBlaze has no REM or DIVREM operations.
   setOperationAction(ISD::UREM,    MVT::i32, Expand);
   setOperationAction(ISD::SREM,    MVT::i32, Expand);
@@ -253,12 +257,12 @@
     loop->addSuccessor(finish);
     loop->addSuccessor(loop);
 
-    unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    unsigned IAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
     BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
       .addReg(MI->getOperand(2).getReg())
       .addImm(31);
 
-    unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    unsigned IVAL = R.createVirtualRegister(MBlaze::GPRRegisterClass);
     BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL)
       .addReg(MI->getOperand(1).getReg())
       .addImm(0);
@@ -267,14 +271,14 @@
       .addReg(IAMT)
       .addMBB(finish);
 
-    unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
-    unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    unsigned DST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+    unsigned NDST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
     BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
       .addReg(IVAL).addMBB(BB)
       .addReg(NDST).addMBB(loop);
 
-    unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
-    unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    unsigned SAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+    unsigned NAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
     BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
       .addReg(IAMT).addMBB(BB)
       .addReg(NAMT).addMBB(loop);
@@ -421,15 +425,12 @@
   SDValue HiPart;
   // FIXME there isn't actually debug info here
   DebugLoc dl = Op.getDebugLoc();
-  bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
-  unsigned char OpFlag = IsPIC ? MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO;
 
   EVT PtrVT = Op.getValueType();
   JumpTableSDNode *JT  = cast<JumpTableSDNode>(Op);
 
-  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 0);
   return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
-  //return JTI;
 }
 
 SDValue MBlazeTargetLowering::
@@ -440,7 +441,7 @@
   DebugLoc dl = Op.getDebugLoc();
 
   SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
-                                         N->getOffset(), MBlazeII::MO_ABS_HILO);
+                                         N->getOffset(), 0);
   return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
 }
 
@@ -456,7 +457,8 @@
   // vastart just stores the address of the VarArgsFrameIndex slot into the
   // memory location argument.
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0,
+  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+                      MachinePointerInfo(SV),
                       false, false, 0);
 }
 
@@ -475,11 +477,6 @@
     MBlaze::R8, MBlaze::R9, MBlaze::R10
   };
 
-  static const unsigned FltRegs[] = {
-    MBlaze::F5, MBlaze::F6, MBlaze::F7,
-    MBlaze::F8, MBlaze::F9, MBlaze::F10
-  };
-
   unsigned Reg=0;
 
   // Promote i8 and i16
@@ -497,7 +494,7 @@
     Reg = State.AllocateReg(IntRegs, RegsSize);
     LocVT = MVT::i32;
   } else if (ValVT == MVT::f32) {
-    Reg = State.AllocateReg(FltRegs, RegsSize);
+    Reg = State.AllocateReg(IntRegs, RegsSize);
     LocVT = MVT::f32;
   }
 
@@ -591,7 +588,8 @@
 
       // emit ISD::STORE whichs stores the
       // parameter value to a stack Location
-      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                         MachinePointerInfo(),
                                          false, false, 0));
     }
   }
@@ -616,13 +614,12 @@
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   // node so that legalize doesn't hack it.
-  unsigned char OpFlag = MBlazeII::MO_NO_FLAG;
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
-                                getPointerTy(), 0, OpFlag);
+                                getPointerTy(), 0, 0);
   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
-                                getPointerTy(), OpFlag);
+                                getPointerTy(), 0);
 
   // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
   //             = Chain, Callee, Reg#1, Reg#2, ...
@@ -728,9 +725,9 @@
       TargetRegisterClass *RC = 0;
 
       if (RegVT == MVT::i32)
-        RC = MBlaze::CPURegsRegisterClass;
+        RC = MBlaze::GPRRegisterClass;
       else if (RegVT == MVT::f32)
-        RC = MBlaze::FGR32RegisterClass;
+        RC = MBlaze::GPRRegisterClass;
       else
         llvm_unreachable("RegVT not supported by LowerFormalArguments");
 
@@ -780,7 +777,8 @@
 
       // Create load nodes to retrieve arguments from the stack
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -793,7 +791,7 @@
       StackPtr = DAG.getRegister(StackReg, getPointerTy());
 
     // The last register argument that must be saved is MBlaze::R10
-    TargetRegisterClass *RC = MBlaze::CPURegsRegisterClass;
+    TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
 
     unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5);
     unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1);
@@ -808,7 +806,8 @@
       int FI = MFI->CreateFixedObject(4, 0, true);
       MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
-      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+                                       MachinePointerInfo(),
                                        false, false, 0));
 
       // Record the frame index of the first variable argument
@@ -917,10 +916,10 @@
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
     case 'r':
-      return std::make_pair(0U, MBlaze::CPURegsRegisterClass);
+      return std::make_pair(0U, MBlaze::GPRRegisterClass);
     case 'f':
       if (VT == MVT::f32)
-        return std::make_pair(0U, MBlaze::FGR32RegisterClass);
+        return std::make_pair(0U, MBlaze::GPRRegisterClass);
     }
   }
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
@@ -940,6 +939,7 @@
     // GCC MBlaze Constraint Letters
     case 'd':
     case 'y':
+    case 'f':
       return make_vector<unsigned>(
         MBlaze::R3,  MBlaze::R4,  MBlaze::R5,  MBlaze::R6,
         MBlaze::R7,  MBlaze::R9,  MBlaze::R10, MBlaze::R11,
@@ -947,15 +947,6 @@
         MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25,
         MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29,
         MBlaze::R30, MBlaze::R31, 0);
-
-    case 'f':
-      return make_vector<unsigned>(
-        MBlaze::F3,  MBlaze::F4,  MBlaze::F5,  MBlaze::F6,
-        MBlaze::F7,  MBlaze::F9,  MBlaze::F10, MBlaze::F11,
-        MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21,
-        MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25,
-        MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29,
-        MBlaze::F30, MBlaze::F31, 0);
   }
   return std::vector<unsigned>();
 }

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelLowering.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelLowering.h (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeISelLowering.h Tue Oct 26 19:48:03 2010
@@ -15,6 +15,7 @@
 #ifndef MBlazeISELLOWERING_H
 #define MBlazeISELLOWERING_H
 
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
 #include "MBlaze.h"
@@ -31,6 +32,30 @@
       GE,
       LE
     };
+
+    inline static CC getOppositeCondition(CC cc) {
+      switch (cc) {
+      default: llvm_unreachable("Unknown condition code");
+      case EQ: return NE;
+      case NE: return EQ;
+      case GT: return LE;
+      case LT: return GE;
+      case GE: return LT;
+      case LE: return GT;
+      }
+    }
+
+    inline static const char *MBlazeCCToString(CC cc) {
+      switch (cc) {
+      default: llvm_unreachable("Unknown condition code");
+      case EQ: return "eq";
+      case NE: return "ne";
+      case GT: return "gt";
+      case LT: return "lt";
+      case GE: return "ge";
+      case LE: return "le";
+      }
+    }
   }
 
   namespace MBlazeISD {
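
The getOppositeCondition table added above is its own inverse: negating a
condition twice must give back the original. A standalone check of that
property (plain C++, independent of the LLVM headers):

  #include <cassert>

  enum CC { EQ, NE, GT, LT, GE, LE };

  // Same mapping as MBlazeCC::getOppositeCondition.
  static CC opposite(CC cc) {
    switch (cc) {
    case EQ: return NE;  case NE: return EQ;
    case GT: return LE;  case LT: return GE;
    case GE: return LT;  case LE: return GT;
    }
    return EQ;   // unreachable for valid inputs
  }

  int main() {
    const CC all[] = { EQ, NE, GT, LT, GE, LE };
    for (int i = 0; i < 6; ++i)
      assert(opposite(opposite(all[i])) == all[i]);   // involution holds
  }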

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFPU.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFPU.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFPU.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===//
+//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,72 +19,72 @@
 // Memory Access Instructions
 //===----------------------------------------------------------------------===//
 class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
-             TA<op, 0x000, (outs FGR32:$dst), (ins memrr:$addr),
+             TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
                 !strconcat(instr_asm, "   $dst, $addr"),
-                [(set FGR32:$dst, (OpNode xaddr:$addr))], IILoad>;
+                [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IILoad>;
 
 class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
-              TAI<op, (outs FGR32:$dst), (ins memri:$addr),
-                  !strconcat(instr_asm, "   $dst, $addr"),
-                  [(set FGR32:$dst, (OpNode iaddr:$addr))], IILoad>;
+              TB<op, (outs GPR:$dst), (ins memri:$addr),
+                 !strconcat(instr_asm, "   $dst, $addr"),
+                 [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
 
 class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
-              TA<op, 0x000, (outs), (ins FGR32:$dst, memrr:$addr),
+              TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
                  !strconcat(instr_asm, "   $dst, $addr"),
-                 [(OpNode FGR32:$dst, xaddr:$addr)], IIStore>;
+                 [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIStore>;
 
 class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
-               TAI<op, (outs), (ins FGR32:$dst, memrr:$addr),
-                   !strconcat(instr_asm, "   $dst, $addr"),
-                   [(OpNode FGR32:$dst, iaddr:$addr)], IIStore>;
+               TB<op, (outs), (ins GPR:$dst, memrr:$addr),
+                  !strconcat(instr_asm, "   $dst, $addr"),
+                  [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIStore>;
 
 class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
              InstrItinClass itin> :
-             TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+             TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
+                [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
 
 class CmpFN<bits<6> op, bits<11> flags, string instr_asm,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins FGR32:$b, FGR32:$c),
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                !strconcat(instr_asm, "   $dst, $b, $c"),
                [], itin>;
 
 class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
              InstrItinClass itin> :
-             TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+             TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                 !strconcat(instr_asm, "   $dst, $c, $b"),
-                [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
-
-class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
-              InstrItinClass itin> :
-              TF<op, flags, (outs FGR32:$dst), (ins FGR32:$b),
-                 !strconcat(instr_asm, "   $dst, $b"),
-                 [], itin>;
-
-class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
-              InstrItinClass itin> :
-              TF<op, flags, (outs FGR32:$dst), (ins CPURegs:$b),
-                 !strconcat(instr_asm, "   $dst, $b"),
-                 [], itin>;
-
-class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
-              InstrItinClass itin> :
-              TF<op, flags, (outs CPURegs:$dst), (ins FGR32:$b),
-                 !strconcat(instr_asm, "   $dst, $b"),
-                 [], itin>;
+                [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
 
 class LogicF<bits<6> op, string instr_asm> :
-             TAI<op, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [],
-                 IIAlu>;
+             TB<op, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [], IIAlu>;
 
 class LogicFI<bits<6> op, string instr_asm> :
-             TAI<op, (outs FGR32:$dst), (ins FGR32:$b, fimm:$c),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [],
-                 IIAlu>;
+             TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [], IIAlu>;
+
+let rb=0 in {
+  class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
+                InstrItinClass itin> :
+                TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+                   !strconcat(instr_asm, "   $dst, $b"),
+                   [], itin>;
+
+  class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
+                InstrItinClass itin> :
+                TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+                   !strconcat(instr_asm, "   $dst, $b"),
+                   [], itin>;
+
+  class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
+                InstrItinClass itin> :
+                TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+                   !strconcat(instr_asm, "   $dst, $b"),
+                   [], itin>;
+}
 
 //===----------------------------------------------------------------------===//
 // Pseudo instructions
@@ -94,7 +94,6 @@
 // FPU Arithmetic Instructions
 //===----------------------------------------------------------------------===//
 let Predicates=[HasFPU] in {
-  def FOR    :  LogicF<0x28, "or     ">;
   def FORI   : LogicFI<0x28, "ori    ">;
   def FADD   :  ArithF<0x16, 0x000, "fadd   ", fadd, IIAlu>;
   def FRSUB  : ArithFR<0x16, 0x080, "frsub  ", fsub, IIAlu>;
@@ -126,98 +125,98 @@
 
 
 let usesCustomInserter = 1 in {
-  def Select_FCC : MBlazePseudo<(outs FGR32:$dst),
-    (ins FGR32:$T, FGR32:$F, CPURegs:$CMP, i32imm:$CC),
+  def Select_FCC : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC),
     "; SELECT_FCC PSEUDO!",
     []>;
 }
 
 // Floating point conversions
 let Predicates=[HasFPU] in {
-  def : Pat<(sint_to_fp CPURegs:$V), (FLT CPURegs:$V)>;
-  def : Pat<(fp_to_sint FGR32:$V), (FINT FGR32:$V)>;
-  def : Pat<(fsqrt FGR32:$V), (FSQRT FGR32:$V)>;
+  def : Pat<(sint_to_fp GPR:$V), (FLT GPR:$V)>;
+  def : Pat<(fp_to_sint GPR:$V), (FINT GPR:$V)>;
+  def : Pat<(fsqrt GPR:$V), (FSQRT GPR:$V)>;
 }
 
 // SET_CC operations
 let Predicates=[HasFPU] in {
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETEQ),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETNE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_EQ FGR32:$L, FGR32:$R), 1)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOEQ),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (XOR (FCMP_UN FGR32:$L, FGR32:$R),
-                            (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETGT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETLT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETGE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETLE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUEQ),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUNE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_NE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_GT FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETULT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_LT FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_GE FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETULE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_LE FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETO),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_UN FGR32:$L, FGR32:$R), 1)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUO),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_UN FGR32:$L, FGR32:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETEQ),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETNE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_EQ GPR:$L, GPR:$R), 1)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOEQ),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (XOR (FCMP_UN GPR:$L, GPR:$R),
+                            (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGT),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_GT GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLT),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_LT GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_GE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_LE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGT),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_GT GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLT),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_LT GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_GE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_LE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUEQ),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUNE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_NE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGT),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_GT GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULT),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_LT GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_GE GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULE),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_LE GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETO),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_UN GPR:$L, GPR:$R), 1)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUO),
+            (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0),
+                       (FCMP_UN GPR:$L, GPR:$R), 2)>;
 }
 
 // SELECT operations
-def : Pat<(select CPURegs:$C, FGR32:$T, FGR32:$F),
-          (Select_FCC FGR32:$T, FGR32:$F, CPURegs:$C, 2)>;
+def : Pat<(select (i32 GPR:$C), (f32 GPR:$T), (f32 GPR:$F)),
+          (Select_FCC GPR:$T, GPR:$F, GPR:$C, 2)>;
 
 //===----------------------------------------------------------------------===//
 // Patterns for Floating Point Instructions
 //===----------------------------------------------------------------------===//
-def : Pat<(f32 fpimm:$imm), (FORI F0, fpimm:$imm)>;
+def : Pat<(f32 fpimm:$imm), (FORI (i32 R0), fpimm:$imm)>;
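
To make the intent of the setcc rewrites above concrete, here is a small
C sketch of the value each pattern computes. This is my illustration, not
code from the patch: I am glossing the Select_CC condition-code operand
(the trailing 1 or 2) as a plain nonzero test, and relying on r0 reading
as zero, which is why (ADDI R0, 1) and (ADDI R0, 0) materialize the
constants 1 and 0.

  /* SETUGT ("unordered or greater than") as a representative case. */
  static int setccUGT(float L, float R) {
    int un  = (L != L) || (R != R); /* FCMP_UN: true iff either input is NaN */
    int gt  = (L > R);              /* FCMP_GT */
    int cmp = un | gt;              /* the OR node in the SETUGT pattern */
    return cmp ? 1 : 0;             /* Select_CC: ADDI r0,1 vs. ADDI r0,0 */
  }

The final fpimm pattern plays the same r0 trick with FORI: or-ing the
constant's bit pattern against the zero register materializes the f32
value in a GPR (presumably only for immediates whose bits fit the
instruction's 16-bit immediate field).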

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFSL.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFSL.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFSL.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFSL.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===//
+//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,144 +10,208 @@
 //===----------------------------------------------------------------------===//
 // FSL Instruction Formats
 //===----------------------------------------------------------------------===//
-class FSLGetD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
-              TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b),
-                 !strconcat(instr_asm, " $dst, $b"),
-                 [(set CPURegs:$dst, (OpNode CPURegs:$b))], IIAlu>;
-
-class FSLGet<bits<6> op, string instr_asm, Intrinsic OpNode> :
-             TAI<op, (outs CPURegs:$dst), (ins fslimm:$b),
-                 !strconcat(instr_asm, " $dst, $b"),
-                 [(set CPURegs:$dst, (OpNode immZExt4:$b))], IIAlu>;
-
-class FSLPutD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
-              TA<op, flags, (outs), (ins CPURegs:$v, CPURegs:$b),
-                 !strconcat(instr_asm, " $v, $b"),
-                 [(OpNode CPURegs:$v, CPURegs:$b)], IIAlu>;
-
-class FSLPut<bits<6> op, string instr_asm, Intrinsic OpNode> :
-             TAI<op, (outs), (ins CPURegs:$v, fslimm:$b),
-                 !strconcat(instr_asm, " $v, $b"),
-                 [(OpNode CPURegs:$v, immZExt4:$b)], IIAlu>;
-
-class FSLPutTD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
-               TA<op, flags, (outs), (ins CPURegs:$b),
-                  !strconcat(instr_asm, " $b"),
-                  [(OpNode CPURegs:$b)], IIAlu>;
-
-class FSLPutT<bits<6> op, string instr_asm, Intrinsic OpNode> :
-              TAI<op, (outs), (ins fslimm:$b),
-                  !strconcat(instr_asm, " $b"),
-                  [(OpNode immZExt4:$b)], IIAlu>;
+class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+             MBlazeInst<op, FFSL, (outs GPR:$dst), (ins fslimm:$b),
+                        !strconcat(instr_asm, " $dst, $b"),
+                        [(set GPR:$dst, (OpNode immZExt4:$b))], IIAlu>
+{
+    bits<5> rd;
+    bits<4> fslno;
+
+    let Inst{6-10}  = rd;
+    let Inst{11-15} = 0x0;
+    let Inst{16}    = 0x0;
+    let Inst{17-21} = flags; // NCTAE
+    let Inst{22-27} = 0x0;
+    let Inst{28-31} = fslno;
+}
+
+class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+              MBlazeInst<op, FFSLD, (outs GPR:$dst), (ins GPR:$b),
+                         !strconcat(instr_asm, " $dst, $b"),
+                         [(set GPR:$dst, (OpNode GPR:$b))], IIAlu>
+{
+    bits<5> rd;
+    bits<5> rb;
+
+    let Inst{6-10}  = rd;
+    let Inst{11-15} = 0x0;
+    let Inst{16-20} = rb;
+    let Inst{21}    = 0x0;
+    let Inst{22-26} = flags; // NCTAE
+    let Inst{27-31} = 0x0;
+}
+
+class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+             MBlazeInst<op, FFSL, (outs), (ins GPR:$v, fslimm:$b),
+                        !strconcat(instr_asm, " $v, $b"),
+                        [(OpNode GPR:$v, immZExt4:$b)], IIAlu>
+{
+    bits<5> ra;
+    bits<4> fslno;
+
+    let Inst{6-10}  = 0x0;
+    let Inst{11-15} = ra;
+    let Inst{16}    = 0x1;
+    let Inst{17-20} = flags; // NCTA
+    let Inst{21-27} = 0x0;
+    let Inst{28-31} = fslno;
+}
+
+class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+              MBlazeInst<op, FFSLD, (outs), (ins GPR:$v, GPR:$b),
+                         !strconcat(instr_asm, " $v, $b"),
+                         [(OpNode GPR:$v, GPR:$b)], IIAlu>
+{
+    bits<5> ra;
+    bits<5> rb;
+
+    let Inst{6-10}  = 0x0;
+    let Inst{11-15} = ra;
+    let Inst{16-20} = rb;
+    let Inst{21}    = 0x1;
+    let Inst{22-25} = flags; // NCTA
+    let Inst{26-31} = 0x0;
+}
+
+class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+              MBlazeInst<op, FFSLT, (outs), (ins fslimm:$b),
+                         !strconcat(instr_asm, " $b"),
+                         [(OpNode immZExt4:$b)], IIAlu>
+{
+    bits<4> fslno;
+
+    let Inst{6-10}  = 0x0;
+    let Inst{11-15} = 0x0;
+    let Inst{16}    = 0x1;
+    let Inst{17-20} = flags; // NCTA
+    let Inst{21-27} = 0x0;
+    let Inst{28-31} = fslno;
+}
+
+class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+               MBlazeInst<op, FFSLTD, (outs), (ins GPR:$b),
+                          !strconcat(instr_asm, " $b"),
+                          [(OpNode GPR:$b)], IIAlu>
+{
+    bits<5> rb;
+
+    let Inst{6-10}  = 0x0;
+    let Inst{11-15} = 0x0;
+    let Inst{16-20} = rb;
+    let Inst{21}    = 0x1;
+    let Inst{22-25} = flags; // NCTA
+    let Inst{26-31} = 0x0;
+}
 
 //===----------------------------------------------------------------------===//
 // FSL Get Instructions
 //===----------------------------------------------------------------------===//
-def GET      : FSLGet<0x1B, "get      ", int_mblaze_fsl_get>;
-def AGET     : FSLGet<0x1B, "aget     ", int_mblaze_fsl_aget>;
-def CGET     : FSLGet<0x1B, "cget     ", int_mblaze_fsl_cget>;
-def CAGET    : FSLGet<0x1B, "caget    ", int_mblaze_fsl_caget>;
-def EGET     : FSLGet<0x1B, "eget     ", int_mblaze_fsl_eget>;
-def EAGET    : FSLGet<0x1B, "eaget    ", int_mblaze_fsl_eaget>;
-def ECGET    : FSLGet<0x1B, "ecget    ", int_mblaze_fsl_ecget>;
-def ECAGET   : FSLGet<0x1B, "ecaget   ", int_mblaze_fsl_ecaget>;
-def NGET     : FSLGet<0x1B, "nget     ", int_mblaze_fsl_nget>;
-def NAGET    : FSLGet<0x1B, "naget    ", int_mblaze_fsl_naget>;
-def NCGET    : FSLGet<0x1B, "ncget    ", int_mblaze_fsl_ncget>;
-def NCAGET   : FSLGet<0x1B, "ncaget   ", int_mblaze_fsl_ncaget>;
-def NEGET    : FSLGet<0x1B, "neget    ", int_mblaze_fsl_neget>;
-def NEAGET   : FSLGet<0x1B, "neaget   ", int_mblaze_fsl_neaget>;
-def NECGET   : FSLGet<0x1B, "necget   ", int_mblaze_fsl_necget>;
-def NECAGET  : FSLGet<0x1B, "necaget  ", int_mblaze_fsl_necaget>;
-def TGET     : FSLGet<0x1B, "tget     ", int_mblaze_fsl_tget>;
-def TAGET    : FSLGet<0x1B, "taget    ", int_mblaze_fsl_taget>;
-def TCGET    : FSLGet<0x1B, "tcget    ", int_mblaze_fsl_tcget>;
-def TCAGET   : FSLGet<0x1B, "tcaget   ", int_mblaze_fsl_tcaget>;
-def TEGET    : FSLGet<0x1B, "teget    ", int_mblaze_fsl_teget>;
-def TEAGET   : FSLGet<0x1B, "teaget   ", int_mblaze_fsl_teaget>;
-def TECGET   : FSLGet<0x1B, "tecget   ", int_mblaze_fsl_tecget>;
-def TECAGET  : FSLGet<0x1B, "tecaget  ", int_mblaze_fsl_tecaget>;
-def TNGET    : FSLGet<0x1B, "tnget    ", int_mblaze_fsl_tnget>;
-def TNAGET   : FSLGet<0x1B, "tnaget   ", int_mblaze_fsl_tnaget>;
-def TNCGET   : FSLGet<0x1B, "tncget   ", int_mblaze_fsl_tncget>;
-def TNCAGET  : FSLGet<0x1B, "tncaget  ", int_mblaze_fsl_tncaget>;
-def TNEGET   : FSLGet<0x1B, "tneget   ", int_mblaze_fsl_tneget>;
-def TNEAGET  : FSLGet<0x1B, "tneaget  ", int_mblaze_fsl_tneaget>;
-def TNECGET  : FSLGet<0x1B, "tnecget  ", int_mblaze_fsl_tnecget>;
-def TNECAGET : FSLGet<0x1B, "tnecaget ", int_mblaze_fsl_tnecaget>;
+def GET      : FSLGet<0x1B, 0x00, "get      ", int_mblaze_fsl_get>;
+def AGET     : FSLGet<0x1B, 0x02, "aget     ", int_mblaze_fsl_aget>;
+def CGET     : FSLGet<0x1B, 0x08, "cget     ", int_mblaze_fsl_cget>;
+def CAGET    : FSLGet<0x1B, 0x0A, "caget    ", int_mblaze_fsl_caget>;
+def EGET     : FSLGet<0x1B, 0x01, "eget     ", int_mblaze_fsl_eget>;
+def EAGET    : FSLGet<0x1B, 0x03, "eaget    ", int_mblaze_fsl_eaget>;
+def ECGET    : FSLGet<0x1B, 0x09, "ecget    ", int_mblaze_fsl_ecget>;
+def ECAGET   : FSLGet<0x1B, 0x0B, "ecaget   ", int_mblaze_fsl_ecaget>;
+def NGET     : FSLGet<0x1B, 0x10, "nget     ", int_mblaze_fsl_nget>;
+def NAGET    : FSLGet<0x1B, 0x12, "naget    ", int_mblaze_fsl_naget>;
+def NCGET    : FSLGet<0x1B, 0x18, "ncget    ", int_mblaze_fsl_ncget>;
+def NCAGET   : FSLGet<0x1B, 0x1A, "ncaget   ", int_mblaze_fsl_ncaget>;
+def NEGET    : FSLGet<0x1B, 0x11, "neget    ", int_mblaze_fsl_neget>;
+def NEAGET   : FSLGet<0x1B, 0x13, "neaget   ", int_mblaze_fsl_neaget>;
+def NECGET   : FSLGet<0x1B, 0x19, "necget   ", int_mblaze_fsl_necget>;
+def NECAGET  : FSLGet<0x1B, 0x1B, "necaget  ", int_mblaze_fsl_necaget>;
+def TGET     : FSLGet<0x1B, 0x04, "tget     ", int_mblaze_fsl_tget>;
+def TAGET    : FSLGet<0x1B, 0x06, "taget    ", int_mblaze_fsl_taget>;
+def TCGET    : FSLGet<0x1B, 0x0C, "tcget    ", int_mblaze_fsl_tcget>;
+def TCAGET   : FSLGet<0x1B, 0x0E, "tcaget   ", int_mblaze_fsl_tcaget>;
+def TEGET    : FSLGet<0x1B, 0x05, "teget    ", int_mblaze_fsl_teget>;
+def TEAGET   : FSLGet<0x1B, 0x07, "teaget   ", int_mblaze_fsl_teaget>;
+def TECGET   : FSLGet<0x1B, 0x0D, "tecget   ", int_mblaze_fsl_tecget>;
+def TECAGET  : FSLGet<0x1B, 0x0F, "tecaget  ", int_mblaze_fsl_tecaget>;
+def TNGET    : FSLGet<0x1B, 0x14, "tnget    ", int_mblaze_fsl_tnget>;
+def TNAGET   : FSLGet<0x1B, 0x16, "tnaget   ", int_mblaze_fsl_tnaget>;
+def TNCGET   : FSLGet<0x1B, 0x1C, "tncget   ", int_mblaze_fsl_tncget>;
+def TNCAGET  : FSLGet<0x1B, 0x1E, "tncaget  ", int_mblaze_fsl_tncaget>;
+def TNEGET   : FSLGet<0x1B, 0x15, "tneget   ", int_mblaze_fsl_tneget>;
+def TNEAGET  : FSLGet<0x1B, 0x17, "tneaget  ", int_mblaze_fsl_tneaget>;
+def TNECGET  : FSLGet<0x1B, 0x1D, "tnecget  ", int_mblaze_fsl_tnecget>;
+def TNECAGET : FSLGet<0x1B, 0x1F, "tnecaget ", int_mblaze_fsl_tnecaget>;
 
 //===----------------------------------------------------------------------===//
 // FSL Dynamic Get Instructions
 //===----------------------------------------------------------------------===//
-def GETD      : FSLGetD<0x1B, 0x00, "getd     ", int_mblaze_fsl_get>;
-def AGETD     : FSLGetD<0x1B, 0x00, "agetd    ", int_mblaze_fsl_aget>;
-def CGETD     : FSLGetD<0x1B, 0x00, "cgetd    ", int_mblaze_fsl_cget>;
-def CAGETD    : FSLGetD<0x1B, 0x00, "cagetd   ", int_mblaze_fsl_caget>;
-def EGETD     : FSLGetD<0x1B, 0x00, "egetd    ", int_mblaze_fsl_eget>;
-def EAGETD    : FSLGetD<0x1B, 0x00, "eagetd   ", int_mblaze_fsl_eaget>;
-def ECGETD    : FSLGetD<0x1B, 0x00, "ecgetd   ", int_mblaze_fsl_ecget>;
-def ECAGETD   : FSLGetD<0x1B, 0x00, "ecagetd  ", int_mblaze_fsl_ecaget>;
-def NGETD     : FSLGetD<0x1B, 0x00, "ngetd    ", int_mblaze_fsl_nget>;
-def NAGETD    : FSLGetD<0x1B, 0x00, "nagetd   ", int_mblaze_fsl_naget>;
-def NCGETD    : FSLGetD<0x1B, 0x00, "ncgetd   ", int_mblaze_fsl_ncget>;
-def NCAGETD   : FSLGetD<0x1B, 0x00, "ncagetd  ", int_mblaze_fsl_ncaget>;
-def NEGETD    : FSLGetD<0x1B, 0x00, "negetd   ", int_mblaze_fsl_neget>;
-def NEAGETD   : FSLGetD<0x1B, 0x00, "neagetd  ", int_mblaze_fsl_neaget>;
-def NECGETD   : FSLGetD<0x1B, 0x00, "necgetd  ", int_mblaze_fsl_necget>;
-def NECAGETD  : FSLGetD<0x1B, 0x00, "necagetd ", int_mblaze_fsl_necaget>;
-def TGETD     : FSLGetD<0x1B, 0x00, "tgetd    ", int_mblaze_fsl_tget>;
-def TAGETD    : FSLGetD<0x1B, 0x00, "tagetd   ", int_mblaze_fsl_taget>;
-def TCGETD    : FSLGetD<0x1B, 0x00, "tcgetd   ", int_mblaze_fsl_tcget>;
-def TCAGETD   : FSLGetD<0x1B, 0x00, "tcagetd  ", int_mblaze_fsl_tcaget>;
-def TEGETD    : FSLGetD<0x1B, 0x00, "tegetd   ", int_mblaze_fsl_teget>;
-def TEAGETD   : FSLGetD<0x1B, 0x00, "teagetd  ", int_mblaze_fsl_teaget>;
-def TECGETD   : FSLGetD<0x1B, 0x00, "tecgetd  ", int_mblaze_fsl_tecget>;
-def TECAGETD  : FSLGetD<0x1B, 0x00, "tecagetd ", int_mblaze_fsl_tecaget>;
-def TNGETD    : FSLGetD<0x1B, 0x00, "tngetd   ", int_mblaze_fsl_tnget>;
-def TNAGETD   : FSLGetD<0x1B, 0x00, "tnagetd  ", int_mblaze_fsl_tnaget>;
-def TNCGETD   : FSLGetD<0x1B, 0x00, "tncgetd  ", int_mblaze_fsl_tncget>;
-def TNCAGETD  : FSLGetD<0x1B, 0x00, "tncagetd ", int_mblaze_fsl_tncaget>;
-def TNEGETD   : FSLGetD<0x1B, 0x00, "tnegetd  ", int_mblaze_fsl_tneget>;
-def TNEAGETD  : FSLGetD<0x1B, 0x00, "tneagetd ", int_mblaze_fsl_tneaget>;
-def TNECGETD  : FSLGetD<0x1B, 0x00, "tnecgetd ", int_mblaze_fsl_tnecget>;
-def TNECAGETD : FSLGetD<0x1B, 0x00, "tnecagetd", int_mblaze_fsl_tnecaget>;
+def GETD      : FSLGetD<0x13, 0x00, "getd     ", int_mblaze_fsl_get>;
+def AGETD     : FSLGetD<0x13, 0x02, "agetd    ", int_mblaze_fsl_aget>;
+def CGETD     : FSLGetD<0x13, 0x08, "cgetd    ", int_mblaze_fsl_cget>;
+def CAGETD    : FSLGetD<0x13, 0x0A, "cagetd   ", int_mblaze_fsl_caget>;
+def EGETD     : FSLGetD<0x13, 0x01, "egetd    ", int_mblaze_fsl_eget>;
+def EAGETD    : FSLGetD<0x13, 0x03, "eagetd   ", int_mblaze_fsl_eaget>;
+def ECGETD    : FSLGetD<0x13, 0x09, "ecgetd   ", int_mblaze_fsl_ecget>;
+def ECAGETD   : FSLGetD<0x13, 0x0B, "ecagetd  ", int_mblaze_fsl_ecaget>;
+def NGETD     : FSLGetD<0x13, 0x10, "ngetd    ", int_mblaze_fsl_nget>;
+def NAGETD    : FSLGetD<0x13, 0x12, "nagetd   ", int_mblaze_fsl_naget>;
+def NCGETD    : FSLGetD<0x13, 0x18, "ncgetd   ", int_mblaze_fsl_ncget>;
+def NCAGETD   : FSLGetD<0x13, 0x1A, "ncagetd  ", int_mblaze_fsl_ncaget>;
+def NEGETD    : FSLGetD<0x13, 0x11, "negetd   ", int_mblaze_fsl_neget>;
+def NEAGETD   : FSLGetD<0x13, 0x13, "neagetd  ", int_mblaze_fsl_neaget>;
+def NECGETD   : FSLGetD<0x13, 0x19, "necgetd  ", int_mblaze_fsl_necget>;
+def NECAGETD  : FSLGetD<0x13, 0x1B, "necagetd ", int_mblaze_fsl_necaget>;
+def TGETD     : FSLGetD<0x13, 0x04, "tgetd    ", int_mblaze_fsl_tget>;
+def TAGETD    : FSLGetD<0x13, 0x06, "tagetd   ", int_mblaze_fsl_taget>;
+def TCGETD    : FSLGetD<0x13, 0x0C, "tcgetd   ", int_mblaze_fsl_tcget>;
+def TCAGETD   : FSLGetD<0x13, 0x0E, "tcagetd  ", int_mblaze_fsl_tcaget>;
+def TEGETD    : FSLGetD<0x13, 0x05, "tegetd   ", int_mblaze_fsl_teget>;
+def TEAGETD   : FSLGetD<0x13, 0x07, "teagetd  ", int_mblaze_fsl_teaget>;
+def TECGETD   : FSLGetD<0x13, 0x0D, "tecgetd  ", int_mblaze_fsl_tecget>;
+def TECAGETD  : FSLGetD<0x13, 0x0F, "tecagetd ", int_mblaze_fsl_tecaget>;
+def TNGETD    : FSLGetD<0x13, 0x14, "tngetd   ", int_mblaze_fsl_tnget>;
+def TNAGETD   : FSLGetD<0x13, 0x16, "tnagetd  ", int_mblaze_fsl_tnaget>;
+def TNCGETD   : FSLGetD<0x13, 0x1C, "tncgetd  ", int_mblaze_fsl_tncget>;
+def TNCAGETD  : FSLGetD<0x13, 0x1E, "tncagetd ", int_mblaze_fsl_tncaget>;
+def TNEGETD   : FSLGetD<0x13, 0x15, "tnegetd  ", int_mblaze_fsl_tneget>;
+def TNEAGETD  : FSLGetD<0x13, 0x17, "tneagetd ", int_mblaze_fsl_tneaget>;
+def TNECGETD  : FSLGetD<0x13, 0x1D, "tnecgetd ", int_mblaze_fsl_tnecget>;
+def TNECAGETD : FSLGetD<0x13, 0x1F, "tnecagetd", int_mblaze_fsl_tnecaget>;
 
 //===----------------------------------------------------------------------===//
 // FSL Put Instructions
 //===----------------------------------------------------------------------===//
-def PUT     :  FSLPut<0x1B, "put      ", int_mblaze_fsl_put>;
-def APUT    :  FSLPut<0x1B, "aput     ", int_mblaze_fsl_aput>;
-def CPUT    :  FSLPut<0x1B, "cput     ", int_mblaze_fsl_cput>;
-def CAPUT   :  FSLPut<0x1B, "caput    ", int_mblaze_fsl_caput>;
-def NPUT    :  FSLPut<0x1B, "nput     ", int_mblaze_fsl_nput>;
-def NAPUT   :  FSLPut<0x1B, "naput    ", int_mblaze_fsl_naput>;
-def NCPUT   :  FSLPut<0x1B, "ncput    ", int_mblaze_fsl_ncput>;
-def NCAPUT  :  FSLPut<0x1B, "ncaput   ", int_mblaze_fsl_ncaput>;
-def TPUT    : FSLPutT<0x1B, "tput     ", int_mblaze_fsl_tput>;
-def TAPUT   : FSLPutT<0x1B, "taput    ", int_mblaze_fsl_taput>;
-def TCPUT   : FSLPutT<0x1B, "tcput    ", int_mblaze_fsl_tcput>;
-def TCAPUT  : FSLPutT<0x1B, "tcaput   ", int_mblaze_fsl_tcaput>;
-def TNPUT   : FSLPutT<0x1B, "tnput    ", int_mblaze_fsl_tnput>;
-def TNAPUT  : FSLPutT<0x1B, "tnaput   ", int_mblaze_fsl_tnaput>;
-def TNCPUT  : FSLPutT<0x1B, "tncput   ", int_mblaze_fsl_tncput>;
-def TNCAPUT : FSLPutT<0x1B, "tncaput  ", int_mblaze_fsl_tncaput>;
+def PUT     :  FSLPut<0x1B, 0x0, "put      ", int_mblaze_fsl_put>;
+def APUT    :  FSLPut<0x1B, 0x1, "aput     ", int_mblaze_fsl_aput>;
+def CPUT    :  FSLPut<0x1B, 0x4, "cput     ", int_mblaze_fsl_cput>;
+def CAPUT   :  FSLPut<0x1B, 0x5, "caput    ", int_mblaze_fsl_caput>;
+def NPUT    :  FSLPut<0x1B, 0x8, "nput     ", int_mblaze_fsl_nput>;
+def NAPUT   :  FSLPut<0x1B, 0x9, "naput    ", int_mblaze_fsl_naput>;
+def NCPUT   :  FSLPut<0x1B, 0xC, "ncput    ", int_mblaze_fsl_ncput>;
+def NCAPUT  :  FSLPut<0x1B, 0xD, "ncaput   ", int_mblaze_fsl_ncaput>;
+def TPUT    : FSLPutT<0x1B, 0x2, "tput     ", int_mblaze_fsl_tput>;
+def TAPUT   : FSLPutT<0x1B, 0x3, "taput    ", int_mblaze_fsl_taput>;
+def TCPUT   : FSLPutT<0x1B, 0x6, "tcput    ", int_mblaze_fsl_tcput>;
+def TCAPUT  : FSLPutT<0x1B, 0x7, "tcaput   ", int_mblaze_fsl_tcaput>;
+def TNPUT   : FSLPutT<0x1B, 0xA, "tnput    ", int_mblaze_fsl_tnput>;
+def TNAPUT  : FSLPutT<0x1B, 0xB, "tnaput   ", int_mblaze_fsl_tnaput>;
+def TNCPUT  : FSLPutT<0x1B, 0xE, "tncput   ", int_mblaze_fsl_tncput>;
+def TNCAPUT : FSLPutT<0x1B, 0xF, "tncaput  ", int_mblaze_fsl_tncaput>;
 
 //===----------------------------------------------------------------------===//
 // FSL Dynamic Put Instructions
 //===----------------------------------------------------------------------===//
-def PUTD     :  FSLPutD<0x1B, 0x00, "putd     ", int_mblaze_fsl_put>;
-def APUTD    :  FSLPutD<0x1B, 0x00, "aputd    ", int_mblaze_fsl_aput>;
-def CPUTD    :  FSLPutD<0x1B, 0x00, "cputd    ", int_mblaze_fsl_cput>;
-def CAPUTD   :  FSLPutD<0x1B, 0x00, "caputd   ", int_mblaze_fsl_caput>;
-def NPUTD    :  FSLPutD<0x1B, 0x00, "nputd    ", int_mblaze_fsl_nput>;
-def NAPUTD   :  FSLPutD<0x1B, 0x00, "naputd   ", int_mblaze_fsl_naput>;
-def NCPUTD   :  FSLPutD<0x1B, 0x00, "ncputd   ", int_mblaze_fsl_ncput>;
-def NCAPUTD  :  FSLPutD<0x1B, 0x00, "ncaputd  ", int_mblaze_fsl_ncaput>;
-def TPUTD    : FSLPutTD<0x1B, 0x00, "tputd    ", int_mblaze_fsl_tput>;
-def TAPUTD   : FSLPutTD<0x1B, 0x00, "taputd   ", int_mblaze_fsl_taput>;
-def TCPUTD   : FSLPutTD<0x1B, 0x00, "tcputd   ", int_mblaze_fsl_tcput>;
-def TCAPUTD  : FSLPutTD<0x1B, 0x00, "tcaputd  ", int_mblaze_fsl_tcaput>;
-def TNPUTD   : FSLPutTD<0x1B, 0x00, "tnputd   ", int_mblaze_fsl_tnput>;
-def TNAPUTD  : FSLPutTD<0x1B, 0x00, "tnaputd  ", int_mblaze_fsl_tnaput>;
-def TNCPUTD  : FSLPutTD<0x1B, 0x00, "tncputd  ", int_mblaze_fsl_tncput>;
-def TNCAPUTD : FSLPutTD<0x1B, 0x00, "tncaputd ", int_mblaze_fsl_tncaput>;
+def PUTD     :  FSLPutD<0x13, 0x0, "putd     ", int_mblaze_fsl_put>;
+def APUTD    :  FSLPutD<0x13, 0x1, "aputd    ", int_mblaze_fsl_aput>;
+def CPUTD    :  FSLPutD<0x13, 0x4, "cputd    ", int_mblaze_fsl_cput>;
+def CAPUTD   :  FSLPutD<0x13, 0x5, "caputd   ", int_mblaze_fsl_caput>;
+def NPUTD    :  FSLPutD<0x13, 0x8, "nputd    ", int_mblaze_fsl_nput>;
+def NAPUTD   :  FSLPutD<0x13, 0x9, "naputd   ", int_mblaze_fsl_naput>;
+def NCPUTD   :  FSLPutD<0x13, 0xC, "ncputd   ", int_mblaze_fsl_ncput>;
+def NCAPUTD  :  FSLPutD<0x13, 0xD, "ncaputd  ", int_mblaze_fsl_ncaput>;
+def TPUTD    : FSLPutTD<0x13, 0x2, "tputd    ", int_mblaze_fsl_tput>;
+def TAPUTD   : FSLPutTD<0x13, 0x3, "taputd   ", int_mblaze_fsl_taput>;
+def TCPUTD   : FSLPutTD<0x13, 0x6, "tcputd   ", int_mblaze_fsl_tcput>;
+def TCAPUTD  : FSLPutTD<0x13, 0x7, "tcaputd  ", int_mblaze_fsl_tcaput>;
+def TNPUTD   : FSLPutTD<0x13, 0xA, "tnputd   ", int_mblaze_fsl_tnput>;
+def TNAPUTD  : FSLPutTD<0x13, 0xB, "tnaputd  ", int_mblaze_fsl_tnaput>;
+def TNCPUTD  : FSLPutTD<0x13, 0xE, "tncputd  ", int_mblaze_fsl_tncput>;
+def TNCAPUTD : FSLPutTD<0x13, 0xF, "tncaputd ", int_mblaze_fsl_tncaput>;
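
The flag values threaded through the FSLGet/FSLPut classes above follow
the letters in the mnemonics. A quick cross-check, not code from the patch
(the letter meanings are my reading of the MicroBlaze FSL naming):

  /* 5-bit NCTAE field used by the get forms. */
  enum FSLGetFlag {
    FSL_E = 1 << 0, /* e: exception-generating   */
    FSL_A = 1 << 1, /* a: atomic                 */
    FSL_T = 1 << 2, /* t: test (non-destructive) */
    FSL_C = 1 << 3, /* c: control word           */
    FSL_N = 1 << 4  /* n: non-blocking           */
  };
  /* e.g. "necaget" = n|c|a|e = 0x10|0x08|0x02|0x01 = 0x1B, matching NECAGET.
     The put forms drop the E bit and use a 4-bit NCTA field instead:
     "tncaput" = n|c|t|a = 0x8|0x4|0x2|0x1 = 0xF, matching TNCAPUT. */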

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFormats.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFormats.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrFormats.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===//
+//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,6 +7,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+// Format specifies the encoding used by the instruction.  This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+      bits<6> Value = val;
+}
+
+def FPseudo : Format<0>;
+def FRRR    : Format<1>;
+def FRRI    : Format<2>;
+def FRIR    : Format<3>;
+def FFSL    : Format<4>;
+def FFSLD   : Format<5>;
+def FFSLT   : Format<6>;
+def FFSLTD  : Format<7>;
+def FR      : Format<8>;
+def FI      : Format<9>;
+def FRR     : Format<10>;
+def FRI     : Format<11>;
+
 //===----------------------------------------------------------------------===//
 //  Describe MBlaze instructions format
 //
@@ -21,14 +41,15 @@
 //===----------------------------------------------------------------------===//
 
 // Generic MBlaze Format
-class MBlazeInst<dag outs, dag ins, string asmstr, list<dag> pattern, 
-               InstrItinClass itin> : Instruction 
+class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr, 
+                 list<dag> pattern, InstrItinClass itin> : Instruction 
 {
-  field bits<32> Inst;
-
   let Namespace = "MBlaze";
+  field bits<32> Inst;
 
-  bits<6> opcode;
+  bits<6> opcode = op;
+  Format Form = form;
+  bits<6> FormBits = Form.Value;
 
   // Top 6 bits are the 'opcode' field
   let Inst{0-5} = opcode;   
@@ -39,13 +60,16 @@
   let AsmString   = asmstr;
   let Pattern     = pattern;
   let Itinerary   = itin;
+
+  // TSFlags layout should be kept in sync with MBlazeInstrInfo.h.
+  let TSFlags{5-0}   = FormBits;
 }
 
 //===----------------------------------------------------------------------===//
 // Pseudo instruction class
 //===----------------------------------------------------------------------===//
 class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
-      MBlazeInst<outs, ins, asmstr, pattern, IIPseudo>;
+      MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIPseudo>;
 
 //===----------------------------------------------------------------------===//
 // Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
@@ -53,194 +77,49 @@
 
 class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
          list<dag> pattern, InstrItinClass itin> : 
-         MBlazeInst<outs, ins, asmstr, pattern, itin> 
+         MBlazeInst<op, FRRR, outs, ins, asmstr, pattern, itin>
 {
   bits<5> rd;
   bits<5> ra;
   bits<5> rb;
 
-  let opcode = op;
-
   let Inst{6-10}  = rd;
   let Inst{11-15} = ra; 
   let Inst{16-20} = rb;
   let Inst{21-31} = flags;
 }
 
-class TAI<bits<6> op, dag outs, dag ins, string asmstr,
-          list<dag> pattern, InstrItinClass itin> :
-          MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> rd;
-  bits<5> ra;
-  bits<16> imm16;
-
-  let opcode = op;
-
-  let Inst{6-10}  = rd;
-  let Inst{11-15} = ra; 
-  let Inst{16-31} = imm16;
-}
-
-class TIMM<bits<6> op, dag outs, dag ins, string asmstr,
-           list<dag> pattern, InstrItinClass itin> :
-           MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-  bits<16> imm16;
-
-  let opcode = op;
-
-  let Inst{6-15}  = 0;
-  let Inst{16-31} = imm16;
-}
-
-class TADDR<bits<6> op, dag outs, dag ins, string asmstr,
-            list<dag> pattern, InstrItinClass itin> :
-            MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<26> addr;
-
-  let opcode = op;
-
-  let Inst{6-31} = addr;
-}
-
 //===----------------------------------------------------------------------===//
 // Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|>
 //===----------------------------------------------------------------------===//
 
 class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
          InstrItinClass itin> : 
-         MBlazeInst<outs, ins, asmstr, pattern, itin> 
+         MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin> 
 {
   bits<5>  rd;
   bits<5>  ra;
   bits<16> imm16;
 
-  let opcode = op;
-
   let Inst{6-10}  = rd;
   let Inst{11-15} = ra; 
   let Inst{16-31} = imm16;
 }
 
 //===----------------------------------------------------------------------===//
-// Float instruction class in MBlaze : <|opcode|rd|ra|flags|>
+// Type B instruction class in MBlaze but with the operands reversed in
+// the LLVM DAG : <|opcode|rd|ra|immediate|>
 //===----------------------------------------------------------------------===//
-
-class TF<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
-         list<dag> pattern, InstrItinClass itin> : 
-         MBlazeInst<outs, ins, asmstr, pattern, itin> 
-{
-  bits<5>  rd;
-  bits<5>  ra;
-
-  let opcode = op;
-
-  let Inst{6-10}  = rd;
-  let Inst{11-15} = ra; 
-  let Inst{16-20} = 0;
-  let Inst{21-31} = flags;
-}
-
-//===----------------------------------------------------------------------===//
-// Branch instruction class in MBlaze : <|opcode|rd|br|ra|flags|>
-//===----------------------------------------------------------------------===//
-
-class TBR<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
-          string asmstr, list<dag> pattern, InstrItinClass itin> :
-          MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-
-  let opcode = op;
-
-  let Inst{6-10}  = 0;
-  let Inst{11-15} = br; 
-  let Inst{16-20} = ra;
-  let Inst{21-31} = flags;
-}
-
-class TBRC<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
-           string asmstr, list<dag> pattern, InstrItinClass itin> :
-           MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-  bits<5> rb;
-
-  let opcode = op;
-
-  let Inst{6-10}  = br;
-  let Inst{11-15} = ra; 
-  let Inst{16-20} = rb;
-  let Inst{21-31} = flags;
-}
-
-class TBRL<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
-           string asmstr, list<dag> pattern, InstrItinClass itin> :
-           MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-
-  let opcode = op;
-
-  let Inst{6-10}  = 0xF;
-  let Inst{11-15} = br; 
-  let Inst{16-20} = ra;
-  let Inst{21-31} = flags;
-}
-
-class TBRI<bits<6> op, bits<5> br, dag outs, dag ins,
-           string asmstr, list<dag> pattern, InstrItinClass itin> :
-           MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<16> imm16;
-
-  let opcode = op;
-
-  let Inst{6-10}  = 0;
-  let Inst{11-15} = br; 
-  let Inst{16-31} = imm16;
-}
-
-class TBRLI<bits<6> op, bits<5> br, dag outs, dag ins,
-            string asmstr, list<dag> pattern, InstrItinClass itin> :
-            MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<16> imm16;
-
-  let opcode = op;
-
-  let Inst{6-10}  = 0xF;
-  let Inst{11-15} = br; 
-  let Inst{16-31} = imm16;
-}
-
-class TBRCI<bits<6> op, bits<5> br, dag outs, dag ins,
-            string asmstr, list<dag> pattern, InstrItinClass itin> :
-            MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-  bits<16> imm16;
-
-  let opcode = op;
-
-  let Inst{6-10}  = br;
-  let Inst{11-15} = ra; 
-  let Inst{16-31} = imm16;
-}
-
-class TRET<bits<6> op, dag outs, dag ins,
-            string asmstr, list<dag> pattern, InstrItinClass itin> :
-            MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5>  ra;
-  bits<16> imm16;
-
-  let opcode = op;
-
-  let Inst{6-10}  = 0x10;
-  let Inst{11-15} = ra; 
-  let Inst{16-31} = imm16;
+class TBR<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+         InstrItinClass itin> : 
+         TB<op, outs, ins, asmstr, pattern, itin> {
+  bits<5>  rrd;
+  bits<16> rimm16;
+  bits<5>  rra;
+
+  let Form = FRIR;
+
+  let rd = rrd;
+  let ra = rra;
+  let imm16 = rimm16;
 }
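
For reference, a sketch of the 32-bit word the TA (Type A) class above
lays out. This is my own illustration rather than patch code; MicroBlaze
numbers bit 0 as the most significant bit, hence the shifts:

  #include <stdint.h>

  /* Pack a Type A instruction: <|opcode|rd|ra|rb|flags|>. */
  static uint32_t encodeTypeA(unsigned op, unsigned rd, unsigned ra,
                              unsigned rb, unsigned flags) {
    uint32_t w = 0;
    w |= (uint32_t)(op    & 0x3F) << 26; /* Inst{0-5}   */
    w |= (uint32_t)(rd    & 0x1F) << 21; /* Inst{6-10}  */
    w |= (uint32_t)(ra    & 0x1F) << 16; /* Inst{11-15} */
    w |= (uint32_t)(rb    & 0x1F) << 11; /* Inst{16-20} */
    w |= (uint32_t)(flags & 0x7FF);      /* Inst{21-31} */
    return w;
  }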

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.cpp Tue Oct 26 19:48:03 2010
@@ -134,7 +134,7 @@
   MachineRegisterInfo &RegInfo = MF->getRegInfo();
   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 
-  GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+  GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::GPRRegisterClass);
   BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
           GlobalBaseReg).addReg(MBlaze::R20);
   RegInfo.addLiveIn(MBlaze::R20);

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.h Tue Oct 26 19:48:03 2010
@@ -134,29 +134,47 @@
 /// instruction info tracks.
 ///
 namespace MBlazeII {
-  /// Target Operand Flag enum.
-  enum TOF {
+  enum {
+    // Pseudo - This represents an instruction that is a pseudo instruction
+    // or one that has not been implemented yet.  It is illegal to
+    // code-generate it, but it is tolerated for intermediate implementation
+    // stages.
+    Pseudo         = 0,
+
+    RegRegReg      = 1,
+    RegRegImm      = 2,
+    RegImmReg      = 3,
+    FSL            = 4,
+    FSLD           = 5,
+    FSLT           = 6,
+    FSLTD          = 7,
+    Reg            = 8,
+    Imm            = 9,
+    RegReg         = 10,
+    RegImm         = 11,
+
+    FormMask       = 63
+
     //===------------------------------------------------------------------===//
     // MBlaze Specific MachineOperand flags.
-    MO_NO_FLAG,
+    // MO_NO_FLAG,
 
     /// MO_GOT - Represents the offset into the global offset table at which
     /// the address the relocation entry symbol resides during execution.
-    MO_GOT,
+    // MO_GOT,
 
     /// MO_GOT_CALL - Represents the offset into the global offset table at
     /// which the address of a call site relocation entry symbol resides
     /// during execution. This is different from the above since this flag
     /// can only be present in call instructions.
-    MO_GOT_CALL,
+    // MO_GOT_CALL,
 
     /// MO_GPREL - Represents the offset from the current gp value to be used
     /// for the relocatable object file being produced.
-    MO_GPREL,
+    // MO_GPREL,
 
     /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
     /// address.
-    MO_ABS_HILO
+    // MO_ABS_HILO
 
   };
 }
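
With TSFlags{5-0} carrying FormBits (see the MBlazeInstrFormats.td hunk),
a consumer such as a machine code emitter can recover the format the way
other backends do. A hedged sketch, assuming the TargetInstrDesc API of
this LLVM revision; this helper is not part of the patch:

  static unsigned getMBlazeInstrFormat(const TargetInstrDesc &TID) {
    return TID.TSFlags & MBlazeII::FormMask; /* low 6 bits = Format.Value */
  }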

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeInstrInfo.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===//
+//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,36 +13,33 @@
 include "MBlazeInstrFormats.td"
 
 //===----------------------------------------------------------------------===//
-// MBlaze profiles and nodes
+// MBlaze type profiles
 //===----------------------------------------------------------------------===//
+
+// def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
 def SDT_MBlazeRet     : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 def SDT_MBlazeJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
 
-// Call
-def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
-                               [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>;
+//===----------------------------------------------------------------------===//
+// MBlaze specific nodes
+//===----------------------------------------------------------------------===//
 
-// Return
-def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
+def MBlazeRet     : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
                            [SDNPHasChain, SDNPOptInFlag]>;
 
-// Hi and Lo nodes are used to handle global addresses. Used on 
-// MBlazeISelLowering to lower stuff like GlobalAddress, ExternalSymbol 
-// static model.
-def MBWrapper   : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
-def MBlazeGPRel : SDNode<"MBlazeISD::GPRel", SDTIntUnaryOp>;
+def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
+                           [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>;
 
-def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
-def SDT_MBCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def MBWrapper   : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
 
-// These are target-independent nodes, but have target-specific formats.
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
                            [SDNPHasChain, SDNPOutFlag]>;
+
 def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 
-def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
-
 //===----------------------------------------------------------------------===//
 // MBlaze Instruction Predicate Definitions.
 //===----------------------------------------------------------------------===//
@@ -87,26 +84,15 @@
 // Address operand
 def memri : Operand<i32> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops simm16, CPURegs);
+  let MIOperandInfo = (ops simm16, GPR);
 }
 
 def memrr : Operand<i32> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops CPURegs, CPURegs);
+  let MIOperandInfo = (ops GPR, GPR);
 }
 
-// Transformation Function - get the lower 16 bits.
-def LO16 : SDNodeXForm<imm, [{
-  return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
-}]>;
-
-// Transformation Function - get the higher 16 bits.
-def HI16 : SDNodeXForm<imm, [{
-  return getI32Imm((unsigned)N->getZExtValue() >> 16);
-}]>;
-
 // Node immediate fits as a 16-bit sign-extended target immediate.
-// e.g. addi, andi
 def immSExt16  : PatLeaf<(imm), [{
   return (N->getZExtValue() >> 16) == 0;
 }]>;
@@ -117,19 +103,19 @@
 // e.g. addiu, sltiu
 def immZExt16  : PatLeaf<(imm), [{
   return (N->getZExtValue() >> 16) == 0;
-}], LO16>;
+}]>;
 
 // FSL immediate field must fit in 4 bits.
 def immZExt4 : PatLeaf<(imm), [{
-      return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
+  return N->getZExtValue() == (N->getZExtValue() & 0xf);
 }]>;
 
 // shamt field must fit in 5 bits.
 def immZExt5 : PatLeaf<(imm), [{
-      return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+  return N->getZExtValue() == (N->getZExtValue() & 0x1f);
 }]>;
 
-// MBlaze Address Mode! SDNode frameindex could possibily be a match
+// MBlaze Address Mode. SDNode frameindex could possibly be a match
 // since load and store instructions that access the stack use it.
 def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>;
 def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
@@ -141,28 +127,14 @@
 // As stack alignment is always done with addiu, we need a 16-bit immediate
 let Defs = [R1], Uses = [R1] in {
 def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt),
-                                  "${:comment} ADJCALLSTACKDOWN $amt",
+                                  "#ADJCALLSTACKDOWN $amt",
                                   [(callseq_start timm:$amt)]>;
 def ADJCALLSTACKUP   : MBlazePseudo<(outs),
                                   (ins uimm16:$amt1, simm16:$amt2),
-                                  "${:comment} ADJCALLSTACKUP $amt1",
+                                  "#ADJCALLSTACKUP $amt1",
                                   [(callseq_end timm:$amt1, timm:$amt2)]>;
 }
 
-// Some assembly macros need to avoid pseudoinstructions and assembler
-// automatic reodering, we should reorder ourselves.
-def MACRO     : MBlazePseudo<(outs), (ins), ".set macro",     []>;
-def REORDER   : MBlazePseudo<(outs), (ins), ".set reorder",   []>;
-def NOMACRO   : MBlazePseudo<(outs), (ins), ".set nomacro",   []>;
-def NOREORDER : MBlazePseudo<(outs), (ins), ".set noreorder", []>;
-
-// When handling PIC code the assembler needs .cpload and .cprestore
-// directives. If the real instructions corresponding these directives
-// are used, we have the same behavior, but get also a bunch of warnings
-// from the assembler.
-def CPLOAD : MBlazePseudo<(outs), (ins CPURegs:$reg), ".cpload $reg", []>;
-def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>;
-
 //===----------------------------------------------------------------------===//
 // Instructions specific format
 //===----------------------------------------------------------------------===//
@@ -172,184 +144,194 @@
 //===----------------------------------------------------------------------===//
 class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                !strconcat(instr_asm, "   $dst, $b, $c"),
-               [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+               [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
 
 class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
              Operand Od, PatLeaf imm_type> :
-             TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+             TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>;
 
 class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
-               !strconcat(instr_asm, "   $dst, $c, $b"),
-               [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$c, GPR:$b),
+                !strconcat(instr_asm, "   $dst, $c, $b"),
+                [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
 
 class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
              Operand Od, PatLeaf imm_type> :
-             TAI<op, (outs CPURegs:$dst), (ins Od:$b, CPURegs:$c),
+             TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c),
                  !strconcat(instr_asm, "   $dst, $c, $b"),
-                 [(set CPURegs:$dst, (OpNode imm_type:$b, CPURegs:$c))], IIAlu>;
+                 [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIAlu>;
 
 class ArithN<bits<6> op, bits<11> flags, string instr_asm,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                !strconcat(instr_asm, "   $dst, $b, $c"),
                [], itin>;
 
 class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
-             TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [], IIAlu>;
+             TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [], IIAlu>;
 
 class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$c, GPR:$b),
                !strconcat(instr_asm, "   $dst, $b, $c"),
                [], itin>;
 
 class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
-             TAI<op, (outs CPURegs:$dst), (ins Od:$c, CPURegs:$b),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [], IIAlu>;
+             TB<op, (outs GPR:$dst), (ins Od:$c, GPR:$b),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [], IIAlu>;
 
 //===----------------------------------------------------------------------===//
 // Misc Arithmetic Instructions
 //===----------------------------------------------------------------------===//
 
 class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                !strconcat(instr_asm, "   $dst, $b, $c"),
-               [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+               [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIAlu>;
 
 class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
-             TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))],
-                 IIAlu>;
-
-class EffectiveAddress<string instr_asm> :
-          TAI<0x08, (outs CPURegs:$dst), (ins memri:$addr),
-              instr_asm, [(set CPURegs:$dst, iaddr:$addr)], IIAlu>;
+             TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))],
+                IIAlu>;
 
 //===----------------------------------------------------------------------===//
 // Memory Access Instructions
 //===----------------------------------------------------------------------===//
 class LoadM<bits<6> op, string instr_asm, PatFrag OpNode> :
-            TA<op, 0x000, (outs CPURegs:$dst), (ins memrr:$addr),
+            TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
                !strconcat(instr_asm, "   $dst, $addr"),
-               [(set CPURegs:$dst, (OpNode xaddr:$addr))], IILoad>;
+               [(set (i32 GPR:$dst), (OpNode xaddr:$addr))], IILoad>;
 
 class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
-             TAI<op, (outs CPURegs:$dst), (ins memri:$addr),
+             TBR<op, (outs GPR:$dst), (ins memri:$addr),
                  !strconcat(instr_asm, "   $dst, $addr"),
-                 [(set CPURegs:$dst, (OpNode iaddr:$addr))], IILoad>;
+                 [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
 
 class StoreM<bits<6> op, string instr_asm, PatFrag OpNode> :
-             TA<op, 0x000, (outs), (ins CPURegs:$dst, memrr:$addr),
+             TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
                 !strconcat(instr_asm, "   $dst, $addr"),
-                [(OpNode CPURegs:$dst, xaddr:$addr)], IIStore>;
+                [(OpNode (i32 GPR:$dst), xaddr:$addr)], IIStore>;
 
 class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
-              TAI<op, (outs), (ins CPURegs:$dst, memri:$addr),
+              TBR<op, (outs), (ins GPR:$dst, memri:$addr),
                   !strconcat(instr_asm, "   $dst, $addr"),
-                  [(OpNode CPURegs:$dst, iaddr:$addr)], IIStore>;
+                  [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIStore>;
 
 //===----------------------------------------------------------------------===//
 // Branch Instructions
 //===----------------------------------------------------------------------===//
 class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
-             TBR<op, br, flags, (outs), (ins CPURegs:$target),
+             TA<op, flags, (outs), (ins GPR:$target),
                  !strconcat(instr_asm, "   $target"),
-                 [(brind CPURegs:$target)], IIBranch>;
+                 [], IIBranch> {
+  let rd = 0x0;
+  let ra = br;
+}
 
-class BranchI<bits<6> op, bits<5> brf, string instr_asm> :
-              TBRI<op, brf, (outs), (ins brtarget:$target),
-                   !strconcat(instr_asm, "   $target"),
-                   [(br bb:$target)], IIBranch>;
+class BranchI<bits<6> op, bits<5> br, string instr_asm> :
+              TB<op, (outs), (ins brtarget:$target),
+                 !strconcat(instr_asm, "   $target"),
+                 [], IIBranch> {
+  let rd = 0;
+  let ra = br;
+}
 
 //===----------------------------------------------------------------------===//
 // Branch and Link Instructions
 //===----------------------------------------------------------------------===//
 class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
-              TBRL<op, br, flags, (outs), (ins CPURegs:$target),
-                   !strconcat(instr_asm, "   r15, $target"),
-                   [], IIBranch>;
+              TA<op, flags, (outs), (ins GPR:$target),
+                 !strconcat(instr_asm, "   r15, $target"),
+                 [], IIBranch> {
+  let rd = 15;
+  let ra = br;
+}
 
 class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
-               TBRLI<op, br, (outs), (ins calltarget:$target),
-                     !strconcat(instr_asm, "   r15, $target"),
-                     [], IIBranch>;
+               TB<op, (outs), (ins calltarget:$target),
+                  !strconcat(instr_asm, "   r15, $target"),
+                  [], IIBranch> {
+  let rd = 15;
+  let ra = br;
+}
 
 //===----------------------------------------------------------------------===//
 // Conditional Branch Instructions
 //===----------------------------------------------------------------------===//
 class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm,
               PatFrag cond_op> :
-              TBRC<op, br, flags, (outs),
-                   (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
-                   !strconcat(instr_asm, "   $a, $b, $offset"),
-                   [], IIBranch>; 
-                   //(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
-                   //IIBranch>;
+              TA<op, flags, (outs),
+                 (ins GPR:$a, GPR:$b, brtarget:$offset),
+                 !strconcat(instr_asm, "   $a, $b, $offset"),
+                 [], IIBranch> {
+  let rd = br;
+}
 
 class BranchCI<bits<6> op, bits<5> br, string instr_asm, PatFrag cond_op> :
-               TBRCI<op, br, (outs), (ins CPURegs:$a, brtarget:$offset),
-                     !strconcat(instr_asm, "   $a, $offset"),
-                     [], IIBranch>;
+               TB<op, (outs), (ins GPR:$a, brtarget:$offset),
+                  !strconcat(instr_asm, "   $a, $offset"),
+                  [], IIBranch> {
+  let rd = br;
+}
 
 //===----------------------------------------------------------------------===//
 // MBlaze arithmetic instructions
 //===----------------------------------------------------------------------===//
 
 let isCommutable = 1, isAsCheapAsAMove = 1 in {
-    def ADD    :  Arith<0x00, 0x000, "add    ", add,  IIAlu>;
-    def ADDC   :  Arith<0x02, 0x000, "addc   ", adde, IIAlu>;
-    def ADDK   :  Arith<0x04, 0x000, "addk   ", addc, IIAlu>;
-    def ADDKC  : ArithN<0x06, 0x000, "addkc  ", IIAlu>;
-    def AND    :  Logic<0x21, 0x000, "and    ", and>;
-    def OR     :  Logic<0x20, 0x000, "or     ", or>;
-    def XOR    :  Logic<0x22, 0x000, "xor    ", xor>;
+  def ADD    :  Arith<0x00, 0x000, "add    ", add,  IIAlu>;
+  def ADDC   :  Arith<0x02, 0x000, "addc   ", adde, IIAlu>;
+  def ADDK   :  Arith<0x04, 0x000, "addk   ", addc, IIAlu>;
+  def ADDKC  : ArithN<0x06, 0x000, "addkc  ", IIAlu>;
+  def AND    :  Logic<0x21, 0x000, "and    ", and>;
+  def OR     :  Logic<0x20, 0x000, "or     ", or>;
+  def XOR    :  Logic<0x22, 0x000, "xor    ", xor>;
 }
 
 let isAsCheapAsAMove = 1 in {
-    def ANDN   :  ArithN<0x23, 0x000, "andn   ", IIAlu>;
-    def CMP    :  ArithN<0x05, 0x001, "cmp    ", IIAlu>;
-    def CMPU   :  ArithN<0x05, 0x003, "cmpu   ", IIAlu>;
-    def RSUB   :  ArithR<0x01, 0x000, "rsub   ", sub,  IIAlu>;
-    def RSUBC  :  ArithR<0x03, 0x000, "rsubc  ", sube, IIAlu>;
-    def RSUBK  :  ArithR<0x05, 0x000, "rsubk  ", subc, IIAlu>;
-    def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>;
+  def ANDN   :  ArithN<0x23, 0x000, "andn   ", IIAlu>;
+  def CMP    :  ArithN<0x05, 0x001, "cmp    ", IIAlu>;
+  def CMPU   :  ArithN<0x05, 0x003, "cmpu   ", IIAlu>;
+  def RSUB   :  ArithR<0x01, 0x000, "rsub   ", sub,  IIAlu>;
+  def RSUBC  :  ArithR<0x03, 0x000, "rsubc  ", sube, IIAlu>;
+  def RSUBK  :  ArithR<0x05, 0x000, "rsubk  ", subc, IIAlu>;
+  def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>;
 }
 
 let isCommutable = 1, Predicates=[HasMul] in {
-    def MUL    : Arith<0x10, 0x000, "mul    ", mul,   IIAlu>;
+  def MUL    : Arith<0x10, 0x000, "mul    ", mul,   IIAlu>;
 }
 
 let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
-    def MULH   : Arith<0x10, 0x001, "mulh   ", mulhs, IIAlu>;
-    def MULHU  : Arith<0x10, 0x003, "mulhu  ", mulhu, IIAlu>;
+  def MULH   : Arith<0x10, 0x001, "mulh   ", mulhs, IIAlu>;
+  def MULHU  : Arith<0x10, 0x003, "mulhu  ", mulhu, IIAlu>;
 }
 
 let Predicates=[HasMul,HasMul64] in {
-    def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
+  def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
 }
 
 let Predicates=[HasBarrel] in {
-    def BSRL   :   Arith<0x11, 0x000, "bsrl   ", srl, IIAlu>;
-    def BSRA   :   Arith<0x11, 0x200, "bsra   ", sra, IIAlu>;
-    def BSLL   :   Arith<0x11, 0x400, "bsll   ", shl, IIAlu>;
-    def BSRLI  :  ArithI<0x11, "bsrli  ", srl, uimm5, immZExt5>;
-    def BSRAI  :  ArithI<0x11, "bsrai  ", sra, uimm5, immZExt5>;
-    def BSLLI  :  ArithI<0x11, "bslli  ", shl, uimm5, immZExt5>;
+  def BSRL   :   Arith<0x11, 0x000, "bsrl   ", srl, IIAlu>;
+  def BSRA   :   Arith<0x11, 0x200, "bsra   ", sra, IIAlu>;
+  def BSLL   :   Arith<0x11, 0x400, "bsll   ", shl, IIAlu>;
+  def BSRLI  :  ArithI<0x11, "bsrli  ", srl, uimm5, immZExt5>;
+  def BSRAI  :  ArithI<0x11, "bsrai  ", sra, uimm5, immZExt5>;
+  def BSLLI  :  ArithI<0x11, "bslli  ", shl, uimm5, immZExt5>;
 }
 
 let Predicates=[HasDiv] in {
-    def IDIV   :  Arith<0x12, 0x000, "idiv   ", sdiv, IIAlu>;
-    def IDIVU  :  Arith<0x12, 0x002, "idivu  ", udiv, IIAlu>;
+  def IDIV   :  Arith<0x12, 0x000, "idiv   ", sdiv, IIAlu>;
+  def IDIVU  :  Arith<0x12, 0x002, "idivu  ", udiv, IIAlu>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -357,22 +339,22 @@
 //===----------------------------------------------------------------------===//
 
 let isAsCheapAsAMove = 1 in {
-    def ADDI    :   ArithI<0x08, "addi   ", add,  simm16, immSExt16>;
-    def ADDIC   :  ArithNI<0x0A, "addic  ", simm16, immSExt16>;
-    def ADDIK   :  ArithNI<0x0C, "addik  ", simm16, immSExt16>;
-    def ADDIKC  :   ArithI<0x0E, "addikc ", addc, simm16, immSExt16>;
-    def RSUBI   :   ArithRI<0x09, "rsubi  ", sub,  simm16, immSExt16>;
-    def RSUBIC  :  ArithRNI<0x0B, "rsubi  ", simm16, immSExt16>;
-    def RSUBIK  :  ArithRNI<0x0E, "rsubic ", simm16, immSExt16>;
-    def RSUBIKC :   ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>;
-    def ANDNI   :  ArithNI<0x2B, "andni  ", uimm16, immZExt16>;
-    def ANDI    :   LogicI<0x29, "andi   ", and>;
-    def ORI     :   LogicI<0x28, "ori    ", or>;
-    def XORI    :   LogicI<0x2A, "xori   ", xor>;
+  def ADDI    :   ArithI<0x08, "addi   ", add,  simm16, immSExt16>;
+  def ADDIC   :  ArithNI<0x0A, "addic  ", simm16, immSExt16>;
+  def ADDIK   :  ArithNI<0x0C, "addik  ", simm16, immSExt16>;
+  def ADDIKC  :   ArithI<0x0E, "addikc ", addc, simm16, immSExt16>;
+  def RSUBI   :  ArithRI<0x09, "rsubi  ", sub,  simm16, immSExt16>;
+  def RSUBIC  : ArithRNI<0x0B, "rsubic ", simm16, immSExt16>;
+  def RSUBIK  : ArithRNI<0x0D, "rsubik ", simm16, immSExt16>;
+  def RSUBIKC :  ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>;
+  def ANDNI   :  ArithNI<0x2B, "andni  ", uimm16, immZExt16>;
+  def ANDI    :   LogicI<0x29, "andi   ", and>;
+  def ORI     :   LogicI<0x28, "ori    ", or>;
+  def XORI    :   LogicI<0x2A, "xori   ", xor>;
 }
 
 let Predicates=[HasMul] in {
-    def MULI    :   ArithI<0x18, "muli   ", mul, simm16, immSExt16>;
+  def MULI    :   ArithI<0x18, "muli   ", mul, simm16, immSExt16>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -380,290 +362,306 @@
 //===----------------------------------------------------------------------===//
 
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
-    def LBU  :  LoadM<0x30, "lbu    ", zextloadi8>;
-    def LHU  :  LoadM<0x31, "lhu    ", zextloadi16>;
-    def LW   :  LoadM<0x32, "lw     ", load>;
-
-    def LBUI : LoadMI<0x30, "lbui   ", zextloadi8>;
-    def LHUI : LoadMI<0x31, "lhui   ", zextloadi16>;
-    def LWI  : LoadMI<0x32, "lwi    ", load>;
-}
-
-    def SB  :  StoreM<0x34, "sb     ", truncstorei8>;
-    def SH  :  StoreM<0x35, "sh     ", truncstorei16>;
-    def SW  :  StoreM<0x36, "sw     ", store>;
-
-    def SBI : StoreMI<0x34, "sbi    ", truncstorei8>;
-    def SHI : StoreMI<0x35, "shi    ", truncstorei16>;
-    def SWI : StoreMI<0x36, "swi    ", store>;
+  def LBU  :  LoadM<0x30, "lbu    ", zextloadi8>;
+  def LHU  :  LoadM<0x31, "lhu    ", zextloadi16>;
+  def LW   :  LoadM<0x32, "lw     ", load>;
+
+  def LBUI : LoadMI<0x38, "lbui   ", zextloadi8>;
+  def LHUI : LoadMI<0x39, "lhui   ", zextloadi16>;
+  def LWI  : LoadMI<0x3A, "lwi    ", load>;
+}
+
+  def SB  :  StoreM<0x34, "sb     ", truncstorei8>;
+  def SH  :  StoreM<0x35, "sh     ", truncstorei16>;
+  def SW  :  StoreM<0x36, "sw     ", store>;
+
+  def SBI : StoreMI<0x3C, "sbi    ", truncstorei8>;
+  def SHI : StoreMI<0x3D, "shi    ", truncstorei16>;
+  def SWI : StoreMI<0x3E, "swi    ", store>;
 
 //===----------------------------------------------------------------------===//
 // MBlaze branch instructions
 //===----------------------------------------------------------------------===//
 
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-    def BRI    :  BranchI<0x2E, 0x00, "bri    ">;
-    def BRAI   :  BranchI<0x2E, 0x08, "brai   ">;
-    def BEQI   : BranchCI<0x2F, 0x00, "beqi   ", seteq>;
-    def BNEI   : BranchCI<0x2F, 0x01, "bnei   ", setne>;
-    def BLTI   : BranchCI<0x2F, 0x02, "blti   ", setlt>;
-    def BLEI   : BranchCI<0x2F, 0x03, "blei   ", setle>;
-    def BGTI   : BranchCI<0x2F, 0x04, "bgti   ", setgt>;
-    def BGEI   : BranchCI<0x2F, 0x05, "bgei   ", setge>;
-}
-
-let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-    def BR     :   Branch<0x26, 0x00, 0x000, "br     ">;
-    def BRA    :   Branch<0x26, 0x08, 0x000, "bra    ">;
-    def BEQ    :  BranchC<0x27, 0x00, 0x000, "beq    ", seteq>;
-    def BNE    :  BranchC<0x27, 0x01, 0x000, "bne    ", setne>;
-    def BLT    :  BranchC<0x27, 0x02, 0x000, "blt    ", setlt>;
-    def BLE    :  BranchC<0x27, 0x03, 0x000, "ble    ", setle>;
-    def BGT    :  BranchC<0x27, 0x04, 0x000, "bgt    ", setgt>;
-    def BGE    :  BranchC<0x27, 0x05, 0x000, "bge    ", setge>;
-}
-
-let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in {
-    def BRID   :  BranchI<0x2E, 0x10, "brid   ">;
-    def BRAID  :  BranchI<0x2E, 0x18, "braid  ">;
-    def BEQID  : BranchCI<0x2F, 0x10, "beqid  ", seteq>;
-    def BNEID  : BranchCI<0x2F, 0x11, "bneid  ", setne>;
-    def BLTID  : BranchCI<0x2F, 0x12, "bltid  ", setlt>;
-    def BLEID  : BranchCI<0x2F, 0x13, "bleid  ", setle>;
-    def BGTID  : BranchCI<0x2F, 0x14, "bgtid  ", setgt>;
-    def BGEID  : BranchCI<0x2F, 0x15, "bgeid  ", setge>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1, 
+    Form = FI in {
+  def BRI    :  BranchI<0x2E, 0x00, "bri    ">;
+  def BRAI   :  BranchI<0x2E, 0x08, "brai   ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, Form = FRI in {
+  def BEQI   : BranchCI<0x2F, 0x00, "beqi   ", seteq>;
+  def BNEI   : BranchCI<0x2F, 0x01, "bnei   ", setne>;
+  def BLTI   : BranchCI<0x2F, 0x02, "blti   ", setlt>;
+  def BLEI   : BranchCI<0x2F, 0x03, "blei   ", setle>;
+  def BGTI   : BranchCI<0x2F, 0x04, "bgti   ", setgt>;
+  def BGEI   : BranchCI<0x2F, 0x05, "bgei   ", setge>;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1,
+    isBarrier = 1, Form = FR in {
+  def BR     :   Branch<0x26, 0x00, 0x000, "br     ">;
+  def BRA    :   Branch<0x26, 0x08, 0x000, "bra    ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1,
+    Form = FRR in {
+  def BEQ    :  BranchC<0x27, 0x00, 0x000, "beq    ", seteq>;
+  def BNE    :  BranchC<0x27, 0x01, 0x000, "bne    ", setne>;
+  def BLT    :  BranchC<0x27, 0x02, 0x000, "blt    ", setlt>;
+  def BLE    :  BranchC<0x27, 0x03, 0x000, "ble    ", setle>;
+  def BGT    :  BranchC<0x27, 0x04, 0x000, "bgt    ", setgt>;
+  def BGE    :  BranchC<0x27, 0x05, 0x000, "bge    ", setge>;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+    isBarrier = 1, Form = FI in {
+  def BRID   :  BranchI<0x2E, 0x10, "brid   ">;
+  def BRAID  :  BranchI<0x2E, 0x18, "braid  ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+    Form = FRI in {
+  def BEQID  : BranchCI<0x2F, 0x10, "beqid  ", seteq>;
+  def BNEID  : BranchCI<0x2F, 0x11, "bneid  ", setne>;
+  def BLTID  : BranchCI<0x2F, 0x12, "bltid  ", setlt>;
+  def BLEID  : BranchCI<0x2F, 0x13, "bleid  ", setle>;
+  def BGTID  : BranchCI<0x2F, 0x14, "bgtid  ", setgt>;
+  def BGEID  : BranchCI<0x2F, 0x15, "bgeid  ", setge>;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, Form = FR,
+    hasDelaySlot = 1, hasCtrlDep = 1, isBarrier = 1 in {
+  def BRD    :   Branch<0x26, 0x10, 0x000, "brd    ">;
+  def BRAD   :   Branch<0x26, 0x18, 0x000, "brad   ">;
 }
 
 let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
-    hasDelaySlot = 1, hasCtrlDep = 1 in {
-    def BRD    :   Branch<0x26, 0x10, 0x000, "brd    ">;
-    def BRAD   :   Branch<0x26, 0x18, 0x000, "brad   ">;
-    def BEQD   :  BranchC<0x27, 0x10, 0x000, "beqd   ", seteq>;
-    def BNED   :  BranchC<0x27, 0x11, 0x000, "bned   ", setne>;
-    def BLTD   :  BranchC<0x27, 0x12, 0x000, "bltd   ", setlt>;
-    def BLED   :  BranchC<0x27, 0x13, 0x000, "bled   ", setle>;
-    def BGTD   :  BranchC<0x27, 0x14, 0x000, "bgtd   ", setgt>;
-    def BGED   :  BranchC<0x27, 0x15, 0x000, "bged   ", setge>;
-}
-
-let isCall = 1, hasCtrlDep = 1, isIndirectBranch = 1,
-    Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
-    Uses = [R1,R5,R6,R7,R8,R9,R10] in {
-    def BRL    : BranchL<0x26, 0x04, 0x000, "brl    ">;
-    def BRAL   : BranchL<0x26, 0x0C, 0x000, "bral   ">;
+    hasDelaySlot = 1, hasCtrlDep = 1, Form = FRR in {
+  def BEQD   :  BranchC<0x27, 0x10, 0x000, "beqd   ", seteq>;
+  def BNED   :  BranchC<0x27, 0x11, 0x000, "bned   ", setne>;
+  def BLTD   :  BranchC<0x27, 0x12, 0x000, "bltd   ", setlt>;
+  def BLED   :  BranchC<0x27, 0x13, 0x000, "bled   ", setle>;
+  def BGTD   :  BranchC<0x27, 0x14, 0x000, "bgtd   ", setgt>;
+  def BGED   :  BranchC<0x27, 0x15, 0x000, "bged   ", setge>;
 }
 
-let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isBarrier = 1, Form = FI,
     Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
     Uses = [R1,R5,R6,R7,R8,R9,R10] in {
-    def BRLID  : BranchLI<0x2E, 0x14, "brlid  ">;
-    def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+  def BRLID  : BranchLI<0x2E, 0x14, "brlid  ">;
+  def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
 }
 
 let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isIndirectBranch = 1,
+    isBarrier = 1, Form = FR,
     Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
     Uses = [R1,R5,R6,R7,R8,R9,R10] in {
-    def BRLD   : BranchL<0x26, 0x14, 0x000, "brld   ">;
-    def BRALD  : BranchL<0x26, 0x1C, 0x000, "brald  ">;
+  def BRLD   : BranchL<0x26, 0x14, 0x000, "brld   ">;
+  def BRALD  : BranchL<0x26, 0x1C, 0x000, "brald  ">;
 }
 
-let isReturn=1, isTerminator=1, hasDelaySlot=1,
-    isBarrier=1, hasCtrlDep=1, imm16=0x8 in {
-    def RTSD   : TRET<0x2D, (outs), (ins CPURegs:$target),
-                      "rtsd      $target, 8",
-                      [(MBlazeRet CPURegs:$target)],
-                      IIBranch>;
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+    hasCtrlDep=1, rd=0x10, imm16=0x8, Form=FR in {
+  def RTSD   : TB<0x2D, (outs), (ins GPR:$target),
+                  "rtsd      $target, 8",
+                  [(MBlazeRet GPR:$target)],
+                  IIBranch>;
 }
 
 //===----------------------------------------------------------------------===//
 // MBlaze misc instructions
 //===----------------------------------------------------------------------===//
 
-let addr = 0 in {
-    def NOP :  TADDR<0x00, (outs), (ins), "nop    ", [], IIAlu>;
+let neverHasSideEffects = 1 in {
+  def NOP :  MBlazeInst< 0x20, FRRR, (outs), (ins), "nop    ", [], IIAlu>;
 }
 
 let usesCustomInserter = 1 in {
-  //class PseudoSelCC<RegisterClass RC, string asmstr>:
-  //  MBlazePseudo<(outs RC:$D), (ins RC:$T, RC:$F, CPURegs:$CMP), asmstr,
-  //  [(set RC:$D, (MBlazeSelectCC RC:$T, RC:$F, CPURegs:$CMP))]>;
-  //def Select_CC : PseudoSelCC<CPURegs, "# MBlazeSelect_CC">;
-
-  def Select_CC : MBlazePseudo<(outs CPURegs:$dst),
-    (ins CPURegs:$T, CPURegs:$F, CPURegs:$CMP, i32imm:$CC),
+  def Select_CC : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC),
     "; SELECT_CC PSEUDO!",
     []>;
 
-  def ShiftL : MBlazePseudo<(outs CPURegs:$dst),
-    (ins CPURegs:$L, CPURegs:$R),
+  def ShiftL : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$L, GPR:$R),
     "; ShiftL PSEUDO!",
     []>;
 
-  def ShiftRA : MBlazePseudo<(outs CPURegs:$dst),
-    (ins CPURegs:$L, CPURegs:$R),
+  def ShiftRA : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$L, GPR:$R),
     "; ShiftRA PSEUDO!",
     []>;
 
-  def ShiftRL : MBlazePseudo<(outs CPURegs:$dst),
-    (ins CPURegs:$L, CPURegs:$R),
+  def ShiftRL : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$L, GPR:$R),
     "; ShiftRL PSEUDO!",
     []>;
 }
 
 
 let rb = 0 in {
-    def SEXT16 : TA<0x24, 0x061, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "sext16  $dst, $src", [], IIAlu>;
-    def SEXT8  : TA<0x24, 0x060, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "sext8   $dst, $src", [], IIAlu>;
-    def SRL    : TA<0x24, 0x041, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "srl     $dst, $src", [], IIAlu>;
-    def SRA    : TA<0x24, 0x001, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "sra     $dst, $src", [], IIAlu>;
-    def SRC    : TA<0x24, 0x021, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "src     $dst, $src", [], IIAlu>;
+  def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src),
+                  "sext16  $dst, $src", [], IIAlu>;
+  def SEXT8  : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src),
+                  "sext8   $dst, $src", [], IIAlu>;
+  def SRL    : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src),
+                  "srl     $dst, $src", [], IIAlu>;
+  def SRA    : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src),
+                  "sra     $dst, $src", [], IIAlu>;
+  def SRC    : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src),
+                  "src     $dst, $src", [], IIAlu>;
+}
+
+let opcode=0x08 in {
+  def LEA_ADDI : TB<0x08, (outs GPR:$dst), (ins memri:$addr),
+                    "addi    $dst, ${addr:stackloc}",
+                    [(set GPR:$dst, iaddr:$addr)], IIAlu>;
 }
 
-def LEA_ADDI : EffectiveAddress<"addi    $dst, ${addr:stackloc}">;
-
 //===----------------------------------------------------------------------===//
 //  Arbitrary patterns that map to one or more instructions
 //===----------------------------------------------------------------------===//
 
 // Small immediates
-def : Pat<(i32 0), (ADD R0, R0)>;
-def : Pat<(i32 immSExt16:$imm), (ADDI R0, imm:$imm)>;
-def : Pat<(i32 immZExt16:$imm), (ORI R0, imm:$imm)>;
+def : Pat<(i32 0), (ADD (i32 R0), (i32 R0))>;
+def : Pat<(i32 immSExt16:$imm), (ADDI (i32 R0), imm:$imm)>;
+def : Pat<(i32 immZExt16:$imm), (ORI (i32 R0), imm:$imm)>;
 
 // Arbitrary immediates
-def : Pat<(i32 imm:$imm), (ADDI R0, imm:$imm)>;
+def : Pat<(i32 imm:$imm), (ADDI (i32 R0), imm:$imm)>;
 
 // In register sign extension
-def : Pat<(sext_inreg CPURegs:$src, i16), (SEXT16 CPURegs:$src)>;
-def : Pat<(sext_inreg CPURegs:$src, i8),  (SEXT8 CPURegs:$src)>;
+def : Pat<(sext_inreg GPR:$src, i16), (SEXT16 GPR:$src)>;
+def : Pat<(sext_inreg GPR:$src, i8),  (SEXT8 GPR:$src)>;
 
 // Call
 def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), (BRLID tglobaladdr:$dst)>;
 def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),(BRLID texternalsym:$dst)>;
-def : Pat<(MBlazeJmpLink CPURegs:$dst), (BRLD CPURegs:$dst)>;
+def : Pat<(MBlazeJmpLink GPR:$dst), (BRLD GPR:$dst)>;
 
 // Shift Instructions
-def : Pat<(shl CPURegs:$L, CPURegs:$R), (ShiftL CPURegs:$L, CPURegs:$R)>;
-def : Pat<(sra CPURegs:$L, CPURegs:$R), (ShiftRA CPURegs:$L, CPURegs:$R)>;
-def : Pat<(srl CPURegs:$L, CPURegs:$R), (ShiftRL CPURegs:$L, CPURegs:$R)>;
+def : Pat<(shl GPR:$L, GPR:$R), (ShiftL GPR:$L, GPR:$R)>;
+def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
+def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
 
 // SET_CC operations
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETEQ),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 1)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETNE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 2)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGT),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLT),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 6)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGT),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMPU CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULT),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMPU CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMPU CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMPU CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMP GPR:$L, GPR:$R), 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETNE),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMP GPR:$L, GPR:$R), 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGT),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMP GPR:$L, GPR:$R), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLT),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMP GPR:$L, GPR:$R), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGE),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMP GPR:$L, GPR:$R), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLE),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMP GPR:$L, GPR:$R), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMPU GPR:$L, GPR:$R), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULT),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMPU GPR:$L, GPR:$R), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMPU GPR:$L, GPR:$R), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULE),
+          (Select_CC (ADDI (i32 R0), 1), (ADDI (i32 R0), 0), 
+                     (CMPU GPR:$L, GPR:$R), 6)>;
 
 // SELECT operations
-def : Pat<(select CPURegs:$C, CPURegs:$T, CPURegs:$F),
-          (Select_CC CPURegs:$T, CPURegs:$F, CPURegs:$C, 2)>;
+def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
+          (Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
 
 // SELECT_CC 
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETEQ),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 1)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETNE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 2)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGT),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLT),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 6)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGT),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULT),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), 
+                    (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$L, GPR:$R), 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETNE),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$L, GPR:$R), 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETGT),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$L, GPR:$R), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETLT),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$L, GPR:$R), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETGE),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$L, GPR:$R), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETLE),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$L, GPR:$R), 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+          (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$L, GPR:$R), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETULT),
+          (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$L, GPR:$R), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+          (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$L, GPR:$R), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETULE),
+          (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$L, GPR:$R), 6)>;
+
+// BR instructions
+def : Pat<(br bb:$T), (BRID bb:$T)>;
+def : Pat<(brind GPR:$T), (BRD GPR:$T)>;
 
 // BRCOND instructions
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETEQ), bb:$T),
-          (BEQID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETNE), bb:$T),
-          (BNEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGT), bb:$T),
-          (BGTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLT), bb:$T),
-          (BLTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGE), bb:$T),
-          (BGEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLE), bb:$T),
-          (BLEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGT), bb:$T),
-          (BGTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULT), bb:$T),
-          (BLTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGE), bb:$T),
-          (BGEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULE), bb:$T),
-          (BLEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond CPURegs:$C, bb:$T),
-          (BNEID CPURegs:$C, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
+          (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
+          (BNEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGT), bb:$T),
+          (BGTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLT), bb:$T),
+          (BLTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGE), bb:$T),
+          (BGEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLE), bb:$T),
+          (BLEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT), bb:$T),
+          (BGTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULT), bb:$T),
+          (BLTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE), bb:$T),
+          (BGEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULE), bb:$T),
+          (BLEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (i32 GPR:$C), bb:$T),
+          (BNEID GPR:$C, bb:$T)>;
 
 // Jump tables, global addresses, and constant pools
-def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>;
-def : Pat<(MBWrapper tjumptable:$in),  (ORI R0, tjumptable:$in)>;
-def : Pat<(MBWrapper tconstpool:$in),  (ORI R0, tconstpool:$in)>;
+def : Pat<(MBWrapper tglobaladdr:$in), (ORI (i32 R0), tglobaladdr:$in)>;
+def : Pat<(MBWrapper tjumptable:$in),  (ORI (i32 R0), tjumptable:$in)>;
+def : Pat<(MBWrapper tconstpool:$in),  (ORI (i32 R0), tconstpool:$in)>;
 
 // Misc instructions
-def : Pat<(and CPURegs:$lh, (not CPURegs:$rh)),(ANDN CPURegs:$lh, CPURegs:$rh)>;
+def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>;
 
 // Arithmetic with immediates
-def : Pat<(add CPURegs:$in, imm:$imm),(ADDI CPURegs:$in, imm:$imm)>;
-def : Pat<(or CPURegs:$in, imm:$imm),(ORI CPURegs:$in, imm:$imm)>;
-def : Pat<(xor CPURegs:$in, imm:$imm),(XORI CPURegs:$in, imm:$imm)>;
-
-// extended load and stores
-def : Pat<(extloadi1  iaddr:$src), (LBUI iaddr:$src)>;
-def : Pat<(extloadi8  iaddr:$src), (LBUI iaddr:$src)>;
-def : Pat<(extloadi16 iaddr:$src), (LHUI iaddr:$src)>;
-def : Pat<(extloadi1  xaddr:$src), (LBU  xaddr:$src)>;
-def : Pat<(extloadi8  xaddr:$src), (LBU  xaddr:$src)>;
-def : Pat<(extloadi16 xaddr:$src), (LHU  xaddr:$src)>;
-
-def : Pat<(sextloadi1  iaddr:$src), (SEXT8  (LBUI iaddr:$src))>;
-def : Pat<(sextloadi8  iaddr:$src), (SEXT8  (LBUI iaddr:$src))>;
-def : Pat<(sextloadi16 iaddr:$src), (SEXT16 (LHUI iaddr:$src))>;
-def : Pat<(sextloadi1  xaddr:$src), (SEXT8  (LBU xaddr:$src))>;
-def : Pat<(sextloadi8  xaddr:$src), (SEXT8  (LBU xaddr:$src))>;
-def : Pat<(sextloadi16 xaddr:$src), (SEXT16 (LHU xaddr:$src))>;
+def : Pat<(add (i32 GPR:$in), imm:$imm),(ADDI GPR:$in, imm:$imm)>;
+def : Pat<(or (i32 GPR:$in), imm:$imm),(ORI GPR:$in, imm:$imm)>;
+def : Pat<(xor (i32 GPR:$in), imm:$imm),(XORI GPR:$in, imm:$imm)>;
+
+// Convert any extend loads into zero extend loads
+def : Pat<(extloadi8  iaddr:$src), (i32 (LBUI iaddr:$src))>;
+def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>;
+def : Pat<(extloadi8  xaddr:$src), (i32 (LBU xaddr:$src))>;
+def : Pat<(extloadi16 xaddr:$src), (i32 (LHU xaddr:$src))>;
 
-// peepholes
-def : Pat<(store (i32 0), iaddr:$dst), (SWI R0, iaddr:$dst)>;
+// Peepholes
+def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>;
 
 //===----------------------------------------------------------------------===//
 // Floating Point Support

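The BRCOND patterns earlier in this file feed CMP with swapped operands on purpose: per the MicroBlaze reference, cmp rD, rA, rB performs a signed compare and sets the sign bit of rD when rA > rB. A minimal trace of the setlt case:

    // (brcond (setcc L, R, SETLT), bb:$T)  -->  (BLTID (CMP R, L), bb:$T)
    //   CMP R, L    : sign bit of the result set  <=>  R > L  <=>  L < R
    //   BLTID d, bb : taken when d is negative, i.e. exactly when L < R
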
Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeIntrinsics.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeIntrinsics.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeIntrinsics.td Tue Oct 26 19:48:03 2010
@@ -17,17 +17,11 @@
 
 // MBlaze intrinsic classes.
 let TargetPrefix = "mblaze", isTarget = 1 in { 
-  class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty],
-                                        [llvm_i32_ty],
-                                        [IntrWriteMem]>;
-
-  class MBFSL_Put_Intrinsic : Intrinsic<[],
-                                        [llvm_i32_ty, llvm_i32_ty],
-                                        [IntrWriteMem]>;
-
-  class MBFSL_PutT_Intrinsic : Intrinsic<[],
-                                         [llvm_i32_ty],
-                                         [IntrWriteMem]>;
+  class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+
+  class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+
+  class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>;
 }
 
 //===----------------------------------------------------------------------===//

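Dropping IntrWriteMem leaves these intrinsics with an empty property list, which LLVM treats most conservatively: they may read and write memory, so FSL port accesses are never CSE'd or reordered past one another. A minimal IR sketch (the llvm.mblaze.fsl.* names are assumed from the TargetPrefix above):

    %v = call i32 @llvm.mblaze.fsl.get(i32 0)      ; side-effecting: not CSE'd
    call void @llvm.mblaze.fsl.put(i32 %v, i32 1)  ; stays ordered after the get
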
Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -48,38 +48,38 @@
 /// MBlaze::R0, return the number that it corresponds to (e.g. 0).
 unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
   switch (RegEnum) {
-    case MBlaze::R0  : case MBlaze::F0  : return 0;
-    case MBlaze::R1  : case MBlaze::F1  : return 1;
-    case MBlaze::R2  : case MBlaze::F2  : return 2;
-    case MBlaze::R3  : case MBlaze::F3  : return 3;
-    case MBlaze::R4  : case MBlaze::F4  : return 4;
-    case MBlaze::R5  : case MBlaze::F5  : return 5;
-    case MBlaze::R6  : case MBlaze::F6  : return 6;
-    case MBlaze::R7  : case MBlaze::F7  : return 7;
-    case MBlaze::R8  : case MBlaze::F8  : return 8;
-    case MBlaze::R9  : case MBlaze::F9  : return 9;
-    case MBlaze::R10 : case MBlaze::F10 : return 10;
-    case MBlaze::R11 : case MBlaze::F11 : return 11;
-    case MBlaze::R12 : case MBlaze::F12 : return 12;
-    case MBlaze::R13 : case MBlaze::F13 : return 13;
-    case MBlaze::R14 : case MBlaze::F14 : return 14;
-    case MBlaze::R15 : case MBlaze::F15 : return 15;
-    case MBlaze::R16 : case MBlaze::F16 : return 16;
-    case MBlaze::R17 : case MBlaze::F17 : return 17;
-    case MBlaze::R18 : case MBlaze::F18 : return 18;
-    case MBlaze::R19 : case MBlaze::F19 : return 19;
-    case MBlaze::R20 : case MBlaze::F20 : return 20;
-    case MBlaze::R21 : case MBlaze::F21 : return 21;
-    case MBlaze::R22 : case MBlaze::F22 : return 22;
-    case MBlaze::R23 : case MBlaze::F23 : return 23;
-    case MBlaze::R24 : case MBlaze::F24 : return 24;
-    case MBlaze::R25 : case MBlaze::F25 : return 25;
-    case MBlaze::R26 : case MBlaze::F26 : return 26;
-    case MBlaze::R27 : case MBlaze::F27 : return 27;
-    case MBlaze::R28 : case MBlaze::F28 : return 28;
-    case MBlaze::R29 : case MBlaze::F29 : return 29;
-    case MBlaze::R30 : case MBlaze::F30 : return 30;
-    case MBlaze::R31 : case MBlaze::F31 : return 31;
+    case MBlaze::R0  : return 0;
+    case MBlaze::R1  : return 1;
+    case MBlaze::R2  : return 2;
+    case MBlaze::R3  : return 3;
+    case MBlaze::R4  : return 4;
+    case MBlaze::R5  : return 5;
+    case MBlaze::R6  : return 6;
+    case MBlaze::R7  : return 7;
+    case MBlaze::R8  : return 8;
+    case MBlaze::R9  : return 9;
+    case MBlaze::R10 : return 10;
+    case MBlaze::R11 : return 11;
+    case MBlaze::R12 : return 12;
+    case MBlaze::R13 : return 13;
+    case MBlaze::R14 : return 14;
+    case MBlaze::R15 : return 15;
+    case MBlaze::R16 : return 16;
+    case MBlaze::R17 : return 17;
+    case MBlaze::R18 : return 18;
+    case MBlaze::R19 : return 19;
+    case MBlaze::R20 : return 20;
+    case MBlaze::R21 : return 21;
+    case MBlaze::R22 : return 22;
+    case MBlaze::R23 : return 23;
+    case MBlaze::R24 : return 24;
+    case MBlaze::R25 : return 25;
+    case MBlaze::R26 : return 26;
+    case MBlaze::R27 : return 27;
+    case MBlaze::R28 : return 28;
+    case MBlaze::R29 : return 29;
+    case MBlaze::R30 : return 30;
+    case MBlaze::R31 : return 31;
     default: llvm_unreachable("Unknown register number!");
   }
   return 0; // Not reached
@@ -242,9 +242,9 @@
 // FrameIndex represents objects inside an abstract stack.
 // We must replace FrameIndex with a stack/frame pointer
 // direct reference.
-unsigned MBlazeRegisterInfo::
+void MBlazeRegisterInfo::
 eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
-                    FrameIndexValue *Value, RegScavenger *RS) const {
+                    RegScavenger *RS) const {
   MachineInstr &MI = *II;
   MachineFunction &MF = *MI.getParent()->getParent();
 
@@ -277,7 +277,6 @@
 
   MI.getOperand(oi).ChangeToImmediate(Offset);
   MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
-  return 0;
 }
 
 void MBlazeRegisterInfo::

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -63,9 +63,8 @@
                                      MachineBasicBlock::iterator I) const;
 
   /// Stack Frame Processing Methods
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 

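Both the .cpp and .h changes above track the TargetRegisterInfo interface update in this branch: eliminateFrameIndex no longer takes a FrameIndexValue* and no longer returns a replacement register. A sketch of the resulting override shape, assuming the base declaration matches:

    void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                             RegScavenger *RS = NULL) const;
    // The frame-index operand is rewritten in place to frame/stack
    // pointer + offset; callers no longer consume a return value.
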
Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeRegisterInfo.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===//
+//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -23,10 +23,12 @@
 }
 
 // MBlaze 32-bit (aliased) FPU Registers
+/*
 class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> {
   let Num = num;
   let Aliases = aliases;
 }
+*/
 
 //===----------------------------------------------------------------------===//
 //  Registers
@@ -69,45 +71,47 @@
   def R31 : MBlazeGPRReg< 31, "r31">,  DwarfRegNum<[31]>;
 
   /// MBlaze Single point precision FPU Registers
-  def F0  : FPR< 0,  "r0", [R0]>,  DwarfRegNum<[32]>;
-  def F1  : FPR< 1,  "r1", [R1]>,  DwarfRegNum<[33]>;
-  def F2  : FPR< 2,  "r2", [R2]>,  DwarfRegNum<[34]>;
-  def F3  : FPR< 3,  "r3", [R3]>,  DwarfRegNum<[35]>;
-  def F4  : FPR< 4,  "r4", [R4]>,  DwarfRegNum<[36]>;
-  def F5  : FPR< 5,  "r5", [R5]>,  DwarfRegNum<[37]>;
-  def F6  : FPR< 6,  "r6", [R6]>,  DwarfRegNum<[38]>;
-  def F7  : FPR< 7,  "r7", [R7]>,  DwarfRegNum<[39]>;
-  def F8  : FPR< 8,  "r8", [R8]>,  DwarfRegNum<[40]>;
-  def F9  : FPR< 9,  "r9", [R9]>,  DwarfRegNum<[41]>;
-  def F10 : FPR<10, "r10", [R10]>, DwarfRegNum<[42]>;
-  def F11 : FPR<11, "r11", [R11]>, DwarfRegNum<[43]>;
-  def F12 : FPR<12, "r12", [R12]>, DwarfRegNum<[44]>;
-  def F13 : FPR<13, "r13", [R13]>, DwarfRegNum<[45]>;
-  def F14 : FPR<14, "r14", [R14]>, DwarfRegNum<[46]>;
-  def F15 : FPR<15, "r15", [R15]>, DwarfRegNum<[47]>;
-  def F16 : FPR<16, "r16", [R16]>, DwarfRegNum<[48]>;
-  def F17 : FPR<17, "r17", [R17]>, DwarfRegNum<[49]>;
-  def F18 : FPR<18, "r18", [R18]>, DwarfRegNum<[50]>;
-  def F19 : FPR<19, "r19", [R19]>, DwarfRegNum<[51]>;
-  def F20 : FPR<20, "r20", [R20]>, DwarfRegNum<[52]>;
-  def F21 : FPR<21, "r21", [R21]>, DwarfRegNum<[53]>;
-  def F22 : FPR<22, "r22", [R22]>, DwarfRegNum<[54]>;
-  def F23 : FPR<23, "r23", [R23]>, DwarfRegNum<[55]>;
-  def F24 : FPR<24, "r24", [R24]>, DwarfRegNum<[56]>;
-  def F25 : FPR<25, "r25", [R25]>, DwarfRegNum<[57]>;
-  def F26 : FPR<26, "r26", [R26]>, DwarfRegNum<[58]>;
-  def F27 : FPR<27, "r27", [R27]>, DwarfRegNum<[59]>;
-  def F28 : FPR<28, "r28", [R28]>, DwarfRegNum<[60]>;
-  def F29 : FPR<29, "r29", [R29]>, DwarfRegNum<[61]>;
-  def F30 : FPR<30, "r30", [R30]>, DwarfRegNum<[62]>;
-  def F31 : FPR<31, "r31", [R31]>, DwarfRegNum<[63]>;
+  /*
+  def F0  : FPR< 0,  "f0", [R0]>,  DwarfRegNum<[32]>;
+  def F1  : FPR< 1,  "f1", [R1]>,  DwarfRegNum<[33]>;
+  def F2  : FPR< 2,  "f2", [R2]>,  DwarfRegNum<[34]>;
+  def F3  : FPR< 3,  "f3", [R3]>,  DwarfRegNum<[35]>;
+  def F4  : FPR< 4,  "f4", [R4]>,  DwarfRegNum<[36]>;
+  def F5  : FPR< 5,  "f5", [R5]>,  DwarfRegNum<[37]>;
+  def F6  : FPR< 6,  "f6", [R6]>,  DwarfRegNum<[38]>;
+  def F7  : FPR< 7,  "f7", [R7]>,  DwarfRegNum<[39]>;
+  def F8  : FPR< 8,  "f8", [R8]>,  DwarfRegNum<[40]>;
+  def F9  : FPR< 9,  "f9", [R9]>,  DwarfRegNum<[41]>;
+  def F10 : FPR<10, "f10", [R10]>, DwarfRegNum<[42]>;
+  def F11 : FPR<11, "f11", [R11]>, DwarfRegNum<[43]>;
+  def F12 : FPR<12, "f12", [R12]>, DwarfRegNum<[44]>;
+  def F13 : FPR<13, "f13", [R13]>, DwarfRegNum<[45]>;
+  def F14 : FPR<14, "f14", [R14]>, DwarfRegNum<[46]>;
+  def F15 : FPR<15, "f15", [R15]>, DwarfRegNum<[47]>;
+  def F16 : FPR<16, "f16", [R16]>, DwarfRegNum<[48]>;
+  def F17 : FPR<17, "f17", [R17]>, DwarfRegNum<[49]>;
+  def F18 : FPR<18, "f18", [R18]>, DwarfRegNum<[50]>;
+  def F19 : FPR<19, "f19", [R19]>, DwarfRegNum<[51]>;
+  def F20 : FPR<20, "f20", [R20]>, DwarfRegNum<[52]>;
+  def F21 : FPR<21, "f21", [R21]>, DwarfRegNum<[53]>;
+  def F22 : FPR<22, "f22", [R22]>, DwarfRegNum<[54]>;
+  def F23 : FPR<23, "f23", [R23]>, DwarfRegNum<[55]>;
+  def F24 : FPR<24, "f24", [R24]>, DwarfRegNum<[56]>;
+  def F25 : FPR<25, "f25", [R25]>, DwarfRegNum<[57]>;
+  def F26 : FPR<26, "f26", [R26]>, DwarfRegNum<[58]>;
+  def F27 : FPR<27, "f27", [R27]>, DwarfRegNum<[59]>;
+  def F28 : FPR<28, "f28", [R28]>, DwarfRegNum<[60]>;
+  def F29 : FPR<29, "f29", [R29]>, DwarfRegNum<[61]>;
+  def F30 : FPR<30, "f30", [R30]>, DwarfRegNum<[62]>;
+  def F31 : FPR<31, "f31", [R31]>, DwarfRegNum<[63]>;
+  */
 }
 
 //===----------------------------------------------------------------------===//
 // Register Classes
 //===----------------------------------------------------------------------===//
 
-def CPURegs : RegisterClass<"MBlaze", [i32], 32,
+def GPR : RegisterClass<"MBlaze", [i32,f32], 32,
   [
   // Return Values and Arguments
   R3, R4, R5, R6, R7, R8, R9, R10,
@@ -135,14 +139,15 @@
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
   let MethodBodies = [{
-    CPURegsClass::iterator
-    CPURegsClass::allocation_order_end(const MachineFunction &MF) const {
+    GPRClass::iterator
+    GPRClass::allocation_order_end(const MachineFunction &MF) const {
       // The last 10 registers on the list above are reserved
       return end()-10;
     }
   }];
 }
 
+/*
 def FGR32 : RegisterClass<"MBlaze", [f32], 32,
   [
   // Return Values and Arguments
@@ -178,3 +183,4 @@
     }
   }];
 }
+*/

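With the FPR definitions and FGR32 commented out, the GPR class above is the only register class left, and it advertises both i32 and f32:

    // f32 values are kept in the integer registers: the same LW/SW and
    // ALU instructions move them, and the separate FPU register file
    // (with its DwarfRegNum range 32-63) is retired along with FGR32.
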
Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeSchedule.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeSchedule.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeSchedule.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===//
+//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -41,7 +41,7 @@
 // MBlaze Generic instruction itineraries.
 //===----------------------------------------------------------------------===//
 def MBlazeGenericItineraries : ProcessorItineraries<
-  [ALU, IMULDIV], [
+  [ALU, IMULDIV], [], [
   InstrItinData<IIAlu              , [InstrStage<1,  [ALU]>]>,
   InstrItinData<IILoad             , [InstrStage<3,  [ALU]>]>,
   InstrItinData<IIStore            , [InstrStage<1,  [ALU]>]>,

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeTargetMachine.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeTargetMachine.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeTargetMachine.cpp Tue Oct 26 19:48:03 2010
@@ -15,13 +15,51 @@
 #include "MBlazeMCAsmInfo.h"
 #include "MBlazeTargetMachine.h"
 #include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+                                    MCContext &Ctx, TargetAsmBackend &TAB,
+                                    raw_ostream &_OS,
+                                    MCCodeEmitter *_Emitter,
+                                    bool RelaxAll) {
+  Triple TheTriple(TT);
+  switch (TheTriple.getOS()) {
+  case Triple::Darwin:
+    llvm_unreachable("MBlaze does not support Darwin MACH-O format");
+    return NULL;
+  case Triple::MinGW32:
+  case Triple::MinGW64:
+  case Triple::Cygwin:
+  case Triple::Win32:
+    llvm_unreachable("ARM does not support Windows COFF format");
+    return NULL;
+  default:
+    return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+  }
+}
+
+
 extern "C" void LLVMInitializeMBlazeTarget() {
   // Register the target.
   RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
   RegisterAsmInfo<MBlazeMCAsmInfo> A(TheMBlazeTarget);
+
+  // Register the MC code emitter
+  TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget,
+                                      llvm::createMBlazeMCCodeEmitter);
+  
+  // Register the asm backend
+  TargetRegistry::RegisterAsmBackend(TheMBlazeTarget,
+                                     createMBlazeAsmBackend);
+
+  // Register the object streamer
+  TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget,
+                                         createMCStreamer);
 }
 
 // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -35,11 +73,10 @@
                     const std::string &FS):
   LLVMTargetMachine(T, TT),
   Subtarget(TT, FS),
-  DataLayout("E-p:32:32-i8:8:8-i16:16:16-i64:32:32-"
-             "f64:32:32-v64:32:32-v128:32:32-n32"),
+  DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
   InstrInfo(*this),
   FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
-  TLInfo(*this), TSInfo(*this) {
+  TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) {
   if (getRelocationModel() == Reloc::Default) {
       setRelocationModel(Reloc::Static);
   }

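For reference, the trimmed target data string decodes as follows under standard LLVM DataLayout syntax; anything not listed (i64, f32/f64, vectors, aggregates) falls back to the built-in defaults:

    // "E-p:32:32:32-i8:8:8-i16:16:16"
    //   E            big-endian
    //   p:32:32:32   pointers: 32-bit size, 32-bit ABI and preferred alignment
    //   i8:8:8       i8:  8-bit ABI and preferred alignment
    //   i16:16:16    i16: 16-bit ABI and preferred alignment
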
Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeTargetMachine.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeTargetMachine.h (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/MBlazeTargetMachine.h Tue Oct 26 19:48:03 2010
@@ -19,6 +19,8 @@
 #include "MBlazeISelLowering.h"
 #include "MBlazeSelectionDAGInfo.h"
 #include "MBlazeIntrinsicInfo.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameInfo.h"
@@ -34,6 +36,7 @@
     MBlazeTargetLowering  TLInfo;
     MBlazeSelectionDAGInfo TSInfo;
     MBlazeIntrinsicInfo IntrinsicInfo;
+    MBlazeELFWriterInfo    ELFWriterInfo;
   public:
     MBlazeTargetMachine(const Target &T, const std::string &TT,
                       const std::string &FS);
@@ -62,6 +65,10 @@
     const TargetIntrinsicInfo *getIntrinsicInfo() const
     { return &IntrinsicInfo; }
 
+    virtual const MBlazeELFWriterInfo *getELFWriterInfo() const {
+      return &ELFWriterInfo;
+    }
+
     // Pass Pipeline Configuration
     virtual bool addInstSelector(PassManagerBase &PM,
                                  CodeGenOpt::Level OptLevel);

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/Makefile?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/Makefile (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/Makefile Tue Oct 26 19:48:03 2010
@@ -14,10 +14,11 @@
 BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \
                 MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \
                 MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \
-                MBlazeGenDAGISel.inc MBlazeGenCallingConv.inc \
+                MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
+                MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
                 MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = InstPrinter AsmParser TargetInfo
 
 include $(LEVEL)/Makefile.common
 

Modified: llvm/branches/wendling/eh/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MBlaze/TargetInfo/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MBlaze/TargetInfo/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/MBlaze/TargetInfo/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -1,4 +1,5 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
 
 add_llvm_library(LLVMMBlazeInfo
   MBlazeTargetInfo.cpp

Modified: llvm/branches/wendling/eh/lib/Target/MSP430/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -20,6 +20,6 @@
   MSP430Subtarget.cpp
   MSP430TargetMachine.cpp
   MSP430SelectionDAGInfo.cpp
+  MSP430AsmPrinter.cpp
+  MSP430MCInstLower.cpp
   )
-
-target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/MSP430/MSP430.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/MSP430.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/MSP430.td (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/MSP430.td Tue Oct 26 19:48:03 2010
@@ -52,6 +52,7 @@
 
 def MSP430InstPrinter : AsmWriter {
   string AsmWriterClassName  = "InstPrinter";
+  bit isMCAsmWriter = 1;
 }
 
 //===----------------------------------------------------------------------===//

Modified: llvm/branches/wendling/eh/lib/Target/MSP430/MSP430BranchSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/MSP430BranchSelector.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/MSP430BranchSelector.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/MSP430BranchSelector.cpp Tue Oct 26 19:48:03 2010
@@ -10,7 +10,7 @@
 // This file contains a pass that scans a machine function to determine which
 // conditional branches need more than 10 bits of displacement to reach their
 // target basic block.  It does this in two passes; a calculation of basic block
-// positions pass, and a branch psuedo op to machine branch opcode pass.  This
+// positions pass, and a branch pseudo op to machine branch opcode pass.  This
 // pass should be run last, just before the assembly printer.
 //
 //===----------------------------------------------------------------------===//
@@ -30,7 +30,7 @@
 namespace {
   struct MSP430BSel : public MachineFunctionPass {
     static char ID;
-    MSP430BSel() : MachineFunctionPass(&ID) {}
+    MSP430BSel() : MachineFunctionPass(ID) {}
 
     /// BlockSizes - The sizes of the basic blocks in the function.
     std::vector<unsigned> BlockSizes;

Modified: llvm/branches/wendling/eh/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -60,15 +60,6 @@
       return GV != 0 || CP != 0 || ES != 0 || JT != -1;
     }
 
-    bool hasBaseReg() const {
-      return Base.Reg.getNode() != 0;
-    }
-
-    void setBaseReg(SDValue Reg) {
-      BaseType = RegBase;
-      Base.Reg = Reg;
-    }
-
     void dump() {
       errs() << "MSP430ISelAddressMode " << this << '\n';
       if (BaseType == RegBase && Base.Reg.getNode() != 0) {
@@ -129,7 +120,7 @@
     SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
                                unsigned Opc8, unsigned Opc16);
 
-    bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp);
+    bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Disp);
   };
 }  // end anonymous namespace
 
@@ -254,7 +245,7 @@
 /// SelectAddr - returns true if it is able pattern match an addressing mode.
 /// It returns the operands which make up the maximal addressing mode it can
 /// match by reference.
-bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
+bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
                                     SDValue &Base, SDValue &Disp) {
   MSP430ISelAddressMode AM;
 
@@ -272,7 +263,7 @@
     AM.Base.Reg;
 
   if (AM.GV)
-    Disp = CurDAG->getTargetGlobalAddress(AM.GV, Op->getDebugLoc(),
+    Disp = CurDAG->getTargetGlobalAddress(AM.GV, N->getDebugLoc(),
                                           MVT::i16, AM.Disp,
                                           0/*AM.SymbolFlags*/);
   else if (AM.CP)
@@ -298,7 +289,7 @@
   switch (ConstraintCode) {
   default: return true;
   case 'm':   // memory
-    if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+    if (!SelectAddr(Op, Op0, Op1))
       return true;
     break;
   }

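SelectAddr loses its SDNode *Op parameter here because ComplexPattern selection callbacks in this branch no longer receive the pattern root; the debug location now comes from the address value itself, as in the getTargetGlobalAddress call above. Side by side:

    // Old: bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp);
    // New: bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Disp);
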
Modified: llvm/branches/wendling/eh/lib/Target/MSP430/MSP430ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/MSP430ISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/MSP430ISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/MSP430ISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -376,7 +376,7 @@
       //from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
       InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
-                                   PseudoSourceValue::getFixedStack(FI), 0,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -507,8 +507,7 @@
 
 
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
-                                         PseudoSourceValue::getStack(),
-                                         VA.getLocMemOffset(), false, false, 0));
+                                         MachinePointerInfo(),false, false, 0));
     }
   }
 
@@ -914,13 +913,13 @@
     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                    FrameAddr, Offset),
-                       NULL, 0, false, false, 0);
+                       MachinePointerInfo(), false, false, 0);
   }
 
   // Just load the return address.
   SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     RetAddrFI, NULL, 0, false, false, 0);
+                     RetAddrFI, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -934,7 +933,8 @@
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                                          MSP430::FPW, VT);
   while (Depth--)
-    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+                            MachinePointerInfo(),
                             false, false, 0);
   return FrameAddr;
 }

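The MSP430ISelLowering changes are part of the tree-wide migration from (const Value*, offset) pairs to MachinePointerInfo in the SelectionDAG memory APIs; the fixed-stack case above shows the shape of the change:

    // Before: DAG.getLoad(VT, dl, Chain, FIN,
    //                     PseudoSourceValue::getFixedStack(FI), 0, false, false, 0);
    // After:  DAG.getLoad(VT, dl, Chain, FIN,
    //                     MachinePointerInfo::getFixedStack(FI), false, false, 0);
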
Modified: llvm/branches/wendling/eh/lib/Target/MSP430/MSP430InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/MSP430InstrInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/MSP430InstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/MSP430InstrInfo.cpp Tue Oct 26 19:48:03 2010
@@ -40,8 +40,9 @@
   MachineFrameInfo &MFI = *MF.getFrameInfo();
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOStore, 0,
+    MF.getMachineMemOperand(
+              MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+                            MachineMemOperand::MOStore,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
 
@@ -68,8 +69,9 @@
   MachineFrameInfo &MFI = *MF.getFrameInfo();
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOLoad, 0,
+    MF.getMachineMemOperand(
+              MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+                            MachineMemOperand::MOLoad,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
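
The spill/reload helpers pick up the same change for MachineMemOperand: the
PseudoSourceValue is wrapped in a MachinePointerInfo and the standalone offset
parameter disappears. A sketch, with MF, MFI, and FrameIdx as in the hunk above:

    // Build a store memory operand for a fixed stack slot; the offset now
    // lives inside MachinePointerInfo (implicitly zero here).
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(
          MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
          MachineMemOperand::MOStore,
          MFI.getObjectSize(FrameIdx),
          MFI.getObjectAlignment(FrameIdx));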
 

Modified: llvm/branches/wendling/eh/lib/Target/MSP430/MSP430RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/MSP430RegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/MSP430RegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/MSP430RegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -163,10 +163,9 @@
   MBB.erase(I);
 }
 
-unsigned
+void
 MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                        int SPAdj, FrameIndexValue *Value,
-                                        RegScavenger *RS) const {
+                                        int SPAdj, RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   unsigned i = 0;
@@ -204,7 +203,7 @@
     MI.getOperand(i).ChangeToRegister(BasePtr, false);
 
     if (Offset == 0)
-      return 0;
+      return;
 
     // We need to materialize the offset via add instruction.
     unsigned DstReg = MI.getOperand(0).getReg();
@@ -215,12 +214,11 @@
       BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
         .addReg(DstReg).addImm(Offset);
 
-    return 0;
+    return;
   }
 
   MI.getOperand(i).ChangeToRegister(BasePtr, false);
   MI.getOperand(i+1).ChangeToImmediate(Offset);
-  return 0;
 }
 
 void

Modified: llvm/branches/wendling/eh/lib/Target/MSP430/MSP430RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/MSP430RegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/MSP430RegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/MSP430RegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -46,9 +46,8 @@
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
 
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   void emitPrologue(MachineFunction &MF) const;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
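
MSP430 and Mips both track the same TargetRegisterInfo interface change here:
eliminateFrameIndex no longer returns a register and no longer threads the
unused FrameIndexValue out-parameter through. Side by side, as a sketch:

    // Old interface: returned a register (always 0 in these targets) and
    // carried an unused FrameIndexValue out-parameter.
    unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                                 FrameIndexValue *Value = NULL,
                                 RegScavenger *RS = NULL) const;

    // New interface: mutates the instruction in place, returns nothing.
    void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                             RegScavenger *RS = NULL) const;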

Modified: llvm/branches/wendling/eh/lib/Target/MSP430/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/Makefile?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/Makefile (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/Makefile Tue Oct 26 19:48:03 2010
@@ -18,7 +18,7 @@
 		MSP430GenDAGISel.inc MSP430GenCallingConv.inc \
 		MSP430GenSubtarget.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = InstPrinter TargetInfo
 
 include $(LEVEL)/Makefile.common
 

Modified: llvm/branches/wendling/eh/lib/Target/MSP430/TargetInfo/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/MSP430/TargetInfo/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/MSP430/TargetInfo/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/MSP430/TargetInfo/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -4,4 +4,4 @@
   MSP430TargetInfo.cpp
   )
 
-add_dependencies(LLVMMSP430Info MSP430Table_gen)
+add_dependencies(LLVMMSP430Info MSP430CodeGenTable_gen)

Modified: llvm/branches/wendling/eh/lib/Target/Mangler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mangler.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mangler.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Mangler.cpp Tue Oct 26 19:48:03 2010
@@ -180,7 +180,8 @@
   ManglerPrefixTy PrefixTy = Mangler::Default;
   if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
     PrefixTy = Mangler::Private;
-  else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
+  else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() ||
+           GV->hasLinkerPrivateWeakDefAutoLinkage())
     PrefixTy = Mangler::LinkerPrivate;
   
   // If this global has a name, handle it simply.

Modified: llvm/branches/wendling/eh/lib/Target/Mips/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -22,5 +22,3 @@
   MipsTargetObjectFile.cpp
   MipsSelectionDAGInfo.cpp
   )
-
-target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/Mips/Mips.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/Mips.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/Mips.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/Mips.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===//
+//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsCallingConv.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsCallingConv.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsCallingConv.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===//
+//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
 // 
 //                     The LLVM Compiler Infrastructure
 //

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsDelaySlotFiller.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsDelaySlotFiller.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsDelaySlotFiller.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsDelaySlotFiller.cpp Tue Oct 26 19:48:03 2010
@@ -32,7 +32,7 @@
 
     static char ID;
     Filler(TargetMachine &tm) 
-      : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
 
     virtual const char *getPassName() const {
       return "Mips Delay Slot Filler";

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -84,8 +84,7 @@
   SDNode *Select(SDNode *N);
 
   // Complex Pattern.
-  bool SelectAddr(SDNode *Op, SDValue N, 
-                  SDValue &Base, SDValue &Offset);
+  bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
 
   SDNode *SelectLoadFp64(SDNode *N);
   SDNode *SelectStoreFp64(SDNode *N);
@@ -110,8 +109,7 @@
 /// ComplexPattern used by MipsInstrInfo;
 /// used on Mips load/store instructions.
 bool MipsDAGToDAGISel::
-SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base)
-{
+SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
   // if Address is FI, get the TargetFrameIndex.
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
     Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -137,7 +135,7 @@
   // Operand is a result from an ADD.
   if (Addr.getOpcode() == ISD::ADD) {
     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
-      if (Predicate_immSExt16(CN)) {
+      if (isInt<16>(CN->getSExtValue())) {
 
         // If the first operand is a FI, get the TargetFI Node
         if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
@@ -184,15 +182,16 @@
   if (!Subtarget.isMips1() || NVT != MVT::f64)
     return NULL;
 
-  if (!Predicate_unindexedload(N) ||
-      !Predicate_load(N))
+  LoadSDNode *LN = cast<LoadSDNode>(N);
+  if (LN->getExtensionType() != ISD::NON_EXTLOAD ||
+      LN->getAddressingMode() != ISD::UNINDEXED)
     return NULL;
 
   SDValue Chain = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDValue Offset0, Offset1, Base;
 
-  if (!SelectAddr(N, N1, Offset0, Base) ||
+  if (!SelectAddr(N1, Offset0, Base) ||
       N1.getValueType() != MVT::i32)
     return NULL;
 
@@ -248,15 +247,15 @@
 
   SDValue Chain = N->getOperand(0);
 
-  if (!Predicate_unindexedstore(N) ||
-      !Predicate_store(N))
+  StoreSDNode *SN = cast<StoreSDNode>(N);
+  if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED)
     return NULL;
 
   SDValue N1 = N->getOperand(1);
   SDValue N2 = N->getOperand(2);
   SDValue Offset0, Offset1, Base;
 
-  if (!SelectAddr(N, N2, Offset0, Base) ||
+  if (!SelectAddr(N2, Offset0, Base) ||
       N1.getValueType() != MVT::f64 ||
       N2.getValueType() != MVT::i32)
     return NULL;
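
Alongside SelectAddr losing its SDNode *Op root argument, the
tablegen-generated Predicate_* helpers are gone; the selector now queries the
load/store node directly. The replacement checks, sketched with N as the
candidate node:

    // Loads: accept only plain (non-extending), unindexed loads.
    LoadSDNode *LN = cast<LoadSDNode>(N);
    if (LN->getExtensionType() != ISD::NON_EXTLOAD ||
        LN->getAddressingMode() != ISD::UNINDEXED)
      return NULL;

    // Stores: likewise reject truncating or indexed stores.
    StoreSDNode *SN = cast<StoreSDNode>(N);
    if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED)
      return NULL;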

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -506,7 +506,7 @@
     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                             MipsII::MO_GOT);
     SDValue ResNode = DAG.getLoad(MVT::i32, dl, 
-                                  DAG.getEntryNode(), GA, NULL, 0,
+                                  DAG.getEntryNode(), GA, MachinePointerInfo(),
                                   false, false, 0);
     // For functions and global targets that are not internally linked, only
     // a load from the GOT/GP is necessary for PIC to work.
@@ -546,7 +546,8 @@
     SDValue Ops[] = { JTI };
     HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
   } else // Emit Load from Global Pointer
-    HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0,
+    HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
+                         MachinePointerInfo(),
                          false, false, 0);
 
   SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI);
@@ -584,7 +585,8 @@
     SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), 
                                       N->getOffset(), MipsII::MO_GOT);
     SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), 
-                               CP, NULL, 0, false, false, 0);
+                               CP, MachinePointerInfo::getConstantPool(),
+                               false, false, 0);
     SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
     ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
   }
@@ -603,7 +605,8 @@
   // vastart just stores the address of the VarArgsFrameIndex slot into the
   // memory location argument.
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0,
+  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+                      MachinePointerInfo(SV),
                       false, false, 0);
 }
 
@@ -863,7 +866,8 @@
 
     // Emit ISD::STORE which stores the
     // parameter value to a stack location.
-    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                       MachinePointerInfo(),
                                        false, false, 0));
   }
 
@@ -937,8 +941,9 @@
 
       // Reload GP value.
       FI = MipsFI->getGPFI();
-      SDValue FIN = DAG.getFrameIndex(FI,getPointerTy());
-      SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0,
+      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+      SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0);
       Chain = GPLoad.getValue(1);
       Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), 
@@ -1104,7 +1109,8 @@
 
       // Create load nodes to retrieve arguments from the stack
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -1140,7 +1146,8 @@
       int FI = MFI->CreateFixedObject(4, 0, true);
       MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
-      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+                                       MachinePointerInfo(),
                                        false, false, 0));
 
       // Record the frame index of the first variable argument

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrFPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrFPU.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrFPU.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrFPU.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MipsInstrFPU.td - Mips FPU Instruction Information -------*- C++ -*-===//
+//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrFormats.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrFormats.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrFormats.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsInstrInfo.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MipsInstrInfo.td - Mips Register defs --------------------*- C++ -*-===//
+//===- MipsInstrInfo.td - Mips Register defs ---------------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -96,12 +96,7 @@
 
 // Node immediate fits as 16-bit sign extended on target immediate.
 // e.g. addi, andi
-def immSExt16  : PatLeaf<(imm), [{
-  if (N->getValueType(0) == MVT::i32)
-    return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
-  else
-    return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
-}]>;
+def immSExt16  : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
 
 // Node immediate fits as 16-bit zero extended on target immediate.
 // The LO16 param means that only the lower 16 bits of the node
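
The open-coded sign-extension test is replaced by the isInt<N> helper (from
llvm/Support/MathExtras.h, assuming the usual tree layout), mirroring the same
change in MipsISelDAGToDAG.cpp above. The equivalence, as a sketch:

    // Old check, spelled out per value type:
    //   (int32_t)N->getZExtValue() == (short)N->getZExtValue()
    // New check: does the sign-extended immediate fit in 16 bits?
    bool Fits = isInt<16>(CN->getSExtValue());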

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsMachineFunction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsMachineFunction.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsMachineFunction.h (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsMachineFunction.h Tue Oct 26 19:48:03 2010
@@ -26,11 +26,11 @@
 class MipsFunctionInfo : public MachineFunctionInfo {
 
 private:
-  /// Holds for each function where on the stack the Frame Pointer must be 
+  /// Holds for each function where on the stack the Frame Pointer must be
   /// saved. This is used on Prologue and Epilogue to emit FP save/restore
   int FPStackOffset;
 
-  /// Holds for each function where on the stack the Return Address must be 
+  /// Holds for each function where on the stack the Return Address must be
   /// saved. This is used on Prologue and Epilogue to emit RA save/restore
   int RAStackOffset;
 
@@ -51,22 +51,22 @@
       : FI(FrameIndex), SPOffset(StackPointerOffset) {}
   };
 
-  /// When PIC is used the GP must be saved on the stack on the function 
-  /// prologue and must be reloaded from this stack location after every 
-  /// call. A reference to its stack location and frame index must be kept 
+  /// When PIC is used the GP must be saved on the stack on the function
+  /// prologue and must be reloaded from this stack location after every
+  /// call. A reference to its stack location and frame index must be kept
   /// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
   MipsFIHolder GPHolder;
 
   /// On LowerFormalArguments the stack size is unknown, so the Stack
-  /// Pointer Offset calculation of "not in register arguments" must be 
-  /// postponed to emitPrologue. 
+  /// Pointer Offset calculation of "not in register arguments" must be
+  /// postponed to emitPrologue.
   SmallVector<MipsFIHolder, 16> FnLoadArgs;
   bool HasLoadArgs;
 
-  // When VarArgs, we must write registers back to caller stack, preserving 
-  // on register arguments. Since the stack size is unknown on 
+  // When VarArgs, we must write registers back to caller stack, preserving
+  // on register arguments. Since the stack size is unknown on
   // LowerFormalArguments, the Stack Pointer Offset calculation must be
-  // postponed to emitPrologue. 
+  // postponed to emitPrologue.
   SmallVector<MipsFIHolder, 4> FnStoreVarArgs;
   bool HasStoreVarArgs;
 
@@ -84,9 +84,9 @@
   int VarArgsFrameIndex;
 
 public:
-  MipsFunctionInfo(MachineFunction& MF) 
-  : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), 
-    FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), 
+  MipsFunctionInfo(MachineFunction& MF)
+  : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
+    FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false),
     HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
     VarArgsFrameIndex(0)
   {}
@@ -110,7 +110,7 @@
   bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
 
   bool hasLoadArgs() const { return HasLoadArgs; }
-  bool hasStoreVarArgs() const { return HasStoreVarArgs; } 
+  bool hasStoreVarArgs() const { return HasStoreVarArgs; }
 
   void recordLoadArgsFI(int FI, int SPOffset) {
     if (!HasLoadArgs) HasLoadArgs=true;
@@ -123,12 +123,12 @@
 
   void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
     if (!hasLoadArgs()) return;
-    for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i) 
+    for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
       MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
   }
   void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
-    if (!hasStoreVarArgs()) return; 
-    for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i) 
+    if (!hasStoreVarArgs()) return;
+    for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
       MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset );
   }
 

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -327,10 +327,9 @@
 // FrameIndexes represent objects inside an abstract stack.
 // We must replace them with a direct stack/frame pointer
 // reference.
-unsigned MipsRegisterInfo::
+void MipsRegisterInfo::
 eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
-                    FrameIndexValue *Value, RegScavenger *RS) const
-{
+                    RegScavenger *RS) const {
   MachineInstr &MI = *II;
   MachineFunction &MF = *MI.getParent()->getParent();
 
@@ -361,7 +360,6 @@
 
   MI.getOperand(i-1).ChangeToImmediate(Offset);
   MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
-  return 0;
 }
 
 void MipsRegisterInfo::

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -51,9 +51,8 @@
                                      MachineBasicBlock::iterator I) const;
 
   /// Stack Frame Processing Methods
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsRegisterInfo.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsSchedule.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsSchedule.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsSchedule.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- MipsSchedule.td - Mips Scheduling Definitions ------------*- C++ -*-===//
+//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -40,7 +40,7 @@
 //===----------------------------------------------------------------------===//
 // Mips Generic instruction itineraries.
 //===----------------------------------------------------------------------===//
-def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [
+def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
   InstrItinData<IIAlu              , [InstrStage<1,  [ALU]>]>,
   InstrItinData<IILoad             , [InstrStage<3,  [ALU]>]>,
   InstrItinData<IIStore            , [InstrStage<1,  [ALU]>]>,

Modified: llvm/branches/wendling/eh/lib/Target/Mips/MipsTargetObjectFile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Mips/MipsTargetObjectFile.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Mips/MipsTargetObjectFile.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Mips/MipsTargetObjectFile.cpp Tue Oct 26 19:48:03 2010
@@ -25,21 +25,21 @@
 
 void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
   TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- 
+
   SmallDataSection =
     getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
                                MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
                                SectionKind::getDataRel());
-  
+
   SmallBSSSection =
     getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
                                MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
                                SectionKind::getBSS());
-  
+
 }
 
-// A address must be loaded from a small section if its size is less than the 
-// small section size threshold. Data in this section must be addressed using 
+// An address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
 // gp_rel operator.
 static bool IsInSmallSection(uint64_t Size) {
   return Size > 0 && Size <= SSThreshold;
@@ -49,7 +49,7 @@
                                                 const TargetMachine &TM) const {
   if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
     return false;
-  
+
   return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
 }
 
@@ -68,11 +68,11 @@
   const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
   if (!GVA)
     return false;
-  
+
   // We can only do this for datarel or BSS objects for now.
   if (!Kind.isBSS() && !Kind.isDataRel())
     return false;
-  
+
   // If this is a internal constant string, there is a special
   // section for it, but not in small data/bss.
   if (Kind.isMergeable1ByteCString())
@@ -89,13 +89,13 @@
                        Mangler *Mang, const TargetMachine &TM) const {
   // TODO: Could also support "weak" symbols with ".gnu.linkonce.s.*"
   // sections?
-  
+
   // Handle Small Section classification here.
   if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
     return SmallBSSSection;
   if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
     return SmallDataSection;
-  
+
   // Otherwise, we work the same as ELF.
   return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
 }

Propchange: llvm/branches/wendling/eh/lib/Target/PTX/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Oct 26 19:48:03 2010
@@ -0,0 +1,10 @@
+Debug
+Debug+Checks
+Debug+Coverage
+Debug+Coverage-Asserts
+*.inc
+Release
+Release-Asserts
+Release+Coverage
+Debug+Asserts
+Release+Asserts

Propchange: llvm/branches/wendling/eh/lib/Target/PTX/AsmPrinter/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Oct 26 19:48:03 2010
@@ -0,0 +1,9 @@
+Debug
+Debug+Checks
+Debug+Coverage
+Debug+Coverage-Asserts
+Release
+Release-Asserts
+Release+Coverage
+Debug+Asserts
+Release+Asserts

Propchange: llvm/branches/wendling/eh/lib/Target/PTX/TargetInfo/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Oct 26 19:48:03 2010
@@ -0,0 +1,9 @@
+Debug
+Debug+Checks
+Debug+Coverage
+Debug+Coverage-Asserts
+Release
+Release-Asserts
+Release+Coverage
+Debug+Asserts
+Release+Asserts

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp Tue Oct 26 19:48:03 2010
@@ -43,6 +43,7 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -66,10 +67,6 @@
       return "PowerPC Assembly Printer";
     }
 
-    PPCTargetMachine &getTM() {
-      return static_cast<PPCTargetMachine&>(TM);
-    }
-
     unsigned enumRegToMachineReg(unsigned enumReg) {
       switch (enumReg) {
       default: llvm_unreachable("Unhandled register!");
@@ -327,6 +324,19 @@
 
     void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
                                raw_ostream &O, const char *Modifier);
+
+    MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+
+      MachineLocation Location;
+      assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+      // Frame address.  Currently handles register +- offset only.
+      if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+        Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+      else {
+        DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+      }
+      return Location;
+    }
   };
 
   /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
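
The new getDebugValueLocation override assumes the four-operand DBG_VALUE form
with the register in operand 0 and the offset in operand 2; anything else is
logged under DEBUG and dropped rather than miscompiled. A condensed sketch of
the extraction:

    MachineLocation Location;
    assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
    // Frame address: currently handles register +/- offset only.
    if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
      Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
    else
      DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");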

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -26,5 +26,3 @@
   PPCTargetMachine.cpp
   PPCSelectionDAGInfo.cpp
   )
-
-target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCBranchSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCBranchSelector.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCBranchSelector.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCBranchSelector.cpp Tue Oct 26 19:48:03 2010
@@ -10,7 +10,7 @@
 // This file contains a pass that scans a machine function to determine which
 // conditional branches need more than 16 bits of displacement to reach their
 // target basic block.  It does this in two passes; a calculation of basic block
-// positions pass, and a branch psuedo op to machine branch opcode pass.  This
+// positions pass, and a branch pseudo op to machine branch opcode pass.  This
 // pass should be run last, just before the assembly printer.
 //
 //===----------------------------------------------------------------------===//
@@ -31,7 +31,7 @@
 namespace {
   struct PPCBSel : public MachineFunctionPass {
     static char ID;
-    PPCBSel() : MachineFunctionPass(&ID) {}
+    PPCBSel() : MachineFunctionPass(ID) {}
 
     /// BlockSizes - The sizes of the basic blocks in the function.
     std::vector<unsigned> BlockSizes;

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCCallingConv.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCCallingConv.td (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCCallingConv.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===//
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
 // 
 //                     The LLVM Compiler Infrastructure
 //

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCCodeEmitter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCCodeEmitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCCodeEmitter.cpp Tue Oct 26 19:48:03 2010
@@ -45,18 +45,18 @@
   public:
     
     PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
-      : MachineFunctionPass(&ID), TM(tm), MCE(mce) {}
+      : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
 
     /// getBinaryCodeForInstr - This function, generated by the
     /// CodeEmitterGenerator using TableGen, produces the binary encoding for
     /// machine instructions.
 
-    unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
 
     /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
 
     unsigned getMachineOpValue(const MachineInstr &MI,
-                               const MachineOperand &MO);
+                               const MachineOperand &MO) const;
 
     const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
 
@@ -67,10 +67,6 @@
     /// emitBasicBlock - emits the given MachineBasicBlock to memory
     ///
     void emitBasicBlock(MachineBasicBlock &MBB);
-
-    /// getValueBit - return the particular bit of Val
-    ///
-    unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; }
   };
 }
 
@@ -129,7 +125,7 @@
 }
 
 unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
-                                           const MachineOperand &MO) {
+                                           const MachineOperand &MO) const {
 
   unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
                    // or things that get fixed up later by the JIT.

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -104,7 +104,7 @@
 
     /// SelectAddrImm - Returns true if the address N can be represented by
     /// a base register plus a signed 16-bit displacement [r+imm].
-    bool SelectAddrImm(SDNode *Op, SDValue N, SDValue &Disp,
+    bool SelectAddrImm(SDValue N, SDValue &Disp,
                        SDValue &Base) {
       return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
     }
@@ -112,7 +112,7 @@
     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
     /// immediate field.  Because preinc imms have already been validated, just
     /// accept it.
-    bool SelectAddrImmOffs(SDNode *Op, SDValue N, SDValue &Out) const {
+    bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
       Out = N;
       return true;
     }
@@ -120,23 +120,20 @@
     /// SelectAddrIdx - Given the specified address, check to see if it can be
     /// represented as an indexed [r+r] operation.  Returns false if it can
     /// be represented by [r+imm], which are preferred.
-    bool SelectAddrIdx(SDNode *Op, SDValue N, SDValue &Base,
-                       SDValue &Index) {
+    bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
       return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
     }
     
     /// SelectAddrIdxOnly - Given the specified address, force it to be
     /// represented as an indexed [r+r] operation.
-    bool SelectAddrIdxOnly(SDNode *Op, SDValue N, SDValue &Base,
-                           SDValue &Index) {
+    bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
       return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
     }
 
     /// SelectAddrImmShift - Returns true if the address N can be represented by
     /// a base register plus a signed 14-bit displacement [r+imm*4].  Suitable
     /// for use by STD and friends.
-    bool SelectAddrImmShift(SDNode *Op, SDValue N, SDValue &Disp,
-                            SDValue &Base) {
+    bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) {
       return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
     }
       
@@ -152,9 +149,6 @@
       return false;
     }
     
-    SDValue BuildSDIVSequence(SDNode *N);
-    SDValue BuildUDIVSequence(SDNode *N);
-    
     void InsertVRSaveCode(MachineFunction &MF);
 
     virtual const char *getPassName() const {

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -73,6 +73,10 @@
   setUseUnderscoreSetJmp(true);
   setUseUnderscoreLongJmp(true);
 
+  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
+  // arguments are at least 4/8 bytes aligned.
+  setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+    
   // Set up the register classes.
   addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
   addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
@@ -1244,7 +1248,7 @@
 
   // If the global is weak or external, we have to go through the lazy
   // resolution stub.
-  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0,
+  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, MachinePointerInfo(),
                      false, false, 0);
 }
 
@@ -1353,7 +1357,8 @@
     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
+    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+                        MachinePointerInfo(SV),
                         false, false, 0);
   }
 
@@ -1406,28 +1411,32 @@
 
   // Store first byte : number of int regs
   SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
-                                         Op.getOperand(1), SV, 0, MVT::i8,
-                                         false, false, 0);
+                                         Op.getOperand(1),
+                                         MachinePointerInfo(SV),
+                                         MVT::i8, false, false, 0);
   uint64_t nextOffset = FPROffset;
   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                   ConstFPROffset);
 
   // Store second byte : number of float regs
   SDValue secondStore =
-    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8,
+    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
+                      MachinePointerInfo(SV, nextOffset), MVT::i8,
                       false, false, 0);
   nextOffset += StackOffset;
   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
 
   // Store second word : arguments given on stack
   SDValue thirdStore =
-    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset,
+    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
+                 MachinePointerInfo(SV, nextOffset),
                  false, false, 0);
   nextOffset += FrameOffset;
   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
 
   // Store third word : arguments given in registers
-  return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset,
+  return DAG.getStore(thirdStore, dl, FR, nextPtr,
+                      MachinePointerInfo(SV, nextOffset),
                       false, false, 0);
 
 }
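
For va_arg-style sequences that store at a known offset from an LLVM Value, the
(SV, nextOffset) pair moves inside the pointer info rather than being passed as
two trailing arguments. Sketch, where Val and Ptr stand in for the stored value
and address from the hunk above:

    // Old: DAG.getStore(Chain, dl, Val, Ptr, SV, nextOffset, false, false, 0);
    // New: the Value* and its offset travel together in MachinePointerInfo.
    SDValue Store = DAG.getStore(Chain, dl, Val, Ptr,
                                 MachinePointerInfo(SV, nextOffset),
                                 false, false, 0);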
@@ -1635,7 +1644,8 @@
 
       // Create load nodes to retrieve arguments from the stack.
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                   MachinePointerInfo(),
                                    false, false, 0));
     }
   }
@@ -1705,28 +1715,18 @@
     FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
 
-    // The fixed integer arguments of a variadic function are
-    // stored to the VarArgsFrameIndex on the stack.
-    unsigned GPRIndex = 0;
-    for (; GPRIndex != FuncInfo->getVarArgsNumGPR(); ++GPRIndex) {
-      SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
-      SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
-      MemOps.push_back(Store);
-      // Increment the address by four for the next argument to store
-      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
-      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
-    }
-
-    // If this function is vararg, store any remaining integer argument regs
-    // to their spots on the stack so that they may be loaded by deferencing the
-    // result of va_next.
-    for (; GPRIndex != NumGPArgRegs; ++GPRIndex) {
-      unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+    // The fixed integer arguments of a variadic function are stored to the
+    // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
+    // the result of va_next.
+    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+      // Get an existing live-in vreg, or add a new one.
+      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+      if (!VReg)
+        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
-      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                   MachinePointerInfo(), false, false, 0);
       MemOps.push_back(Store);
       // Increment the address by four for the next argument to store
       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
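
The rewritten vararg spill loops fold the two passes into one by reusing an
existing live-in vreg for an argument register before creating a new one. The
core idiom, with PhysReg, RC, and VT as stand-in names for the argument
register, its register class, and its value type:

    // Get an existing live-in virtual register for PhysReg, or add one.
    unsigned VReg = MF.getRegInfo().getLiveInVirtReg(PhysReg);
    if (!VReg)
      VReg = MF.addLiveIn(PhysReg, RC);

    // Copy the incoming value out and spill it to the varargs area.
    SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, VT);
    SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(), false, false, 0);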
@@ -1735,27 +1735,17 @@
 
     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
     // is set.
-    
     // The double arguments are stored to the VarArgsFrameIndex
     // on the stack.
-    unsigned FPRIndex = 0;
-    for (FPRIndex = 0; FPRIndex != FuncInfo->getVarArgsNumFPR(); ++FPRIndex) {
-      SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64);
-      SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
-      MemOps.push_back(Store);
-      // Increment the address by eight for the next argument to store
-      SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
-                                         PtrVT);
-      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
-    }
-
-    for (; FPRIndex != NumFPArgRegs; ++FPRIndex) {
-      unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+      // Get an existing live-in vreg, or add a new one.
+      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+      if (!VReg)
+        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
-      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                   MachinePointerInfo(), false, false, 0);
       MemOps.push_back(Store);
       // Increment the address by eight for the next argument to store
       SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
@@ -1918,7 +1908,7 @@
           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
-                                            NULL, 0,
+                                            MachinePointerInfo(),
                                             ObjSize==1 ? MVT::i8 : MVT::i16,
                                             false, false, 0);
           MemOps.push_back(Store);
@@ -1938,7 +1928,8 @@
           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
-          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
+          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                       MachinePointerInfo(),
                                        false, false, 0);
           MemOps.push_back(Store);
           ++GPR_idx;
@@ -2063,7 +2054,7 @@
                                       CurArgOffset + (ArgSize - ObjSize),
                                       isImmutable);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                            false, false, 0);
     }
 
@@ -2111,8 +2102,8 @@
         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
-      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                   MachinePointerInfo(), false, false, 0);
       MemOps.push_back(Store);
       // Increment the address by four for the next argument to store
       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -2153,7 +2144,6 @@
   // 16-byte aligned.
   nAltivecParamsAtEnd = 0;
   for (unsigned i = 0; i != NumOps; ++i) {
-    SDValue Arg = OutVals[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
     EVT ArgVT = Outs[i].VT;
     // Varargs Altivec parameters are padded to a 16 byte boundary.
@@ -2292,8 +2282,8 @@
     int FI = TailCallArgs[i].FrameIdx;
     // Store relative to framepointer.
     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
-                                       PseudoSourceValue::getFixedStack(FI),
-                                       0, false, false, 0));
+                                       MachinePointerInfo::getFixedStack(FI),
+                                       false, false, 0));
   }
 }
 
@@ -2318,7 +2308,7 @@
     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
-                         PseudoSourceValue::getFixedStack(NewRetAddr), 0,
+                         MachinePointerInfo::getFixedStack(NewRetAddr),
                          false, false, 0);
 
     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
@@ -2330,7 +2320,7 @@
                                                           true);
       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
-                           PseudoSourceValue::getFixedStack(NewFPIdx), 0,
+                           MachinePointerInfo::getFixedStack(NewFPIdx),
                            false, false, 0);
     }
   }
@@ -2369,7 +2359,7 @@
     // Load the LR and FP stack slot for later adjusting.
     EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
     LROpOut = getReturnAddrFrameIndex(DAG);
-    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0,
+    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
                           false, false, 0);
     Chain = SDValue(LROpOut.getNode(), 1);
     
@@ -2377,7 +2367,7 @@
     // slot as the FP is never overwritten.
     if (isDarwinABI) {
       FPOpOut = getFramePointerFrameIndex(DAG);
-      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0,
+      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
                             false, false, 0);
       Chain = SDValue(FPOpOut.getNode(), 1);
     }
@@ -2397,7 +2387,8 @@
                           DebugLoc dl) {
   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       false, false, NULL, 0, NULL, 0);
+                       false, false, MachinePointerInfo(0),
+                       MachinePointerInfo(0));
 }
 
 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
@@ -2407,7 +2398,7 @@
                  SDValue Arg, SDValue PtrOff, int SPDiff,
                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
                  bool isVector, SmallVector<SDValue, 8> &MemOpChains,
-                 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
+                 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
                  DebugLoc dl) {
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   if (!isTailCall) {
@@ -2420,8 +2411,8 @@
       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                            DAG.getConstant(ArgOffset, PtrVT));
     }
-    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
-                                       false, false, 0));
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                       MachinePointerInfo(), false, false, 0));
   // Calculate and remember argument location.
   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                   TailCallArguments);
@@ -2467,18 +2458,31 @@
 
   unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
 
-  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
-  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
-  // node so that legalize doesn't hack it.
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 
-                                        Callee.getValueType());
-  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
-    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
-  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+  bool needIndirectCall = true;
+  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
     // If this is an absolute destination address, use the munged value.
     Callee = SDValue(Dest, 0);
-  else {
+    needIndirectCall = false;
+  }
+  // XXX Workaround for http://llvm.org/bugs/show_bug.cgi?id=5201
+  // Use indirect calls for ALL function calls in JIT mode, since the
+  // far-call stubs may be outside relocation limits for a BL instruction.
+  if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+    // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+    // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+    // node so that legalize doesn't hack it.
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+                                          Callee.getValueType());
+      needIndirectCall = false;
+    }
+  }
+  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+                                         Callee.getValueType());
+    needIndirectCall = false;
+  }
+  if (needIndirectCall) {
     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
     // to do the call, we can't use PPCISD::CALL.
     SDValue MTCTROps[] = {Chain, Callee, InFlag};
@@ -2890,7 +2894,7 @@
         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
 
         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
-                                           PseudoSourceValue::getStack(), LocMemOffset,
+                                           MachinePointerInfo(),
                                            false, false, 0));
       } else {
         // Calculate and remember argument location.
@@ -3054,7 +3058,8 @@
         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
         if (GPR_idx != NumGPRs) {
           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, dl, Chain, Arg,
-                                        NULL, 0, VT, false, false, 0);
+                                        MachinePointerInfo(), VT,
+                                        false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
 
@@ -3091,7 +3096,8 @@
         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
         if (GPR_idx != NumGPRs) {
-          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0,
+          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+                                     MachinePointerInfo(),
                                      false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3123,21 +3129,22 @@
         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
 
         if (isVarArg) {
-          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
-                                       false, false, 0);
+          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+                                       MachinePointerInfo(), false, false, 0);
           MemOpChains.push_back(Store);
 
           // Float varargs are always shadowed in available integer registers
           if (GPR_idx != NumGPRs) {
-            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0,
-                                       false, false, 0);
+            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+                                       MachinePointerInfo(), false, false, 0);
             MemOpChains.push_back(Load.getValue(1));
             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
           }
           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
             SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
-            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0,
+            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+                                       MachinePointerInfo(),
                                        false, false, 0);
             MemOpChains.push_back(Load.getValue(1));
             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3181,11 +3188,12 @@
         // entirely in R registers.  Maybe later.
         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, PtrVT));
-        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
-                                     false, false, 0);
+        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+                                     MachinePointerInfo(), false, false, 0);
         MemOpChains.push_back(Store);
         if (VR_idx != NumVRs) {
-          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0,
+          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 
+                                     MachinePointerInfo(),
                                      false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
@@ -3196,7 +3204,7 @@
             break;
           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, PtrVT));
-          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0,
+          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                      false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3262,7 +3270,7 @@
     // TOC save area offset.
     SDValue PtrOff = DAG.getIntPtrConstant(40);
     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0,
+    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
                          false, false, 0);
   }
 
@@ -3349,14 +3357,15 @@
   SDValue SaveSP = Op.getOperand(1);
 
   // Load the old link SP.
-  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0,
+  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
+                                   MachinePointerInfo(),
                                    false, false, 0);
 
   // Restore the stack pointer.
   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
 
   // Store the old link SP.
-  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0,
+  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
                       false, false, 0);
 }
 
@@ -3532,15 +3541,15 @@
   SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
 
   // Emit a store to the stack slot.
-  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0,
-                               false, false, 0);
+  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+                               MachinePointerInfo(), false, false, 0);
 
   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
   // add in a bias.
   if (Op.getValueType() == MVT::i32)
     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                         DAG.getConstant(4, FIPtr.getValueType()));
-  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0,
+  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
                      false, false, 0);
 }
 
@@ -3578,14 +3587,15 @@
 
   // STD the extended value into the stack slot.
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOStore, 0, 8, 8);
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+                            MachineMemOperand::MOStore, 8, 8);
   SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
   SDValue Store =
     DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
                             Ops, 4, MVT::i64, MMO);
   // Load the value as a double.
-  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0);
+  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
+                           false, false, 0);
 
   // FCFID it and return it.
   SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
@@ -3631,12 +3641,12 @@
   int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
-                               StackSlot, NULL, 0, false, false, 0);
+                               StackSlot, MachinePointerInfo(), false, false,0);
 
   // Load FP Control Word from low 32 bits of stack slot.
   SDValue Four = DAG.getConstant(4, PtrVT);
   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
-  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0,
+  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
                             false, false, 0);
 
   // Transform as necessary
@@ -3942,17 +3952,17 @@
     }
 
     // t = vsplti c, result = vsldoi t, t, 1
-    if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
+    if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
       return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
     }
     // t = vsplti c, result = vsldoi t, t, 2
-    if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
+    if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
       return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
     }
     // t = vsplti c, result = vsldoi t, t, 3
-    if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
+    if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
       return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
     }
@@ -4304,10 +4314,10 @@
 
   // Store the input value into Value#0 of the stack slot.
   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
-                               Op.getOperand(0), FIdx, NULL, 0,
+                               Op.getOperand(0), FIdx, MachinePointerInfo(),
                                false, false, 0);
   // Load it out.
-  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0,
+  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
                      false, false, 0);
 }
 
@@ -5529,13 +5539,13 @@
     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                    FrameAddr, Offset),
-                       NULL, 0, false, false, 0);
+                       MachinePointerInfo(), false, false, 0);
   }
 
   // Just load the return address off the stack.
   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     RetAddrFI, NULL, 0, false, false, 0);
+                     RetAddrFI, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -5558,7 +5568,7 @@
                                          PtrVT);
   while (Depth--)
     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
-                            FrameAddr, NULL, 0, false, false, 0);
+                            FrameAddr, MachinePointerInfo(), false, false, 0);
   return FrameAddr;
 }
 

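The vsldoi hunks above are the one functional change hidden among the
MachinePointerInfo updates in this file: the old expression
(i << 8) | (i >> (TypeShiftAmt-8)) sign-shifts the splat constant, which for
any negative i ORs 0xFFFFFFFF into the result and collapses every negative
splat to -1; the new expression ORs in exactly the sign bytes.  A standalone
sketch (not part of this patch; it assumes the halfword case, i.e.
TypeShiftAmt == 16) that prints the immediates where the two forms disagree:

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned TypeShiftAmt = 16;  // Halfword elements, the vsldoi-by-1 case.
  for (int i = -16; i <= 15; ++i) {  // vsplti immediates are 5-bit signed.
    uint32_t u = static_cast<uint32_t>(i);
    // Old form: the arithmetic right shift smears the sign across all bits
    // (implementation-defined for negative i, just as in the original code).
    uint32_t oldExpr = (u << 8) | static_cast<uint32_t>(i >> (TypeShiftAmt - 8));
    // New form: OR in exactly one byte of sign bits.
    uint32_t newExpr = (u << 8) | (i < 0 ? 0xFFu : 0u);
    if (oldExpr != newExpr)
      std::printf("i=%3d  old=0x%08X  new=0x%08X\n", i, oldExpr, newExpr);
  }
  return 0;
}

Every i in [-16, -2] trips the check, so the old predicate compared every
negative splat against 0xFFFFFFFF and could match the wrong constants.
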
Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCInstrInfo.cpp Tue Oct 26 19:48:03 2010
@@ -469,8 +469,9 @@
 
   const MachineFrameInfo &MFI = *MF.getFrameInfo();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOStore, /*Offset=*/0,
+    MF.getMachineMemOperand(
+                MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+                            MachineMemOperand::MOStore,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
   NewMIs.back()->addMemOperand(MF, MMO);
@@ -590,8 +591,9 @@
 
   const MachineFrameInfo &MFI = *MF.getFrameInfo();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOLoad, /*Offset=*/0,
+    MF.getMachineMemOperand(
+                MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+                            MachineMemOperand::MOLoad,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
   NewMIs.back()->addMemOperand(MF, MMO);

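The MachineMemOperand hunks above are part of the tree-wide move from
PseudoSourceValue-plus-offset to MachinePointerInfo.  Purely for illustration
(this sketch is not in the patch, and makeSpillMMO is a made-up name), the
explicit wrapper used here and the MachinePointerInfo helper used in the
SystemZ hunks later in this mail spell the same thing:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
using namespace llvm;

static MachineMemOperand *makeSpillMMO(MachineFunction &MF, int FrameIdx) {
  const MachineFrameInfo &MFI = *MF.getFrameInfo();
  // Explicit form, as in the PPCInstrInfo.cpp hunks above...
  MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FrameIdx));
  // ...equivalent to the helper form used elsewhere in this revision:
  //   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                 MFI.getObjectSize(FrameIdx),
                                 MFI.getObjectAlignment(FrameIdx));
}
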
Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -580,10 +580,9 @@
   MBB.erase(II);
 }
 
-unsigned
+void
 PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                     int SPAdj, FrameIndexValue *Value,
-                                     RegScavenger *RS) const {
+                                     int SPAdj, RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   // Get the instruction.
@@ -622,14 +621,14 @@
   if (FPSI && FrameIndex == FPSI &&
       (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
     lowerDynamicAlloc(II, SPAdj, RS);
-    return 0;
+    return;
   }
 
   // Special case for pseudo-op SPILL_CR.
   if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
     if (OpC == PPC::SPILL_CR) {
       lowerCRSpilling(II, FrameIndex, SPAdj, RS);
-      return 0;
+      return;
     }
 
   // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -674,7 +673,7 @@
     if (isIXAddr)
       Offset >>= 2;    // The actual encoded value has the low two bits zero.
     MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
-    return 0;
+    return;
   }
 
   // The offset doesn't fit into a single register, scavenge one to build the
@@ -710,11 +709,10 @@
   } else {
     OperandBase = OffsetOperandNo;
   }
-    
+
   unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
   MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
   MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
-  return 0;
 }
 
 /// VRRegNo - Map from a numbered VR register to its enum value.

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCRegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCRegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCRegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -63,9 +63,8 @@
                          int SPAdj, RegScavenger *RS) const;
   void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
                        int SPAdj, RegScavenger *RS) const;
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   /// determineFrameLayout - Determine the size of the frame and maximum call
   /// frame size.

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG3.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG3.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG3.td (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG3.td Tue Oct 26 19:48:03 2010
@@ -13,7 +13,7 @@
 
 
 def G3Itineraries : ProcessorItineraries<
-  [IU1, IU2, FPU1, BPU, SRU, SLU], [
+  [IU1, IU2, FPU1, BPU, SRU, SLU], [], [
   InstrItinData<IntGeneral  , [InstrStage<1, [IU1, IU2]>]>,
   InstrItinData<IntCompare  , [InstrStage<1, [IU1, IU2]>]>,
   InstrItinData<IntDivW     , [InstrStage<19, [IU1]>]>,

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG4.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG4.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG4.td (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG4.td Tue Oct 26 19:48:03 2010
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 def G4Itineraries : ProcessorItineraries<
-  [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [
+  [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
   InstrItinData<IntGeneral  , [InstrStage<1, [IU1, IU2]>]>,
   InstrItinData<IntCompare  , [InstrStage<1, [IU1, IU2]>]>,
   InstrItinData<IntDivW     , [InstrStage<19, [IU1]>]>,

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG4Plus.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG4Plus.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG4Plus.td (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG4Plus.td Tue Oct 26 19:48:03 2010
@@ -15,7 +15,7 @@
 def IU4    : FuncUnit; // integer unit 4 (7450 simple)
 
 def G4PlusItineraries : ProcessorItineraries<
-  [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [
+  [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
   InstrItinData<IntGeneral  , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
   InstrItinData<IntCompare  , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
   InstrItinData<IntDivW     , [InstrStage<23, [IU2]>]>,

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG5.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG5.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG5.td (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCScheduleG5.td Tue Oct 26 19:48:03 2010
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 def G5Itineraries : ProcessorItineraries<
-  [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [
+  [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
   InstrItinData<IntGeneral  , [InstrStage<2, [IU1, IU2]>]>,
   InstrItinData<IntCompare  , [InstrStage<3, [IU1, IU2]>]>,
   InstrItinData<IntDivD     , [InstrStage<68, [IU1]>]>,

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCSubtarget.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCSubtarget.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCSubtarget.cpp Tue Oct 26 19:48:03 2010
@@ -69,6 +69,7 @@
   , HasFSQRT(false)
   , HasSTFIWX(false)
   , HasLazyResolverStubs(false)
+  , IsJITCodeModel(false)
   , DarwinVers(0) {
 
   // Determine default and user specified characteristics
@@ -117,6 +118,9 @@
   // everything is.  This matters for PPC64, which codegens in PIC mode without
   // stubs.
   HasLazyResolverStubs = false;
+
+  // Calls to external functions need to use indirect calls
+  IsJITCodeModel = true;
 }
 
 

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCSubtarget.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCSubtarget.h (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCSubtarget.h Tue Oct 26 19:48:03 2010
@@ -63,6 +63,7 @@
   bool HasFSQRT;
   bool HasSTFIWX;
   bool HasLazyResolverStubs;
+  bool IsJITCodeModel;
   
   /// DarwinVers - Nonzero if this is a darwin platform.  Otherwise, the numeric
   /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -124,6 +125,9 @@
   bool hasLazyResolverStub(const GlobalValue *GV, 
                            const TargetMachine &TM) const;
   
+  /// isJITCodeModel - True if we're generating code for the JIT.
+  bool isJITCodeModel() const { return IsJITCodeModel; }
+
   // Specific obvious features.
   bool hasFSQRT() const { return HasFSQRT; }
   bool hasSTFIWX() const { return HasSTFIWX; }

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/PPCTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/PPCTargetMachine.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/PPCTargetMachine.h (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/PPCTargetMachine.h Tue Oct 26 19:48:03 2010
@@ -58,8 +58,8 @@
   
   virtual const TargetData    *getTargetData() const    { return &DataLayout; }
   virtual const PPCSubtarget  *getSubtargetImpl() const { return &Subtarget; }
-  virtual const InstrItineraryData getInstrItineraryData() const {  
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {  
+    return &InstrItins;
   }
 
   // Pass Pipeline Configuration

Modified: llvm/branches/wendling/eh/lib/Target/PowerPC/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/PowerPC/README.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/PowerPC/README.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/PowerPC/README.txt Tue Oct 26 19:48:03 2010
@@ -37,6 +37,31 @@
 	ori r3, r2, 65535
 	blr 
 
+===-------------------------------------------------------------------------===
+
+This code:
+
+unsigned add32carry(unsigned sum, unsigned x) {
+  unsigned z = sum + x;
+  if (sum + x < x)
+    z++;
+  return z;
+}
+
+Should compile to something like:
+
+	addc r3,r3,r4
+	addze r3,r3
+
+Instead we get:
+
+	add r3, r4, r3
+	cmplw cr7, r3, r4
+	mfcr r4 ; 1
+	rlwinm r4, r4, 29, 31, 31
+	add r3, r3, r4
+
+Ick.
 
 ===-------------------------------------------------------------------------===
 

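As a sanity check on the new README entry (the same function is added to
lib/Target/README.txt below), the idiom really is a 32-bit add plus its own
carry-out, which is why the two-instruction addc/addze sequence suffices.
A standalone verification sketch, not part of the patch:

#include <cassert>
#include <cstdint>

static uint32_t add32carry(uint32_t sum, uint32_t x) {
  uint32_t z = sum + x;
  if (sum + x < x)  // True exactly when the 32-bit add carried out.
    z++;
  return z;
}

int main() {
  const uint32_t tests[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
  for (unsigned i = 0; i != 5; ++i)
    for (unsigned j = 0; j != 5; ++j) {
      uint32_t a = tests[i], b = tests[j];
      // Reference: do the add in 64 bits and fold the carry back in.
      uint64_t wide = static_cast<uint64_t>(a) + b;
      uint32_t expected =
          static_cast<uint32_t>(wide) + static_cast<uint32_t>(wide >> 32);
      assert(add32carry(a, b) == expected);
    }
  return 0;
}
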
Modified: llvm/branches/wendling/eh/lib/Target/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/README.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/README.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/README.txt Tue Oct 26 19:48:03 2010
@@ -2,6 +2,38 @@
 
 //===---------------------------------------------------------------------===//
 
+We should recognize idioms for add-with-carry and turn them into the
+appropriate intrinsics.  This example:
+
+unsigned add32carry(unsigned sum, unsigned x) {
+  unsigned z = sum + x;
+  if (sum + x < x)
+    z++;
+  return z;
+}
+
+With clang (clang t.c -S -o - -O3 -fomit-frame-pointer -m64 -mkernel) this
+compiles to:
+
+_add32carry:                            ## @add32carry
+	addl	%esi, %edi
+	cmpl	%esi, %edi
+	sbbl	%eax, %eax
+	andl	$1, %eax
+	addl	%edi, %eax
+	ret
+
+but gcc produces the better sequence:
+
+_add32carry:
+	leal	(%rsi,%rdi), %eax
+	cmpl	%esi, %eax
+	adcl	$0, %eax
+	ret
+
+
+//===---------------------------------------------------------------------===//
+
 Dead argument elimination should be enhanced to handle cases when an argument is
 dead to an externally visible function.  Though the argument can't be removed
 from the externally visible function, the caller doesn't need to pass it in.
@@ -1308,12 +1340,6 @@
 
 simplifylibcalls should do several optimizations for strspn/strcspn:
 
-strcspn(x, "") -> strlen(x)
-strcspn("", x) -> 0
-strspn("", x) -> 0
-strspn(x, "") -> strlen(x)
-strspn(x, "a") -> strchr(x, 'a')-x
-
 strcspn(x, "a") -> inlined loop for up to 3 letters (similarly for strspn):
 
 size_t __strcspn_c3 (__const char *__s, int __reject1, int __reject2,
@@ -1919,5 +1945,21 @@
 	ret
 .LBB0_2:
 	jmp	foo  # TAILCALL
+
+//===---------------------------------------------------------------------===//
+
+Given a branch where the two target blocks are identical ("ret i32 %b" in
+both), simplifycfg will simplify them away. But not so for a switch statement:
 
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+        switch i32 %a, label %bb3 [
+                i32 4, label %bb
+                i32 6, label %bb
+        ]
+
+bb:             ; preds = %entry, %entry
+        ret i32 %b
+
+bb3:            ; preds = %entry
+        ret i32 %b
+}
 //===---------------------------------------------------------------------===//

Modified: llvm/branches/wendling/eh/lib/Target/Sparc/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Sparc/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Sparc/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/Sparc/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -22,5 +22,3 @@
   SparcTargetMachine.cpp
   SparcSelectionDAGInfo.cpp
   )
-
-target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/Sparc/DelaySlotFiller.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Sparc/DelaySlotFiller.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Sparc/DelaySlotFiller.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Sparc/DelaySlotFiller.cpp Tue Oct 26 19:48:03 2010
@@ -32,7 +32,7 @@
 
     static char ID;
     Filler(TargetMachine &tm) 
-      : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
 
     virtual const char *getPassName() const {
       return "SPARC Delay Slot Filler";

Modified: llvm/branches/wendling/eh/lib/Target/Sparc/FPMover.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Sparc/FPMover.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Sparc/FPMover.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Sparc/FPMover.cpp Tue Oct 26 19:48:03 2010
@@ -36,7 +36,7 @@
     
     static char ID;
     explicit FPMover(TargetMachine &tm) 
-      : MachineFunctionPass(&ID), TM(tm) { }
+      : MachineFunctionPass(ID), TM(tm) { }
 
     virtual const char *getPassName() const {
       return "Sparc Double-FP Move Fixer";

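The Filler and FPMover constructor changes above, like the TargetData
constructor change later in this mail, are part of the switch from passing a
pass ID by address to passing it by reference.  A minimal sketch of the new
convention (ExamplePass is a made-up name, shown for illustration only):

#include "llvm/Pass.h"
using namespace llvm;

namespace {
  struct ExamplePass : public FunctionPass {
    static char ID;  // Pass identification, replacement for typeid.
    ExamplePass() : FunctionPass(ID) {}  // Was: FunctionPass(&ID).

    virtual bool runOnFunction(Function &F) {
      return false;  // Analyzes nothing, changes nothing.
    }
  };
}
char ExamplePass::ID = 0;
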
Modified: llvm/branches/wendling/eh/lib/Target/Sparc/Sparc.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Sparc/Sparc.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Sparc/Sparc.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Sparc/Sparc.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===//
+//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===//
 // 
 //                     The LLVM Compiler Infrastructure
 //

Modified: llvm/branches/wendling/eh/lib/Target/Sparc/SparcISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Sparc/SparcISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Sparc/SparcISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Sparc/SparcISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -44,9 +44,8 @@
   SDNode *Select(SDNode *N);
 
   // Complex Pattern Selectors.
-  bool SelectADDRrr(SDNode *Op, SDValue N, SDValue &R1, SDValue &R2);
-  bool SelectADDRri(SDNode *Op, SDValue N, SDValue &Base,
-                    SDValue &Offset);
+  bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2);
+  bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
 
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
@@ -71,7 +70,7 @@
   return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
 }
 
-bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
                                      SDValue &Base, SDValue &Offset) {
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -84,7 +83,7 @@
 
   if (Addr.getOpcode() == ISD::ADD) {
     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
-      if (Predicate_simm13(CN)) {
+      if (isInt<13>(CN->getSExtValue())) {
         if (FrameIndexSDNode *FIN =
                 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
           // Constant offset from frame ref.
@@ -112,17 +111,16 @@
   return true;
 }
 
-bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr,
-                                     SDValue &R1,  SDValue &R2) {
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
   if (Addr.getOpcode() == ISD::FrameIndex) return false;
   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
       Addr.getOpcode() == ISD::TargetGlobalAddress)
     return false;  // direct calls.
 
   if (Addr.getOpcode() == ISD::ADD) {
-    if (isa<ConstantSDNode>(Addr.getOperand(1)) &&
-        Predicate_simm13(Addr.getOperand(1).getNode()))
-      return false;  // Let the reg+imm pattern catch this!
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+      if (isInt<13>(CN->getSExtValue()))
+        return false;  // Let the reg+imm pattern catch this!
     if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
         Addr.getOperand(1).getOpcode() == SPISD::Lo)
       return false;  // Let the reg+imm pattern catch this!
@@ -196,8 +194,8 @@
   switch (ConstraintCode) {
   default: return true;
   case 'm':   // memory
-   if (!SelectADDRrr(Op.getNode(), Op, Op0, Op1))
-     SelectADDRri(Op.getNode(), Op, Op0, Op1);
+   if (!SelectADDRrr(Op, Op0, Op1))
+     SelectADDRri(Op, Op0, Op1);
    break;
   }
 

Modified: llvm/branches/wendling/eh/lib/Target/Sparc/SparcISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Sparc/SparcISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Sparc/SparcISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Sparc/SparcISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -138,7 +138,7 @@
         SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
         SDValue Load;
         if (ObjectVT == MVT::i32) {
-          Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
+          Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo(),
                              false, false, 0);
         } else {
           ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
@@ -148,7 +148,7 @@
           FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
                               DAG.getConstant(Offset, MVT::i32));
           Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr,
-                                NULL, 0, ObjectVT, false, false, 0);
+                                MachinePointerInfo(), ObjectVT, false, false,0);
           Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
         }
         InVals.push_back(Load);
@@ -172,7 +172,8 @@
         int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
                                                             true);
         SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-        SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0,
+        SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr,
+                                   MachinePointerInfo(),
                                    false, false, 0);
         InVals.push_back(Load);
       }
@@ -195,7 +196,7 @@
           int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
                                                               true);
           SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-          HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
+          HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo(),
                               false, false, 0);
         }
 
@@ -208,7 +209,7 @@
           int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4,
                                                               true);
           SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-          LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
+          LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo(),
                               false, false, 0);
         }
 
@@ -243,7 +244,8 @@
                                                           true);
       SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
 
-      OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0,
+      OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr,
+                                       MachinePointerInfo(),
                                        false, false, 0));
       ArgOffset += 4;
     }
@@ -349,7 +351,7 @@
     // FIXME: VERIFY THAT 68 IS RIGHT.
     SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68);
     PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
-    MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0,
+    MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, MachinePointerInfo(),
                                        false, false, 0));
   }
 
@@ -396,17 +398,17 @@
       // out the parts as integers.  Top part goes in a reg.
       SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
       SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 
-                                   Val, StackPtr, NULL, 0,
+                                   Val, StackPtr, MachinePointerInfo(),
                                    false, false, 0);
       // Sparc is big-endian, so the high part comes first.
-      SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
-                               false, false, 0);
+      SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
       // Increment the pointer to the other half.
       StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
                              DAG.getIntPtrConstant(4));
       // Load the low part.
-      SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
-                               false, false, 0);
+      SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
 
       RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
 
@@ -449,7 +451,7 @@
       SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
       PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
       MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, 
-                                         PtrOff, NULL, 0,
+                                         PtrOff, MachinePointerInfo(),
                                          false, false, 0));
     }
     ArgOffset += ObjSize;
@@ -774,7 +776,7 @@
   SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, 
                                 GlobalBase, RelAddr);
   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 
-                     AbsAddr, NULL, 0, false, false, 0);
+                     AbsAddr, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
@@ -795,7 +797,7 @@
   SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                 GlobalBase, RelAddr);
   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 
-                     AbsAddr, NULL, 0, false, false, 0);
+                     AbsAddr, MachinePointerInfo(), false, false, 0);
 }
 
 static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -891,8 +893,8 @@
                 DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
                                 MVT::i32));
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0,
-                      false, false, 0);
+  return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+                      MachinePointerInfo(SV), false, false, 0);
 }
 
 static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
@@ -902,22 +904,23 @@
   SDValue VAListPtr = Node->getOperand(1);
   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
   DebugLoc dl = Node->getDebugLoc();
-  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0,
-                               false, false, 0);
+  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+                               MachinePointerInfo(SV), false, false, 0);
   // Increment the pointer, VAList, to the next vaarg
   SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
                                   DAG.getConstant(VT.getSizeInBits()/8,
                                                   MVT::i32));
   // Store the incremented VAList to the legalized pointer
   InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
-                         VAListPtr, SV, 0, false, false, 0);
+                         VAListPtr, MachinePointerInfo(SV), false, false, 0);
   // Load the actual argument out of the pointer VAList, unless this is an
   // f64 load.
   if (VT != MVT::f64)
-    return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0);
+    return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
+                       false, false, 0);
 
   // Otherwise, load it as i64, then do a bitconvert.
-  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0,
+  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
                           false, false, 0);
 
   // Bit-Convert the value to f64.

Modified: llvm/branches/wendling/eh/lib/Target/Sparc/SparcInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Sparc/SparcInstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Sparc/SparcInstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/Sparc/SparcInstrInfo.td Tue Oct 26 19:48:03 2010
@@ -43,17 +43,9 @@
 // Instruction Pattern Stuff
 //===----------------------------------------------------------------------===//
 
-def simm11  : PatLeaf<(imm), [{
-  // simm11 predicate - True if the imm fits in a 11-bit sign extended field.
-  return (((int)N->getZExtValue() << (32-11)) >> (32-11)) ==
-         (int)N->getZExtValue();
-}]>;
+def simm11  : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
 
-def simm13  : PatLeaf<(imm), [{
-  // simm13 predicate - True if the imm fits in a 13-bit sign extended field.
-  return (((int)N->getZExtValue() << (32-13)) >> (32-13)) ==
-         (int)N->getZExtValue();
-}]>;
+def simm13  : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
 
 def LO10 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,

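The rewritten simm11/simm13 PatLeafs, like the SelectADDRri/SelectADDRrr
changes in SparcISelDAGToDAG.cpp above, replace the shift-and-compare trick
with isInt<N> from llvm/Support/MathExtras.h.  A standalone sketch (isIntN
below is a local stand-in for llvm::isInt) checking that the two predicates
agree over the interesting range:

#include <cassert>
#include <cstdint>

template <unsigned N> static bool isIntN(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}

// The old PatLeaf body: shift the value to the top of a 32-bit word and
// arithmetic-shift it back; it survives unchanged iff it fits in 13 bits.
static bool oldSimm13(int32_t v) {
  uint32_t shifted = static_cast<uint32_t>(v) << (32 - 13);
  int32_t restored = static_cast<int32_t>(shifted) >> (32 - 13);
  return restored == v;
}

int main() {
  for (int32_t v = -(1 << 15); v <= (1 << 15); ++v)
    assert(oldSimm13(v) == isIntN<13>(v));
  return 0;
}
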
Modified: llvm/branches/wendling/eh/lib/Target/Sparc/SparcRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Sparc/SparcRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Sparc/SparcRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Sparc/SparcRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -69,10 +69,9 @@
   MBB.erase(I);
 }
 
-unsigned
+void
 SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                       int SPAdj, FrameIndexValue *Value,
-                                       RegScavenger *RS) const {
+                                       int SPAdj, RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   unsigned i = 0;
@@ -108,7 +107,6 @@
     MI.getOperand(i).ChangeToRegister(SP::G1, false);
     MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
   }
-  return 0;
 }
 
 void SparcRegisterInfo::

Modified: llvm/branches/wendling/eh/lib/Target/Sparc/SparcRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Sparc/SparcRegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Sparc/SparcRegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/Sparc/SparcRegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -40,9 +40,8 @@
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
 
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 

Modified: llvm/branches/wendling/eh/lib/Target/SystemZ/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/SystemZ/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/SystemZ/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/SystemZ/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -20,5 +20,3 @@
   SystemZTargetMachine.cpp
   SystemZSelectionDAGInfo.cpp
   )
-
-target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -120,18 +120,17 @@
     #include "SystemZGenDAGISel.inc"
 
   private:
-    bool SelectAddrRI12Only(SDNode *Op, SDValue& Addr,
+    bool SelectAddrRI12Only(SDValue& Addr,
                             SDValue &Base, SDValue &Disp);
-    bool SelectAddrRI12(SDNode *Op, SDValue& Addr,
+    bool SelectAddrRI12(SDValue& Addr,
                         SDValue &Base, SDValue &Disp,
                         bool is12BitOnly = false);
-    bool SelectAddrRI(SDNode *Op, SDValue& Addr,
-                      SDValue &Base, SDValue &Disp);
-    bool SelectAddrRRI12(SDNode *Op, SDValue Addr,
+    bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp);
+    bool SelectAddrRRI12(SDValue Addr,
                          SDValue &Base, SDValue &Disp, SDValue &Index);
-    bool SelectAddrRRI20(SDNode *Op, SDValue Addr,
+    bool SelectAddrRRI20(SDValue Addr,
                          SDValue &Base, SDValue &Disp, SDValue &Index);
-    bool SelectLAAddr(SDNode *Op, SDValue Addr,
+    bool SelectLAAddr(SDValue Addr,
                       SDValue &Base, SDValue &Disp, SDValue &Index);
 
     SDNode *Select(SDNode *Node);
@@ -142,8 +141,6 @@
     bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
                       bool is12Bit, unsigned Depth = 0);
     bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
-    bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM,
-                        bool is12Bit);
   };
 }  // end anonymous namespace
 
@@ -355,12 +352,12 @@
 
 /// Returns true if the address can be represented by a base register plus
 /// an unsigned 12-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDNode *Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr,
                                              SDValue &Base, SDValue &Disp) {
-  return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true);
+  return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true);
 }
 
-bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr,
                                          SDValue &Base, SDValue &Disp,
                                          bool is12BitOnly) {
   SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
@@ -410,7 +407,7 @@
 
 /// Returns true if the address can be represented by a base register plus
 /// a signed 20-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr,
                                        SDValue &Base, SDValue &Disp) {
   SystemZRRIAddressMode AM(/*isRI*/true);
   bool Done = false;
@@ -453,7 +450,7 @@
 
 /// Returns true if the address can be represented by a base register plus
 /// index register plus an unsigned 12-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr,
                                 SDValue &Base, SDValue &Disp, SDValue &Index) {
   SystemZRRIAddressMode AM20, AM12;
   bool Done = false;
@@ -502,7 +499,7 @@
 
 /// Returns true if the address can be represented by a base register plus
 /// index register plus a signed 20-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr,
                                 SDValue &Base, SDValue &Disp, SDValue &Index) {
   SystemZRRIAddressMode AM;
   bool Done = false;
@@ -546,7 +543,7 @@
 
 /// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LA/LAY instruction.
-bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr,
                                   SDValue &Base, SDValue &Disp, SDValue &Index) {
   SystemZRRIAddressMode AM;
 
@@ -583,7 +580,7 @@
                                  SDValue &Base, SDValue &Disp, SDValue &Index) {
   if (ISD::isNON_EXTLoad(N.getNode()) &&
       IsLegalToFold(N, P, P, OptLevel))
-    return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index);
+    return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index);
   return false;
 }
 

Modified: llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -341,7 +341,7 @@
       // from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
       ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
-                             PseudoSourceValue::getFixedStack(FI), 0,
+                             MachinePointerInfo::getFixedStack(FI),
                              false, false, 0);
     }
 
@@ -441,7 +441,7 @@
                                    DAG.getIntPtrConstant(Offset));
 
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
-                                         PseudoSourceValue::getStack(), Offset,
+                                         MachinePointerInfo(),
                                          false, false, 0));
     }
   }
@@ -747,7 +747,7 @@
 
   if (ExtraLoadRequired)
     Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
-                         PseudoSourceValue::getGOT(), 0, false, false, 0);
+                         MachinePointerInfo::getGOT(), false, false, 0);
 
   // If there was a non-zero offset that we didn't fold, create an explicit
   // addition for it.

Modified: llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZInstrBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZInstrBuilder.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZInstrBuilder.h (original)
+++ llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZInstrBuilder.h Tue Oct 26 19:48:03 2010
@@ -115,9 +115,9 @@
   if (TID.mayStore())
     Flags |= MachineMemOperand::MOStore;
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                            Flags, Offset,
-                            MFI.getObjectSize(FI),
+    MF.getMachineMemOperand(MachinePointerInfo(
+                                PseudoSourceValue::getFixedStack(FI), Offset),
+                            Flags, MFI.getObjectSize(FI),
                             MFI.getObjectAlignment(FI));
   return addOffset(MIB.addFrameIndex(FI), Offset)
             .addMemOperand(MMO);

Modified: llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -92,10 +92,9 @@
   return Offset;
 }
 
-unsigned
+void
 SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                         int SPAdj, FrameIndexValue *Value,
-                                         RegScavenger *RS) const {
+                                         int SPAdj, RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   unsigned i = 0;
@@ -117,13 +116,13 @@
 
   // Offset is either a 12-bit unsigned or a 20-bit signed integer.
   // FIXME: handle "too long" displacements.
-  int Offset = getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
+  int Offset =
+    getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
 
   // Check whether displacement is too long to fit into 12 bit zext field.
   MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
 
   MI.getOperand(i+1).ChangeToImmediate(Offset);
-  return 0;
 }
 
 void

Modified: llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZRegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZRegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/SystemZ/SystemZRegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -43,9 +43,8 @@
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
 
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
 
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

Modified: llvm/branches/wendling/eh/lib/Target/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/Target.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/Target.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/Target.cpp Tue Oct 26 19:48:03 2010
@@ -7,12 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the C bindings for libLLVMTarget.a, which implements
-// target information.
+// This file implements the common infrastructure (including C bindings) for 
+// libLLVMTarget.a, which implements target information.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/Target.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/LLVMContext.h"
@@ -20,6 +22,14 @@
 
 using namespace llvm;
 
+void llvm::initializeTarget(PassRegistry &Registry) {
+  initializeTargetDataPass(Registry);
+}
+
+void LLVMInitializeTarget(LLVMPassRegistryRef R) {
+  initializeTarget(*unwrap(R));
+}
+
 LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
   return wrap(new TargetData(StringRep));
 }

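Target.cpp now participates in the new pass-registration scheme, exposing both
the C++ initializeTarget entry point and a C binding.  A sketch of client-side
usage (assuming LLVMGetGlobalPassRegistry() from llvm-c/Core.h, which the
broader initialization rework provides; the data layout string is just an
example):

#include "llvm-c/Core.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Target.h"

int main() {
  LLVMPassRegistryRef Registry = LLVMGetGlobalPassRegistry();
  LLVMInitializeTarget(Registry);  // Registers the TargetData pass.

  LLVMTargetDataRef TD = LLVMCreateTargetData("e-p:64:64");
  LLVMDisposeTargetData(TD);
  return 0;
}
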
Modified: llvm/branches/wendling/eh/lib/Target/TargetData.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/TargetData.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/TargetData.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/TargetData.cpp Tue Oct 26 19:48:03 2010
@@ -34,7 +34,7 @@
 // Handle the Pass registration stuff necessary to use TargetData's.
 
 // Register the default SparcV9 implementation...
-INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true);
+INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true)
 char TargetData::ID = 0;
 
 //===----------------------------------------------------------------------===//
@@ -83,7 +83,7 @@
   assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
          (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
          "Upper bound didn't work!");
-  
+
   // Multiple fields can have the same offset if any of them are zero sized.
   // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
   // at the i32 element, because it is the last element at that offset.  This is
@@ -97,8 +97,8 @@
 //===----------------------------------------------------------------------===//
 
 TargetAlignElem
-TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align,
-                     unsigned char pref_align, uint32_t bit_width) {
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
+                     unsigned pref_align, uint32_t bit_width) {
   assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
   TargetAlignElem retval;
   retval.AlignType = align_type;
@@ -131,6 +131,8 @@
 }
 
 void TargetData::init(StringRef Desc) {
+  initializeTargetDataPass(*PassRegistry::getPassRegistry());
+  
   LayoutMap = 0;
   LittleEndian = false;
   PointerMemSize = 8;
@@ -153,16 +155,16 @@
     std::pair<StringRef, StringRef> Split = Desc.split('-');
     StringRef Token = Split.first;
     Desc = Split.second;
-    
+
     if (Token.empty())
       continue;
-    
+
     Split = Token.split(':');
     StringRef Specifier = Split.first;
     Token = Split.second;
-    
+
     assert(!Specifier.empty() && "Can't be empty here");
-    
+
     switch (Specifier[0]) {
     case 'E':
       LittleEndian = false;
@@ -196,10 +198,10 @@
       }
       unsigned Size = getInt(Specifier.substr(1));
       Split = Token.split(':');
-      unsigned char ABIAlign = getInt(Split.first) / 8;
-      
+      unsigned ABIAlign = getInt(Split.first) / 8;
+
       Split = Split.second.split(':');
-      unsigned char PrefAlign = getInt(Split.first) / 8;
+      unsigned PrefAlign = getInt(Split.first) / 8;
       if (PrefAlign == 0)
         PrefAlign = ABIAlign;
       setAlignment(AlignType, ABIAlign, PrefAlign, Size);
@@ -215,7 +217,7 @@
         Token = Split.second;
       } while (!Specifier.empty() || !Token.empty());
       break;
-        
+
     default:
       break;
     }
@@ -226,19 +228,19 @@
 ///
 /// @note This has to exist, because this is a pass, but it should never be
 /// used.
-TargetData::TargetData() : ImmutablePass(&ID) {
+TargetData::TargetData() : ImmutablePass(ID) {
   report_fatal_error("Bad TargetData ctor used.  "
                     "Tool did not specify a TargetData to use?");
 }
 
-TargetData::TargetData(const Module *M) 
-  : ImmutablePass(&ID) {
+TargetData::TargetData(const Module *M)
+  : ImmutablePass(ID) {
   init(M->getDataLayout());
 }
 
 void
-TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
-                         unsigned char pref_align, uint32_t bit_width) {
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+                         unsigned pref_align, uint32_t bit_width) {
   assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
   for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
     if (Alignments[i].AlignType == align_type &&
@@ -249,14 +251,14 @@
       return;
     }
   }
-  
+
   Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
                                             pref_align, bit_width));
 }
 
-/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or 
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
 /// preferred if ABIInfo = false) the target wants for the specified datatype.
-unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, 
+unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
                                       uint32_t BitWidth, bool ABIInfo,
                                       const Type *Ty) const {
   // Check to see if we have an exact match and remember the best match we see.
@@ -266,18 +268,18 @@
     if (Alignments[i].AlignType == AlignType &&
         Alignments[i].TypeBitWidth == BitWidth)
       return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
-    
+
     // The best match so far depends on what we're looking for.
-     if (AlignType == INTEGER_ALIGN && 
+     if (AlignType == INTEGER_ALIGN &&
          Alignments[i].AlignType == INTEGER_ALIGN) {
       // The "best match" for integers is the smallest size that is larger than
       // the BitWidth requested.
-      if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || 
+      if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
            Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
         BestMatchIdx = i;
       // However, if there isn't one that's larger, then we must use the
       // largest one we have (see below)
-      if (LargestInt == -1 || 
+      if (LargestInt == -1 ||
           Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
         LargestInt = i;
     }
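
The integer best-match rule in the hunk above is easiest to see with concrete
data. The following is a minimal standalone sketch assuming only the rule the
comments state (an exact width wins; otherwise the smallest wider entry;
otherwise the widest one); the table values are invented, and this is not
LLVM's code:

#include <cstdio>
#include <utility>
#include <vector>

// Each entry pairs a type bit width with its ABI alignment (toy values).
static unsigned intAlign(const std::vector<std::pair<unsigned, unsigned> > &E,
                         unsigned BitWidth) {
  int Best = -1, Largest = -1;
  for (int i = 0, e = E.size(); i != e; ++i) {
    if (E[i].first == BitWidth)
      return E[i].second;                          // Exact match wins.
    if (E[i].first > BitWidth &&
        (Best == -1 || E[i].first < E[Best].first))
      Best = i;                                    // Smallest wider entry.
    if (Largest == -1 || E[i].first > E[Largest].first)
      Largest = i;                                 // Fallback: widest entry.
  }
  return E[Best == -1 ? Largest : Best].second;
}

int main() {
  std::vector<std::pair<unsigned, unsigned> > E;
  E.push_back(std::make_pair(8, 1));
  E.push_back(std::make_pair(32, 4));
  E.push_back(std::make_pair(64, 8));
  std::printf("i24 -> align %u\n", intAlign(E, 24)); // picks the i32 entry
}
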
@@ -322,8 +324,8 @@
       I->first->removeAbstractTypeUser(this);
     LayoutInfo.erase(I);
   }
-  
-  
+
+
   /// refineAbstractType - The callback method invoked when an abstract type is
   /// resolved to another type.  An object must override this method to update
   /// its internal state to reference NewType instead of OldType.
@@ -385,21 +387,21 @@
 const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
   if (!LayoutMap)
     LayoutMap = new StructLayoutMap();
-  
+
   StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
   StructLayout *&SL = (*STM)[Ty];
   if (SL) return SL;
 
-  // Otherwise, create the struct layout.  Because it is variable length, we 
+  // Otherwise, create the struct layout.  Because it is variable length, we
   // malloc it, then use placement new.
   int NumElts = Ty->getNumElements();
   StructLayout *L =
     (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
-  
+
   // Set SL before calling StructLayout's ctor.  The ctor could cause other
   // entries to be added to TheMap, invalidating our reference.
   SL = L;
-  
+
   new (L) StructLayout(Ty, *this);
 
   if (Ty->isAbstract())
@@ -414,14 +416,14 @@
 /// avoid a dangling pointer in this cache.
 void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
   if (!LayoutMap) return;  // No cache.
-  
+
   static_cast<StructLayoutMap*>(LayoutMap)->InvalidateEntry(Ty);
 }
 
 std::string TargetData::getStringRepresentation() const {
   std::string Result;
   raw_string_ostream OS(Result);
-  
+
   OS << (LittleEndian ? "e" : "E")
      << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
      << ':' << PointerPrefAlign*8;
@@ -430,10 +432,10 @@
     OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
        << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
   }
-  
+
   if (!LegalIntWidths.empty()) {
     OS << "-n" << (unsigned)LegalIntWidths[0];
-    
+
     for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
       OS << ':' << (unsigned)LegalIntWidths[i];
   }
@@ -454,15 +456,6 @@
   case Type::StructTyID:
     // Get the layout annotation... which is lazily created on demand.
     return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
-  case Type::UnionTyID: {
-    const UnionType *UnTy = cast<UnionType>(Ty);
-    uint64_t Size = 0;
-    for (UnionType::element_iterator i = UnTy->element_begin(),
-             e = UnTy->element_end(); i != e; ++i) {
-      Size = std::max(Size, getTypeSizeInBits(*i));
-    }
-    return Size;
-  }
   case Type::IntegerTyID:
     return cast<IntegerType>(Ty)->getBitWidth();
   case Type::VoidTyID:
@@ -470,6 +463,7 @@
   case Type::FloatTyID:
     return 32;
   case Type::DoubleTyID:
+  case Type::X86_MMXTyID:
     return 64;
   case Type::PPC_FP128TyID:
   case Type::FP128TyID:
@@ -495,7 +489,7 @@
   Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
   == false) for the requested type \a Ty.
  */
-unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
   int AlignType = -1;
 
   assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
@@ -517,18 +511,7 @@
     // Get the layout annotation... which is lazily created on demand.
     const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
     unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
-    return std::max(Align, (unsigned)Layout->getAlignment());
-  }
-  case Type::UnionTyID: {
-    const UnionType *UnTy = cast<UnionType>(Ty);
-    unsigned Align = 1;
-
-    // Unions need the maximum alignment of all their entries
-    for (UnionType::element_iterator i = UnTy->element_begin(), 
-             e = UnTy->element_end(); i != e; ++i) {
-      Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref));
-    }
-    return Align;
+    return std::max(Align, Layout->getAlignment());
   }
   case Type::IntegerTyID:
   case Type::VoidTyID:
@@ -543,6 +526,7 @@
   case Type::X86_FP80TyID:
     AlignType = FLOAT_ALIGN;
     break;
+  case Type::X86_MMXTyID:
   case Type::VectorTyID:
     AlignType = VECTOR_ALIGN;
     break;
@@ -555,18 +539,18 @@
                           abi_or_pref, Ty);
 }
 
-unsigned char TargetData::getABITypeAlignment(const Type *Ty) const {
+unsigned TargetData::getABITypeAlignment(const Type *Ty) const {
   return getAlignment(Ty, true);
 }
 
 /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
 /// an integer type of the specified bitwidth.
-unsigned char TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
   return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
 }
 
 
-unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
   for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
     if (Alignments[i].AlignType == STACK_ALIGN)
       return Alignments[i].ABIAlign;
@@ -574,12 +558,12 @@
   return getABITypeAlignment(Ty);
 }
 
-unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const {
   return getAlignment(Ty, false);
 }
 
-unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
-  unsigned Align = (unsigned) getPrefTypeAlignment(Ty);
+unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+  unsigned Align = getPrefTypeAlignment(Ty);
   assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
   return Log2_32(Align);
 }
@@ -614,11 +598,6 @@
 
       // Update Ty to refer to current element
       Ty = STy->getElementType(FieldNo);
-    } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) {
-        unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
-
-        // Offset into union is canonically 0, but type changes
-        Ty = UnTy->getElementType(FieldNo);
     } else {
       // Update Ty to refer to current element
       Ty = cast<SequentialType>(Ty)->getElementType();
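
To make the init() hunk earlier in this file concrete: one '-'-separated
datalayout token such as "i32:32:64" splits into a specifier, an ABI
alignment, and an optional preferred alignment, with the bit values divided
by 8 as in the patch. A minimal standalone sketch, illustrative only and not
the LLVM parser:

#include <cstdio>
#include <cstdlib>
#include <string>

int main() {
  std::string Token = "i32:32:64";      // one '-'-separated datalayout token
  std::string::size_type C1 = Token.find(':');
  std::string Specifier = Token.substr(0, C1);          // "i32"
  std::string Rest = Token.substr(C1 + 1);              // "32:64"

  unsigned Size = std::atoi(Specifier.c_str() + 1);     // bit width after 'i'
  std::string::size_type C2 = Rest.find(':');
  unsigned ABIAlign = std::atoi(Rest.substr(0, C2).c_str()) / 8;
  unsigned PrefAlign = (C2 == std::string::npos)
                           ? ABIAlign                   // default to ABI
                           : std::atoi(Rest.c_str() + C2 + 1) / 8;
  std::printf("i%u: abi=%u pref=%u\n", Size, ABIAlign, PrefAlign);
  return 0;
}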

Modified: llvm/branches/wendling/eh/lib/Target/TargetELFWriterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/TargetELFWriterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/TargetELFWriterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/TargetELFWriterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -17,9 +17,8 @@
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
-TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) {
-  is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
-  isLittleEndian = TM.getTargetData()->isLittleEndian();
+TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) :
+  is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) {
 }
 
 TargetELFWriterInfo::~TargetELFWriterInfo() {}

Modified: llvm/branches/wendling/eh/lib/Target/TargetInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/TargetInstrInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/TargetInstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/TargetInstrInfo.cpp Tue Oct 26 19:48:03 2010
@@ -12,8 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
@@ -47,6 +49,62 @@
 TargetInstrInfo::~TargetInstrInfo() {
 }
 
+unsigned
+TargetInstrInfo::getNumMicroOps(const MachineInstr *MI,
+                                const InstrItineraryData *ItinData) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  unsigned Class = MI->getDesc().getSchedClass();
+  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (UOps)
+    return UOps;
+
+  // The # of u-ops is dynamically determined. The specific target should
+  // override this function to return the right number.
+  return 1;
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                             const MachineInstr *DefMI, unsigned DefIdx,
+                             const MachineInstr *UseMI, unsigned UseIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return -1;
+
+  unsigned DefClass = DefMI->getDesc().getSchedClass();
+  unsigned UseClass = UseMI->getDesc().getSchedClass();
+  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                                   SDNode *DefNode, unsigned DefIdx,
+                                   SDNode *UseNode, unsigned UseIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return -1;
+
+  if (!DefNode->isMachineOpcode())
+    return -1;
+
+  unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+  if (!UseNode->isMachineOpcode())
+    return ItinData->getOperandCycle(DefClass, DefIdx);
+  unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+                                       const MachineInstr *DefMI,
+                                       unsigned DefIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return false;
+
+  unsigned DefClass = DefMI->getDesc().getSchedClass();
+  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+  return (DefCycle != -1 && DefCycle <= 1);
+}
+
 /// insertNoop - Insert a noop into the instruction stream at the specified
 /// point.
 void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, 
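
A rough standalone sketch of the lookup the new getOperandLatency and
hasLowDefLatency hooks perform: map each scheduling class to per-operand
cycles, then compare the def cycle against the use cycle. The struct, the
latency formula, and the numbers are invented for illustration;
InstrItineraryData's actual computation differs in detail:

#include <cstdio>
#include <vector>

struct ItinEntry {
  unsigned NumMicroOps;
  std::vector<int> OperandCycles;  // cycle each operand is defined/read
};

static int operandLatency(const std::vector<ItinEntry> &Itins,
                          unsigned DefClass, unsigned DefIdx,
                          unsigned UseClass, unsigned UseIdx) {
  if (Itins.empty())
    return -1;                                   // no itinerary: unknown
  int DefCycle = Itins[DefClass].OperandCycles[DefIdx];
  int UseCycle = Itins[UseClass].OperandCycles[UseIdx];
  if (DefCycle < 0 || UseCycle < 0)
    return -1;
  int Latency = DefCycle - UseCycle + 1;         // simple ready-vs-read model
  return Latency > 0 ? Latency : 1;
}

int main() {
  std::vector<ItinEntry> Itins(2);
  Itins[0].NumMicroOps = 1;
  Itins[0].OperandCycles.push_back(3);           // result ready at cycle 3
  Itins[1].NumMicroOps = 2;
  Itins[1].OperandCycles.push_back(0);
  Itins[1].OperandCycles.push_back(2);           // operand 1 read at cycle 2
  std::printf("latency = %d\n", operandLatency(Itins, 0, 0, 1, 1)); // 2
}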

Modified: llvm/branches/wendling/eh/lib/Target/TargetLoweringObjectFile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/TargetLoweringObjectFile.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/TargetLoweringObjectFile.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/TargetLoweringObjectFile.cpp Tue Oct 26 19:48:03 2010
@@ -45,6 +45,7 @@
   LSDASection = 0;
   EHFrameSection = 0;
 
+  CommDirectiveSupportsAlignment = true;
   DwarfAbbrevSection = 0;
   DwarfInfoSection = 0;
   DwarfLineSection = 0;

Modified: llvm/branches/wendling/eh/lib/Target/TargetRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/TargetRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/TargetRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/TargetRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -63,7 +63,7 @@
 /// getAllocatableSetForRC - Toggle the bits that represent allocatable
 /// registers for the specific register class.
 static void getAllocatableSetForRC(const MachineFunction &MF,
-                                   const TargetRegisterClass *RC, BitVector &R){  
+                                   const TargetRegisterClass *RC, BitVector &R){
   for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
          E = RC->allocation_order_end(MF); I != E; ++I)
     R.set(*I);
@@ -74,12 +74,16 @@
   BitVector Allocatable(NumRegs);
   if (RC) {
     getAllocatableSetForRC(MF, RC, Allocatable);
-    return Allocatable;
+  } else {
+    for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+         E = regclass_end(); I != E; ++I)
+      getAllocatableSetForRC(MF, *I, Allocatable);
   }
 
-  for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
-         E = regclass_end(); I != E; ++I)
-    getAllocatableSetForRC(MF, *I, Allocatable);
+  // Mask out the reserved registers
+  BitVector Reserved = getReservedRegs(MF);
+  Allocatable ^= Reserved & Allocatable;
+
   return Allocatable;
 }
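
The new reserved-register masking is a compact BitVector idiom:
"Allocatable ^= Reserved & Allocatable" clears exactly those reserved bits
that are currently set, i.e. Allocatable &= ~Reserved without materializing
the complement. A standalone sketch with std::bitset standing in for
llvm::BitVector; the register positions are arbitrary:

#include <bitset>
#include <iostream>

int main() {
  std::bitset<8> Allocatable(std::string("01111110"));
  std::bitset<8> Reserved(std::string("00000110"));  // e.g. SP/FP slots
  Allocatable ^= Reserved & Allocatable;             // drop reserved regs
  std::cout << Allocatable << "\n";                  // prints 01111000
}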
 

Modified: llvm/branches/wendling/eh/lib/Target/X86/AsmParser/X86AsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/AsmParser/X86AsmLexer.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/AsmParser/X86AsmLexer.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/AsmParser/X86AsmLexer.cpp Tue Oct 26 19:48:03 2010
@@ -65,9 +65,10 @@
   }
 };
 
-}
+} // end anonymous namespace
 
-static unsigned MatchRegisterName(StringRef Name);
+#define GET_REGISTER_MATCHER
+#include "X86GenAsmMatcher.inc"
 
 AsmToken X86AsmLexer::LexTokenATT() {
   AsmToken lexedToken = lexDefinite();
@@ -162,7 +163,3 @@
   RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target);
   RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target);
 }
-
-#define REGISTERS_ONLY
-#include "X86GenAsmMatcher.inc"
-#undef REGISTERS_ONLY

Modified: llvm/branches/wendling/eh/lib/Target/X86/AsmParser/X86AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/AsmParser/X86AsmParser.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/AsmParser/X86AsmParser.cpp Tue Oct 26 19:48:03 2010
@@ -10,18 +10,21 @@
 #include "llvm/Target/TargetAsmParser.h"
 #include "X86.h"
 #include "X86Subtarget.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 namespace {
@@ -39,8 +42,6 @@
 
   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
 
-  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
-
   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
 
   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
@@ -50,16 +51,15 @@
 
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
 
-  bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCInst &Inst);
+  bool MatchAndEmitInstruction(SMLoc IDLoc,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out);
 
   /// @name Auto-generated Matcher Functions
   /// {
 
-  unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
-
-  bool MatchInstructionImpl(
-    const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
+#define GET_ASSEMBLER_HEADER
+#include "X86GenAsmMatcher.inc"
 
   /// }
 
@@ -148,6 +148,8 @@
   /// getEndLoc - Get the location of the last token of this operand.
   SMLoc getEndLoc() const { return EndLoc; }
 
+  virtual void dump(raw_ostream &OS) const {}
+
   StringRef getToken() const {
     assert(Kind == Token && "Invalid access!");
     return StringRef(Tok.Data, Tok.Length);
@@ -267,10 +269,6 @@
       !getMemIndexReg() && getMemScale() == 1;
   }
 
-  bool isNoSegMem() const {
-    return Kind == Memory && !getMemSegReg();
-  }
-
   bool isReg() const { return Kind == Register; }
 
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
@@ -305,14 +303,6 @@
     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
   }
 
-  void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
-    assert((N == 4) && "Invalid number of operands!");
-    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
-    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
-    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
-    addExpr(Inst, getMemDisp());
-  }
-
   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
     X86Operand *Res = new X86Operand(Token, Loc, Loc);
     Res->Tok.Data = Str.data();
@@ -384,14 +374,18 @@
   // validation later, so maybe there is no need for this here.
   RegNo = MatchRegisterName(Tok.getString());
 
+  // If the match failed, try the register name as lowercase.
+  if (RegNo == 0)
+    RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
+
   // FIXME: This should be done using Requires<In32BitMode> and
   // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
   // can be also checked.
   if (RegNo == X86::RIZ && !Is64Bit)
     return Error(Tok.getLoc(), "riz register in 64-bit mode only");
 
-  // Parse %st(1) and "%st" as "%st(0)"
-  if (RegNo == 0 && Tok.getString() == "st") {
+  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
+  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
     RegNo = X86::ST0;
     EndLoc = Tok.getLoc();
     Parser.Lex(); // Eat 'st'
@@ -597,7 +591,7 @@
         }
       }
     } else if (getLexer().isNot(AsmToken::RParen)) {
-      // Otherwise we have the unsupported form of a scale amount without an
-      // index.
+      // A scale amount without an index is ignored.
       SMLoc Loc = Parser.getTok().getLoc();
 
@@ -605,8 +599,9 @@
       if (getParser().ParseAbsoluteExpression(Value))
         return 0;
 
-      Error(Loc, "cannot have scale factor without index register");
-      return 0;
+      if (Value != 1)
+        Warning(Loc, "scale factor without index register is ignored");
+      Scale = 1;
     }
   }
 
@@ -625,33 +620,6 @@
 bool X86ATTAsmParser::
 ParseInstruction(StringRef Name, SMLoc NameLoc,
                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-  // The various flavors of pushf and popf use Requires<In32BitMode> and
-  // Requires<In64BitMode>, but the assembler doesn't yet implement that.
-  // For now, just do a manual check to prevent silent misencoding.
-  if (Is64Bit) {
-    if (Name == "popfl")
-      return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
-    else if (Name == "pushfl")
-      return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
-    else if (Name == "pusha")
-      return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
-  } else {
-    if (Name == "popfq")
-      return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
-    else if (Name == "pushfq")
-      return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
-  }
-
-  // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
-  // the form jrcxz is not allowed in 32-bit mode.
-  if (Is64Bit) {
-    if (Name == "jcxz")
-      return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
-  } else {
-    if (Name == "jrcxz")
-      return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
-  }
-
   // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
   // represent alternative syntaxes in the .td file, without requiring
   // instruction duplication.
@@ -664,47 +632,77 @@
     .Case("repe", "rep")
     .Case("repz", "rep")
     .Case("repnz", "repne")
+    .Case("iret", "iretl")
+    .Case("sysret", "sysretl")
+    .Case("cbw",  "cbtw")
+    .Case("cwd",  "cwtd")
+    .Case("cdq", "cltd")
+    .Case("cwde", "cwtl")
+    .Case("cdqe", "cltq")
+    .Case("smovb", "movsb")
+    .Case("smovw", "movsw")
+    .Case("smovl", "movsl")
+    .Case("smovq", "movsq")
+    .Case("push", Is64Bit ? "pushq" : "pushl")
+    .Case("pop", Is64Bit ? "popq" : "popl")
     .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
     .Case("popf",  Is64Bit ? "popfq"  : "popfl")
+    .Case("pushfd", "pushfl")
+    .Case("popfd",  "popfl")
     .Case("retl", Is64Bit ? "retl" : "ret")
     .Case("retq", Is64Bit ? "ret" : "retq")
-    .Case("setz", "sete")
-    .Case("setnz", "setne")
-    .Case("jz", "je")
-    .Case("jnz", "jne")
-    .Case("jc", "jb")
-    // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
-    // jecxz requires an AdSize prefix but jecxz does not have a prefix in
-    // 32-bit mode.
-    .Case("jecxz", "jcxz")
-    .Case("jrcxz", "jcxz")
-    .Case("jna", "jbe")
-    .Case("jnae", "jb")
-    .Case("jnb", "jae")
-    .Case("jnbe", "ja")
-    .Case("jnc", "jae")
-    .Case("jng", "jle")
-    .Case("jnge", "jl")
-    .Case("jnl", "jge")
-    .Case("jnle", "jg")
-    .Case("jpe", "jp")
-    .Case("jpo", "jnp")
-    .Case("cmovcl", "cmovbl")
-    .Case("cmovcl", "cmovbl")
-    .Case("cmovnal", "cmovbel")
-    .Case("cmovnbl", "cmovael")
-    .Case("cmovnbel", "cmoval")
-    .Case("cmovncl", "cmovael")
-    .Case("cmovngl", "cmovlel")
-    .Case("cmovnl", "cmovgel")
-    .Case("cmovngl", "cmovlel")
-    .Case("cmovngel", "cmovll")
-    .Case("cmovnll", "cmovgel")
-    .Case("cmovnlel", "cmovgl")
-    .Case("cmovnzl", "cmovnel")
-    .Case("cmovzl", "cmovel")
+    .Case("setz", "sete")  .Case("setnz", "setne")
+    .Case("setc", "setb")  .Case("setna", "setbe")
+    .Case("setnae", "setb").Case("setnb", "setae")
+    .Case("setnbe", "seta").Case("setnc", "setae")
+    .Case("setng", "setle").Case("setnge", "setl")
+    .Case("setnl", "setge").Case("setnle", "setg")
+    .Case("setpe", "setp") .Case("setpo", "setnp")
+    .Case("jz", "je")  .Case("jnz", "jne")
+    .Case("jc", "jb")  .Case("jna", "jbe")
+    .Case("jnae", "jb").Case("jnb", "jae")
+    .Case("jnbe", "ja").Case("jnc", "jae")
+    .Case("jng", "jle").Case("jnge", "jl")
+    .Case("jnl", "jge").Case("jnle", "jg")
+    .Case("jpe", "jp") .Case("jpo", "jnp")
+    // Condition code aliases for 16-bit, 32-bit, 64-bit and unspec operands.
+    .Case("cmovcw",  "cmovbw") .Case("cmovcl",  "cmovbl")
+    .Case("cmovcq",  "cmovbq") .Case("cmovc",   "cmovb")
+    .Case("cmovnaew","cmovbw") .Case("cmovnael","cmovbl")
+    .Case("cmovnaeq","cmovbq") .Case("cmovnae", "cmovb")
+    .Case("cmovnaw", "cmovbew").Case("cmovnal", "cmovbel")
+    .Case("cmovnaq", "cmovbeq").Case("cmovna",  "cmovbe")
+    .Case("cmovnbw", "cmovaew").Case("cmovnbl", "cmovael")
+    .Case("cmovnbq", "cmovaeq").Case("cmovnb",  "cmovae")
+    .Case("cmovnbew","cmovaw") .Case("cmovnbel","cmoval")
+    .Case("cmovnbeq","cmovaq") .Case("cmovnbe", "cmova")
+    .Case("cmovncw", "cmovaew").Case("cmovncl", "cmovael")
+    .Case("cmovncq", "cmovaeq").Case("cmovnc",  "cmovae")
+    .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
+    .Case("cmovngq", "cmovleq").Case("cmovng",  "cmovle")
+    .Case("cmovnw",  "cmovgew").Case("cmovnl",  "cmovgel")
+    .Case("cmovnq",  "cmovgeq").Case("cmovn",   "cmovge")
+    .Case("cmovngw", "cmovlew").Case("cmovngl", "cmovlel")
+    .Case("cmovngq", "cmovleq").Case("cmovng",  "cmovle")
+    .Case("cmovngew","cmovlw") .Case("cmovngel","cmovll")
+    .Case("cmovngeq","cmovlq") .Case("cmovnge", "cmovl")
+    .Case("cmovnlw", "cmovgew").Case("cmovnll", "cmovgel")
+    .Case("cmovnlq", "cmovgeq").Case("cmovnl",  "cmovge")
+    .Case("cmovnlew","cmovgw") .Case("cmovnlel","cmovgl")
+    .Case("cmovnleq","cmovgq") .Case("cmovnle", "cmovg")
+    .Case("cmovnzw", "cmovnew").Case("cmovnzl", "cmovnel")
+    .Case("cmovnzq", "cmovneq").Case("cmovnz",  "cmovne")
+    .Case("cmovzw",  "cmovew") .Case("cmovzl",  "cmovel")
+    .Case("cmovzq",  "cmoveq") .Case("cmovz",   "cmove")
+    // Floating point stack cmov aliases.
+    .Case("fcmovz", "fcmove")
+    .Case("fcmova", "fcmovnbe")
+    .Case("fcmovnae", "fcmovb")
+    .Case("fcmovna", "fcmovbe")
+    .Case("fcmovae", "fcmovnb")
     .Case("fwait", "wait")
-    .Case("movzx", "movzb")
+    .Case("movzx", "movzb")  // FIXME: Not correct.
+    .Case("fildq", "fildll")
     .Default(Name);
 
   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
@@ -781,12 +779,24 @@
       PatchedName = "vpclmulqdq";
     }
   }
+
   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 
   if (ExtraImmOp)
     Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
 
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+  // Determine whether this is an instruction prefix.
+  bool isPrefix =
+    PatchedName == "lock" || PatchedName == "rep" ||
+    PatchedName == "repne";
+
+  // This does the actual operand parsing.  Don't parse any more if we have a
+  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
+  // just want to parse the "lock" as the first instruction and the "incl" as
+  // the next one.
+  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
 
     // Parse '*' modifier.
     if (getLexer().is(AsmToken::Star)) {
@@ -798,8 +808,10 @@
     // Read the first operand.
     if (X86Operand *Op = ParseOperand())
       Operands.push_back(Op);
-    else
+    else {
+      Parser.EatToEndOfStatement();
       return true;
+    }
 
     while (getLexer().is(AsmToken::Comma)) {
       Parser.Lex();  // Eat the comma.
@@ -807,20 +819,103 @@
       // Parse and remember the operand.
       if (X86Operand *Op = ParseOperand())
         Operands.push_back(Op);
-      else
+      else {
+        Parser.EatToEndOfStatement();
         return true;
+      }
     }
+
+    if (getLexer().isNot(AsmToken::EndOfStatement)) {
+      Parser.EatToEndOfStatement();
+      return TokError("unexpected token in argument list");
+    }
+  }
+
+  if (getLexer().is(AsmToken::EndOfStatement))
+    Parser.Lex(); // Consume the EndOfStatement
+
+  // Hack to allow 'movq <largeimm>, <reg>' as an alias for movabsq.
+  if ((Name == "movq" || Name == "mov") && Operands.size() == 3 &&
+      static_cast<X86Operand*>(Operands[2])->isReg() &&
+      static_cast<X86Operand*>(Operands[1])->isImm() &&
+      !static_cast<X86Operand*>(Operands[1])->isImmSExti64i32()) {
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken("movabsq", NameLoc);
   }
 
-  // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
+  // FIXME: Hack to recognize "s{hr,ar,hl} $1, <op>".  Canonicalize to
+  // "shift <op>".
   if ((Name.startswith("shr") || Name.startswith("sar") ||
        Name.startswith("shl")) &&
-      Operands.size() == 3 &&
-      static_cast<X86Operand*>(Operands[1])->isImm() &&
-      isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
-      cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
-    delete Operands[1];
-    Operands.erase(Operands.begin() + 1);
+      Operands.size() == 3) {
+    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+        cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+      delete Operands[1];
+      Operands.erase(Operands.begin() + 1);
+    }
+  }
+
+  // FIXME: Hack to handle recognize "rc[lr] <op>" -> "rcl $1, <op>".
+  if ((Name.startswith("rcl") || Name.startswith("rcr")) &&
+      Operands.size() == 2) {
+    const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext());
+    Operands.push_back(X86Operand::CreateImm(One, NameLoc, NameLoc));
+    std::swap(Operands[1], Operands[2]);
+  }
+
+  // FIXME: Hack to handle recognize "sh[lr]d op,op" -> "shld $1, op,op".
+  if ((Name.startswith("shld") || Name.startswith("shrd")) &&
+      Operands.size() == 3) {
+    const MCExpr *One = MCConstantExpr::Create(1, getParser().getContext());
+    Operands.insert(Operands.begin()+1,
+                    X86Operand::CreateImm(One, NameLoc, NameLoc));
+  }
+
+  // FIXME: Hack to recognize "in[bwl] <op>".  Canonicalize it to
+  // "inb <op>, %al".
+  if ((Name == "inb" || Name == "inw" || Name == "inl") &&
+      Operands.size() == 2) {
+    unsigned Reg;
+    if (Name[2] == 'b')
+      Reg = MatchRegisterName("al");
+    else if (Name[2] == 'w')
+      Reg = MatchRegisterName("ax");
+    else
+      Reg = MatchRegisterName("eax");
+    SMLoc Loc = Operands.back()->getEndLoc();
+    Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
+  }
+
+  // FIXME: Hack to handle recognize "out[bwl] <op>".  Canonicalize it to
+  // "outb %al, <op>".
+  if ((Name == "outb" || Name == "outw" || Name == "outl") &&
+      Operands.size() == 2) {
+    unsigned Reg;
+    if (Name[3] == 'b')
+      Reg = MatchRegisterName("al");
+    else if (Name[3] == 'w')
+      Reg = MatchRegisterName("ax");
+    else
+      Reg = MatchRegisterName("eax");
+    SMLoc Loc = Operands.back()->getEndLoc();
+    Operands.push_back(X86Operand::CreateReg(Reg, Loc, Loc));
+    std::swap(Operands[1], Operands[2]);
+  }
+
+  // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
+  if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
+      Operands.size() == 3) {
+    X86Operand &Op = *(X86Operand*)Operands.back();
+    if (Op.isMem() && Op.Mem.SegReg == 0 &&
+        isa<MCConstantExpr>(Op.Mem.Disp) &&
+        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+      SMLoc Loc = Op.getEndLoc();
+      Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+      delete &Op;
+    }
   }
 
   // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
@@ -834,6 +929,206 @@
     Operands.erase(Operands.begin() + 2);
   }
 
+  // FIXME: Hack to handle "f{mulp,addp} st(0), $op" the same as
+  // "f{mulp,addp} $op", since they commute.  We also allow fdivrp/fsubrp even
+  // though they don't commute, solely because gas does support this.
+  if ((Name=="fmulp" || Name=="faddp" || Name=="fsubrp" || Name=="fdivrp") &&
+      Operands.size() == 3 &&
+      static_cast<X86Operand*>(Operands[1])->isReg() &&
+      static_cast<X86Operand*>(Operands[1])->getReg() == X86::ST0) {
+    delete Operands[1];
+    Operands.erase(Operands.begin() + 1);
+  }
+
+  // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
+  // B".
+  if (Name.startswith("imul") && Operands.size() == 3 &&
+      static_cast<X86Operand*>(Operands[1])->isImm() &&
+      static_cast<X86Operand*>(Operands.back())->isReg()) {
+    X86Operand *Op = static_cast<X86Operand*>(Operands.back());
+    Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
+                                             Op->getEndLoc()));
+  }
+
+  // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+  // effect (both store to a 16-bit mem).  Force to sldtw to avoid ambiguity
+  // errors, since its encoding is the most compact.
+  if (Name == "sldt" && Operands.size() == 2 &&
+      static_cast<X86Operand*>(Operands[1])->isMem()) {
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken("sldtw", NameLoc);
+  }
+
+  // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as
+  // synonyms.  Our tables only have the "<reg>, <mem>" form, so if we see the
+  // other operand order, swap them.
+  if (Name == "xchgb" || Name == "xchgw" || Name == "xchgl" || Name == "xchgq"||
+      Name == "xchg")
+    if (Operands.size() == 3 &&
+        static_cast<X86Operand*>(Operands[1])->isMem() &&
+        static_cast<X86Operand*>(Operands[2])->isReg()) {
+      std::swap(Operands[1], Operands[2]);
+    }
+
+  // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as
+  // synonyms.  Our tables only have the "<mem>, <reg>" form, so if we see the
+  // other operand order, swap them.
+  if (Name == "testb" || Name == "testw" || Name == "testl" || Name == "testq"||
+      Name == "test")
+    if (Operands.size() == 3 &&
+        static_cast<X86Operand*>(Operands[1])->isReg() &&
+        static_cast<X86Operand*>(Operands[2])->isMem()) {
+      std::swap(Operands[1], Operands[2]);
+    }
+
+  // The assembler accepts these instructions with no operand as a synonym for
+  // an instruction acting on st(1).  e.g. "fxch" -> "fxch %st(1)".
+  if ((Name == "fxch" || Name == "fucom" || Name == "fucomp" ||
+       Name == "faddp" || Name == "fsubp" || Name == "fsubrp" ||
+       Name == "fmulp" || Name == "fdivp" || Name == "fdivrp") &&
+      Operands.size() == 1) {
+    Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"),
+                                             NameLoc, NameLoc));
+  }
+
+  // The assembler accepts these instructions with too few operands as a synonym
+  // for taking %st(1),%st(0) or X, %st(0).
+  if ((Name == "fcomi" || Name == "fucomi") && Operands.size() < 3) {
+    if (Operands.size() == 1)
+      Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"),
+                                               NameLoc, NameLoc));
+    Operands.push_back(X86Operand::CreateReg(MatchRegisterName("st(0)"),
+                                             NameLoc, NameLoc));
+  }
+
+  // The assembler accepts various amounts of brokenness for fnstsw.
+  if (Name == "fnstsw") {
+    if (Operands.size() == 2 &&
+        static_cast<X86Operand*>(Operands[1])->isReg()) {
+      // "fnstsw al" and "fnstsw eax" -> "fnstw"
+      unsigned Reg = static_cast<X86Operand*>(Operands[1])->Reg.RegNo;
+      if (Reg == MatchRegisterName("eax") ||
+          Reg == MatchRegisterName("al")) {
+        delete Operands[1];
+        Operands.pop_back();
+      }
+    }
+
+    // "fnstw" -> "fnstw %ax"
+    if (Operands.size() == 1)
+      Operands.push_back(X86Operand::CreateReg(MatchRegisterName("ax"),
+                                               NameLoc, NameLoc));
+  }
+
+  // jmp $42,$5 -> ljmp, similarly for call.
+  if ((Name.startswith("call") || Name.startswith("jmp")) &&
+      Operands.size() == 3 &&
+      static_cast<X86Operand*>(Operands[1])->isImm() &&
+      static_cast<X86Operand*>(Operands[2])->isImm()) {
+    const char *NewOpName = StringSwitch<const char *>(Name)
+      .Case("jmp", "ljmp")
+      .Case("jmpw", "ljmpw")
+      .Case("jmpl", "ljmpl")
+      .Case("jmpq", "ljmpq")
+      .Case("call", "lcall")
+      .Case("callw", "lcallw")
+      .Case("calll", "lcalll")
+      .Case("callq", "lcallq")
+    .Default(0);
+    if (NewOpName) {
+      delete Operands[0];
+      Operands[0] = X86Operand::CreateToken(NewOpName, NameLoc);
+      Name = NewOpName;
+    }
+  }
+
+  // lcall and ljmp -> lcalll and ljmpl.
+  if ((Name == "lcall" || Name == "ljmp") && Operands.size() == 3) {
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken(Name == "lcall" ? "lcalll" : "ljmpl",
+                                          NameLoc);
+  }
+
+  // call foo is not ambiguous with callw.
+  if (Name == "call" && Operands.size() == 2) {
+    const char *NewName = Is64Bit ? "callq" : "calll";
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken(NewName, NameLoc);
+    Name = NewName;
+  }
+
+  // movsd -> movsl (when no operands are specified).
+  if (Name == "movsd" && Operands.size() == 1) {
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken("movsl", NameLoc);
+  }
+
+  // fstp <mem> -> fstps <mem>.  Without this, we'll default to fstpl due to
+  // suffix searching.
+  if (Name == "fstp" && Operands.size() == 2 &&
+      static_cast<X86Operand*>(Operands[1])->isMem()) {
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken("fstps", NameLoc);
+  }
+
+  // "clr <reg>" -> "xor <reg>, <reg>".
+  if ((Name == "clrb" || Name == "clrw" || Name == "clrl" || Name == "clrq" ||
+       Name == "clr") && Operands.size() == 2 &&
+      static_cast<X86Operand*>(Operands[1])->isReg()) {
+    unsigned RegNo = static_cast<X86Operand*>(Operands[1])->getReg();
+    Operands.push_back(X86Operand::CreateReg(RegNo, NameLoc, NameLoc));
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken("xor", NameLoc);
+  }
+
+  // FIXME: Hack to handle recognize "aa[dm]" -> "aa[dm] $0xA".
+  if ((Name.startswith("aad") || Name.startswith("aam")) &&
+      Operands.size() == 1) {
+    const MCExpr *A = MCConstantExpr::Create(0xA, getParser().getContext());
+    Operands.push_back(X86Operand::CreateImm(A, NameLoc, NameLoc));
+  }
+
+  // "lgdtl" is not ambiguous 32-bit mode and is the same as "lgdt".
+  // "lgdtq" is not ambiguous 64-bit mode and is the same as "lgdt".
+  if ((Name == "lgdtl" && Is64Bit == false) ||
+      (Name == "lgdtq" && Is64Bit == true)) {
+    const char *NewName = "lgdt";
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken(NewName, NameLoc);
+    Name = NewName;
+  }
+
+  // "lidtl" is not ambiguous 32-bit mode and is the same as "lidt".
+  // "lidtq" is not ambiguous 64-bit mode and is the same as "lidt".
+  if ((Name == "lidtl" && Is64Bit == false) ||
+      (Name == "lidtq" && Is64Bit == true)) {
+    const char *NewName = "lidt";
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken(NewName, NameLoc);
+    Name = NewName;
+  }
+
+  // "sgdtl" is not ambiguous 32-bit mode and is the same as "sgdt".
+  // "sgdtq" is not ambiguous 64-bit mode and is the same as "sgdt".
+  if ((Name == "sgdtl" && Is64Bit == false) ||
+      (Name == "sgdtq" && Is64Bit == true)) {
+    const char *NewName = "sgdt";
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken(NewName, NameLoc);
+    Name = NewName;
+  }
+
+  // "sidtl" is not ambiguous 32-bit mode and is the same as "sidt".
+  // "sidtq" is not ambiguous 64-bit mode and is the same as "sidt".
+  if ((Name == "sidtl" && Is64Bit == false) ||
+      (Name == "sidtq" && Is64Bit == true)) {
+    const char *NewName = "sidt";
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken(NewName, NameLoc);
+    Name = NewName;
+  }
+
   return false;
 }
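
The large StringSwitch table near the top of ParseInstruction is, at heart,
an alias map with a pass-through default. A standalone sketch of that shape,
with std::map in place of llvm::StringSwitch; the three aliases shown are
lifted from the table above, everything else is illustrative:

#include <iostream>
#include <map>
#include <string>

static std::string canonicalize(const std::string &Name, bool Is64Bit) {
  // Mode-dependent aliases mirror .Case("pushf", Is64Bit ? ... : ...).
  if (Name == "pushf")
    return Is64Bit ? "pushfq" : "pushfl";
  static std::map<std::string, std::string> Aliases;
  if (Aliases.empty()) {
    Aliases["setz"] = "sete";
    Aliases["jz"] = "je";
    Aliases["fcmovz"] = "fcmove";
  }
  std::map<std::string, std::string>::const_iterator I = Aliases.find(Name);
  return I == Aliases.end() ? Name : I->second;  // .Default(Name)
}

int main() {
  std::cout << canonicalize("setz", false) << "\n";  // sete
  std::cout << canonicalize("pushf", true) << "\n";  // pushfq
  std::cout << canonicalize("addl", true) << "\n";   // addl (unchanged)
}
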
 
@@ -869,20 +1164,55 @@
   return false;
 }
 
-bool
-X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
-                                    &Operands,
-                                  MCInst &Inst) {
-  // First, try a direct match.
-  if (!MatchInstructionImpl(Operands, Inst))
-    return false;
 
-  // Ignore anything which is obviously not a suffix match.
-  if (Operands.size() == 0)
-    return true;
+bool X86ATTAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                        MCStreamer &Out) {
+  assert(!Operands.empty() && "Unexpected empty operand list!");
   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
-  if (!Op->isToken() || Op->getToken().size() > 15)
+  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
+  // First, handle aliases that expand to multiple instructions.
+  // FIXME: This should be replaced with a real .td file alias mechanism.
+  if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
+      Op->getToken() == "finit" || Op->getToken() == "fsave" ||
+      Op->getToken() == "fstenv") {
+    MCInst Inst;
+    Inst.setOpcode(X86::WAIT);
+    Out.EmitInstruction(Inst);
+
+    const char *Repl =
+      StringSwitch<const char*>(Op->getToken())
+        .Case("finit", "fninit")
+        .Case("fsave", "fnsave")
+        .Case("fstcw", "fnstcw")
+        .Case("fstenv", "fnstenv")
+        .Case("fstsw", "fnstsw")
+        .Default(0);
+    assert(Repl && "Unknown wait-prefixed instruction");
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
+  }
+
+  bool WasOriginallyInvalidOperand = false;
+  unsigned OrigErrorInfo;
+  MCInst Inst;
+
+  // First, try a direct match.
+  switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+  case Match_Success:
+    Out.EmitInstruction(Inst);
+    return false;
+  case Match_MissingFeature:
+    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
     return true;
+  case Match_InvalidOperand:
+    WasOriginallyInvalidOperand = true;
+    break;
+  case Match_MnemonicFail:
+    break;
+  }
 
   // FIXME: Ideally, we would only attempt suffix matches for things which are
   // valid prefixes, and we could just infer the right unambiguous suffix;
   // for now, we resort to the following hack.
   // following hack.
 
   // Change the operand to point to a temporary token.
-  char Tmp[16];
   StringRef Base = Op->getToken();
-  memcpy(Tmp, Base.data(), Base.size());
-  Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
+  SmallString<16> Tmp;
+  Tmp += Base;
+  Tmp += ' ';
+  Op->setTokenValue(Tmp.str());
 
   // Check for the various suffix matches.
   Tmp[Base.size()] = 'b';
-  bool MatchB = MatchInstructionImpl(Operands, Inst);
+  unsigned BErrorInfo, WErrorInfo, LErrorInfo, QErrorInfo;
+  MatchResultTy MatchB = MatchInstructionImpl(Operands, Inst, BErrorInfo);
   Tmp[Base.size()] = 'w';
-  bool MatchW = MatchInstructionImpl(Operands, Inst);
+  MatchResultTy MatchW = MatchInstructionImpl(Operands, Inst, WErrorInfo);
   Tmp[Base.size()] = 'l';
-  bool MatchL = MatchInstructionImpl(Operands, Inst);
+  MatchResultTy MatchL = MatchInstructionImpl(Operands, Inst, LErrorInfo);
   Tmp[Base.size()] = 'q';
-  bool MatchQ = MatchInstructionImpl(Operands, Inst);
+  MatchResultTy MatchQ = MatchInstructionImpl(Operands, Inst, QErrorInfo);
 
   // Restore the old token.
   Op->setTokenValue(Base);
@@ -911,10 +1243,88 @@
   // If exactly one matched, then we treat that as a successful match (and the
   // instruction will already have been filled in correctly, since the failing
   // matches won't have modified it).
-  if (MatchB + MatchW + MatchL + MatchQ == 3)
+  unsigned NumSuccessfulMatches =
+    (MatchB == Match_Success) + (MatchW == Match_Success) +
+    (MatchL == Match_Success) + (MatchQ == Match_Success);
+  if (NumSuccessfulMatches == 1) {
+    Out.EmitInstruction(Inst);
     return false;
+  }
+
+  // Otherwise, the match failed, try to produce a decent error message.
+
+  // If we had multiple suffix matches, then identify this as an ambiguous
+  // match.
+  if (NumSuccessfulMatches > 1) {
+    char MatchChars[4];
+    unsigned NumMatches = 0;
+    if (MatchB == Match_Success)
+      MatchChars[NumMatches++] = 'b';
+    if (MatchW == Match_Success)
+      MatchChars[NumMatches++] = 'w';
+    if (MatchL == Match_Success)
+      MatchChars[NumMatches++] = 'l';
+    if (MatchQ == Match_Success)
+      MatchChars[NumMatches++] = 'q';
+
+    SmallString<126> Msg;
+    raw_svector_ostream OS(Msg);
+    OS << "ambiguous instructions require an explicit suffix (could be ";
+    for (unsigned i = 0; i != NumMatches; ++i) {
+      if (i != 0)
+        OS << ", ";
+      if (i + 1 == NumMatches)
+        OS << "or ";
+      OS << "'" << Base << MatchChars[i] << "'";
+    }
+    OS << ")";
+    Error(IDLoc, OS.str());
+    return true;
+  }
+
+  // Okay, we know that none of the variants matched successfully.
+
+  // If all of the instructions reported an invalid mnemonic, then the original
+  // mnemonic was invalid.
+  if ((MatchB == Match_MnemonicFail) && (MatchW == Match_MnemonicFail) &&
+      (MatchL == Match_MnemonicFail) && (MatchQ == Match_MnemonicFail)) {
+    if (!WasOriginallyInvalidOperand) {
+      Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
+      return true;
+    }
+
+    // Recover location info for the operand if we know which was the problem.
+    SMLoc ErrorLoc = IDLoc;
+    if (OrigErrorInfo != ~0U) {
+      if (OrigErrorInfo >= Operands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+    }
+
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+
+  // If one instruction matched with a missing feature, report this as a
+  // missing feature.
+  if ((MatchB == Match_MissingFeature) + (MatchW == Match_MissingFeature) +
+      (MatchL == Match_MissingFeature) + (MatchQ == Match_MissingFeature) == 1){
+    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+    return true;
+  }
+
+  // If one instruction matched with an invalid operand, report this as an
+  // operand failure.
+  if ((MatchB == Match_InvalidOperand) + (MatchW == Match_InvalidOperand) +
+      (MatchL == Match_InvalidOperand) + (MatchQ == Match_InvalidOperand) == 1){
+    Error(IDLoc, "invalid operand for instruction");
+    return true;
+  }
 
-  // Otherwise, the match failed.
+  // If all of these were an outright failure, report it in a useless way.
+  // FIXME: We should give nicer diagnostics about the exact failure.
+  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
   return true;
 }
 
@@ -928,4 +1338,6 @@
   LLVMInitializeX86AsmLexer();
 }
 
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
 #include "X86GenAsmMatcher.inc"

Modified: llvm/branches/wendling/eh/lib/Target/X86/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -50,4 +50,3 @@
 
 add_llvm_target(X86CodeGen ${sources})
 
-target_link_libraries (LLVMX86CodeGen LLVMX86AsmPrinter LLVMSelectionDAG)

Modified: llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86Disassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86Disassembler.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86Disassembler.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86Disassembler.cpp Tue Oct 26 19:48:03 2010
@@ -157,9 +157,8 @@
 /// @param immediate    - The immediate value to append.
 /// @param operand      - The operand, as stored in the descriptor table.
 /// @param insn         - The internal instruction.
-static void translateImmediate(MCInst &mcInst, 
-                               uint64_t immediate, 
-                               OperandSpecifier &operand,
+static void translateImmediate(MCInst &mcInst, uint64_t immediate,
+                               const OperandSpecifier &operand,
                                InternalInstruction &insn) {
   // Sign-extend the immediate if necessary.
 
@@ -392,9 +391,8 @@
 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
 ///                       from.
 /// @return             - 0 on success; nonzero otherwise
-static bool translateRM(MCInst &mcInst,
-                       OperandSpecifier &operand,
-                       InternalInstruction &insn) {
+static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
+                        InternalInstruction &insn) {
   switch (operand.type) {
   default:
     debug("Unexpected type for a R/M operand");
@@ -461,9 +459,8 @@
 /// @param operand      - The operand, as stored in the descriptor table.
 /// @param insn         - The internal instruction.
 /// @return             - false on success; true otherwise.
-static bool translateOperand(MCInst &mcInst,
-                            OperandSpecifier &operand,
-                            InternalInstruction &insn) {
+static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
+                             InternalInstruction &insn) {
   switch (operand.encoding) {
   default:
     debug("Unhandled operand encoding during translation");

Modified: llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86Disassembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86Disassembler.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86Disassembler.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86Disassembler.h Tue Oct 26 19:48:03 2010
@@ -78,7 +78,7 @@
   const char*             name;
 
 #define INSTRUCTION_IDS               \
-  InstrUID*  instructionIDs;
+  const InstrUID *instructionIDs;
 
 #include "X86DisassemblerDecoderCommon.h"
 

Modified: llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c Tue Oct 26 19:48:03 2010
@@ -27,12 +27,6 @@
 
 typedef int8_t bool;
 
-#ifdef __GNUC__
-#define NORETURN __attribute__((noreturn))
-#else
-#define NORETURN
-#endif
-
 #ifndef NDEBUG
 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
 #else
@@ -103,7 +97,7 @@
                        InstructionContext insnContext,
                        uint8_t opcode,
                        uint8_t modRM) {
-  struct ModRMDecision* dec;
+  const struct ModRMDecision* dec;
   
   switch (type) {
   default:
@@ -147,7 +141,7 @@
  *              decode(); specifierForUID will not check bounds.
  * @return    - A pointer to the specification for that instruction.
  */
-static struct InstructionSpecifier* specifierForUID(InstrUID uid) {
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
   return &INSTRUCTIONS_SYM[uid];
 }
 
@@ -405,7 +399,7 @@
     insn->registerSize       = (hasOpSize ? 2 : 4);
     insn->addressSize        = (hasAdSize ? 2 : 4);
     insn->displacementSize   = (hasAdSize ? 2 : 4);
-    insn->immediateSize      = (hasAdSize ? 2 : 4);
+    insn->immediateSize      = (hasOpSize ? 2 : 4);
   } else if (insn->mode == MODE_64BIT) {
     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
       insn->registerSize       = 8;
@@ -632,9 +626,9 @@
      * instead of F2 changes a 32 to a 64, we adopt the new encoding.
      */
     
-    struct InstructionSpecifier* spec;
+    const struct InstructionSpecifier *spec;
     uint16_t instructionIDWithREXw;
-    struct InstructionSpecifier* specWithREXw;
+    const struct InstructionSpecifier *specWithREXw;
     
     spec = specifierForUID(instructionID);
     
@@ -672,9 +666,9 @@
      * in the right place we check if there's a 16-bit operation.
      */
     
-    struct InstructionSpecifier* spec;
+    const struct InstructionSpecifier *spec;
     uint16_t instructionIDWithOpsize;
-    struct InstructionSpecifier* specWithOpsize;
+    const struct InstructionSpecifier *specWithOpsize;
     
     spec = specifierForUID(instructionID);
     
@@ -1067,7 +1061,7 @@
  *                invalid for its class.
  */
 static int fixupReg(struct InternalInstruction *insn, 
-                    struct OperandSpecifier *op) {
+                    const struct OperandSpecifier *op) {
   uint8_t valid;
   
   dbgprintf(insn, "fixupReg()");
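
One note on the sizing fix in this file (hasAdSize changed to hasOpSize for
immediateSize): the 0x66 operand-size prefix is what shrinks a default 32-bit
immediate to 16 bits, while 0x67 only affects address and displacement width.
A tiny illustrative sketch of the corrected rule, with invented values:

#include <cstdio>

int main() {
  bool hasOpSize = true;    // instruction carried a 0x66 prefix
  bool hasAdSize = false;   // no 0x67 prefix
  unsigned displacementSize = hasAdSize ? 2 : 4;  // tracks address size
  unsigned immediateSize    = hasOpSize ? 2 : 4;  // tracks operand size
  std::printf("disp=%u imm=%u bytes\n", displacementSize, immediateSize);
  return 0;
}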

Modified: llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h Tue Oct 26 19:48:03 2010
@@ -24,7 +24,7 @@
   const char*             name;
 
 #define INSTRUCTION_IDS     \
-  InstrUID*  instructionIDs;
+  const InstrUID *instructionIDs;
 
 #include "X86DisassemblerDecoderCommon.h"
   
@@ -423,7 +423,7 @@
   /* The instruction ID, extracted from the decode table */
   uint16_t instructionID;
   /* The specifier for the instruction, from the instruction info table */
-  struct InstructionSpecifier* spec;
+  const struct InstructionSpecifier *spec;
   
   /* state for additional bytes, consumed during operand decode.  Pattern:
      consumed___ indicates that the byte was already consumed and does not

Modified: llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h Tue Oct 26 19:48:03 2010
@@ -248,6 +248,7 @@
   ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
   ENUM_ENTRY(TYPE_LEA,        "Effective address")                             \
   ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
+  ENUM_ENTRY(TYPE_M256,       "32-byte (AVX)")                                 \
   ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
   ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
   ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \

Propchange: llvm/branches/wendling/eh/lib/Target/X86/InstPrinter/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Oct 26 19:48:03 2010
@@ -0,0 +1,9 @@
+Debug
+Release
+Release-Asserts
+Debug+Coverage-Asserts
+Debug+Coverage
+Release+Coverage
+Debug+Checks
+Debug+Asserts
+Release+Asserts

Modified: llvm/branches/wendling/eh/lib/Target/X86/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/Makefile?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/Makefile (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/Makefile Tue Oct 26 19:48:03 2010
@@ -20,6 +20,6 @@
                 X86GenCallingConv.inc X86GenSubtarget.inc \
 		X86GenEDInfo.inc
 
-DIRS = AsmPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo
 
 include $(LEVEL)/Makefile.common

Modified: llvm/branches/wendling/eh/lib/Target/X86/README-SSE.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/README-SSE.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/README-SSE.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/README-SSE.txt Tue Oct 26 19:48:03 2010
@@ -2,8 +2,65 @@
 // Random ideas for the X86 backend: SSE-specific stuff.
 //===---------------------------------------------------------------------===//
 
-- Consider eliminating the unaligned SSE load intrinsics, replacing them with
-  unaligned LLVM load instructions.
+//===---------------------------------------------------------------------===//
+
+SSE variable shifts can be custom lowered to something like this, which uses a
+small table + unaligned load + shuffle instead of going through memory.
+
+__m128i_shift_right:
+	.byte	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15
+	.byte	 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+
+...
+__m128i shift_right(__m128i value, unsigned long offset) {
+  return _mm_shuffle_epi8(value,
+               _mm_loadu_si128((__m128i *) (__m128i_shift_right + offset)));
+}
+
+//===---------------------------------------------------------------------===//
+
+SSE has instructions for doing operations on complex numbers; we should
+pattern match them. For example, this should turn into a horizontal add:
+
+typedef float __attribute__((vector_size(16))) v4f32;
+float f32(v4f32 A) {
+  return A[0]+A[1]+A[2]+A[3];
+}
+
+Instead we get this:
+
+_f32:                                   ## @f32
+	pshufd	$1, %xmm0, %xmm1        ## xmm1 = xmm0[1,0,0,0]
+	addss	%xmm0, %xmm1
+	pshufd	$3, %xmm0, %xmm2        ## xmm2 = xmm0[3,0,0,0]
+	movhlps	%xmm0, %xmm0            ## xmm0 = xmm0[1,1]
+	movaps	%xmm0, %xmm3
+	addss	%xmm1, %xmm3
+	movdqa	%xmm2, %xmm0
+	addss	%xmm3, %xmm0
+	ret
+
+Also, there are cases where some simple local SLP would improve codegen a bit.
+Compiling this:
+
+_Complex float f32(_Complex float A, _Complex float B) {
+  return A+B;
+}
+
+into:
+
+_f32:                                   ## @f32
+	movdqa	%xmm0, %xmm2
+	addss	%xmm1, %xmm2
+	pshufd	$1, %xmm1, %xmm1        ## xmm1 = xmm1[1,0,0,0]
+	pshufd	$1, %xmm0, %xmm3        ## xmm3 = xmm0[1,0,0,0]
+	addss	%xmm1, %xmm3
+	movaps	%xmm2, %xmm0
+	unpcklps	%xmm3, %xmm0    ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+	ret
+
+seems silly when it could just be one addps.
+
 
 //===---------------------------------------------------------------------===//
 

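For reference, the pshufb-based variable shift sketched in the README hunk
above can be exercised standalone. This is a minimal sketch, assuming an
SSSE3-capable target (compile with -mssse3); the names shift_right_table and
shift_right_bytes are this note's own, and the 32-byte table is written in C
rather than assembly so the program is self-contained.

#include <tmmintrin.h>  /* _mm_shuffle_epi8 (SSSE3); also pulls in SSE2 loads */
#include <stdio.h>
#include <string.h>

/* 32-byte control table: indices 0..15 followed by sixteen -1 entries;
   pshufb zeroes any lane whose control byte has the high bit set, so the
   bytes shifted in from the top become zero. */
static const char shift_right_table[32] = {
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

static __m128i shift_right_bytes(__m128i value, unsigned long offset) {
  /* One unaligned 16-byte load into the table, then a single shuffle. */
  __m128i mask = _mm_loadu_si128((const __m128i *)(shift_right_table + offset));
  return _mm_shuffle_epi8(value, mask);
}

int main() {
  unsigned char in[16], out[16];
  for (int i = 0; i < 16; ++i) in[i] = (unsigned char)(i + 1);
  __m128i v = _mm_loadu_si128((const __m128i *)in);
  __m128i r = shift_right_bytes(v, 3);       /* drop the low three bytes */
  memcpy(out, &r, 16);
  for (int i = 0; i < 16; ++i) printf("%d ", out[i]);
  printf("\n");                              /* expect: 4 .. 16 0 0 0 */
  return 0;
}
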
Modified: llvm/branches/wendling/eh/lib/Target/X86/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/README.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/README.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/README.txt Tue Oct 26 19:48:03 2010
@@ -1863,3 +1863,100 @@
 shows up with bitfields.
 
 //===---------------------------------------------------------------------===//
+
+Take the following C code:
+int f(int a, int b) { return (unsigned char)a == (unsigned char)b; }
+
+We generate the following IR with clang:
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+  %tmp = xor i32 %b, %a                           ; <i32> [#uses=1]
+  %tmp6 = and i32 %tmp, 255                       ; <i32> [#uses=1]
+  %cmp = icmp eq i32 %tmp6, 0                     ; <i1> [#uses=1]
+  %conv5 = zext i1 %cmp to i32                    ; <i32> [#uses=1]
+  ret i32 %conv5
+}
+
+And the following x86 code:
+	xorl	%esi, %edi
+	testb	$-1, %dil
+	sete	%al
+	movzbl	%al, %eax
+	ret
+
+A cmpb instead of the xorl+testb would be one instruction shorter.
+
+//===---------------------------------------------------------------------===//
+
+Given the following C code:
+int f(int a, int b) { return (signed char)a == (signed char)b; }
+
+We generate the following IR with clang:
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+  %sext = shl i32 %a, 24                          ; <i32> [#uses=1]
+  %conv1 = ashr i32 %sext, 24                     ; <i32> [#uses=1]
+  %sext6 = shl i32 %b, 24                         ; <i32> [#uses=1]
+  %conv4 = ashr i32 %sext6, 24                    ; <i32> [#uses=1]
+  %cmp = icmp eq i32 %conv1, %conv4               ; <i1> [#uses=1]
+  %conv5 = zext i1 %cmp to i32                    ; <i32> [#uses=1]
+  ret i32 %conv5
+}
+
+And the following x86 code:
+	movsbl	%sil, %eax
+	movsbl	%dil, %ecx
+	cmpl	%eax, %ecx
+	sete	%al
+	movzbl	%al, %eax
+	ret
+
+
+It should be possible to eliminate the sign extensions.
+
+//===---------------------------------------------------------------------===//
+
+LLVM misses a load+store narrowing opportunity in this code:
+
+%struct.bf = type { i64, i16, i16, i32 }
+
+@bfi = external global %struct.bf*                ; <%struct.bf**> [#uses=2]
+
+define void @t1() nounwind ssp {
+entry:
+  %0 = load %struct.bf** @bfi, align 8            ; <%struct.bf*> [#uses=1]
+  %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1]
+  %2 = bitcast i16* %1 to i32*                    ; <i32*> [#uses=2]
+  %3 = load i32* %2, align 1                      ; <i32> [#uses=1]
+  %4 = and i32 %3, -65537                         ; <i32> [#uses=1]
+  store i32 %4, i32* %2, align 1
+  %5 = load %struct.bf** @bfi, align 8            ; <%struct.bf*> [#uses=1]
+  %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1]
+  %7 = bitcast i16* %6 to i32*                    ; <i32*> [#uses=2]
+  %8 = load i32* %7, align 1                      ; <i32> [#uses=1]
+  %9 = and i32 %8, -131073                        ; <i32> [#uses=1]
+  store i32 %9, i32* %7, align 1
+  ret void
+}
+
+LLVM currently emits this:
+
+  movq  bfi(%rip), %rax
+  andl  $-65537, 8(%rax)
+  movq  bfi(%rip), %rax
+  andl  $-131073, 8(%rax)
+  ret
+
+It could narrow the loads and stores to emit this:
+
+  movq  bfi(%rip), %rax
+  andb  $-2, 10(%rax)
+  movq  bfi(%rip), %rax
+  andb  $-3, 10(%rax)
+  ret
+
+The trouble is that there is a TokenFactor between the store and the
+load, making it non-trivial to determine if there's anything between
+the load and the store which would prohibit narrowing.
+
+//===---------------------------------------------------------------------===//

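The two byte-comparison ideas and the narrowing idea above rest on small
identities that are easy to sanity-check. A throwaway sketch, assuming
two's-complement int and a little-endian target (nothing here mirrors LLVM
APIs); the -65537 mask and the byte offsets come straight from the IR and
assembly above.

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main() {
  // (unsigned char)a == (unsigned char)b  <=>  ((a ^ b) & 255) == 0,
  // which is why a single cmpb on the low bytes would be legal.
  for (int a = -300; a <= 300; ++a)
    for (int b = -300; b <= 300; ++b)
      assert((((a ^ b) & 255) == 0) ==
             ((unsigned char)a == (unsigned char)b));

  // Narrowing: clearing bit 16 of the i32 at byte offset 8 (andl $-65537)
  // equals clearing bit 0 of the byte at offset 10 (andb $-2) when the
  // target is little-endian.
  unsigned char buf1[16], buf2[16];
  for (int i = 0; i < 16; ++i) buf1[i] = buf2[i] = (unsigned char)(0xA0 + i);
  uint32_t w;
  memcpy(&w, buf1 + 8, 4);
  w &= 0xFFFEFFFFu;            // -65537 as an unsigned 32-bit mask
  memcpy(buf1 + 8, &w, 4);
  buf2[10] &= 0xFE;            // the narrowed andb $-2, 10(%rax)
  assert(memcmp(buf1, buf2, 16) == 0);
  return 0;
}
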
Modified: llvm/branches/wendling/eh/lib/Target/X86/SSEDomainFix.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/SSEDomainFix.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/SSEDomainFix.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/SSEDomainFix.cpp Tue Oct 26 19:48:03 2010
@@ -115,7 +115,7 @@
   unsigned Distance;
 
 public:
-  SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+  SSEDomainFixPass() : MachineFunctionPass(ID) {}
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.setPreservesAll();

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86.td Tue Oct 26 19:48:03 2010
@@ -182,7 +182,6 @@
 // Currently the X86 assembly parser only supports ATT syntax.
 def ATTAsmParser : AsmParser {
   string AsmParserClassName = "ATTAsmParser";
-  string MatchInstructionName = "MatchInstructionImpl";
   int Variant = 0;
 
   // Discard comments in assembly strings.
@@ -197,10 +196,12 @@
 def ATTAsmWriter : AsmWriter {
   string AsmWriterClassName  = "ATTInstPrinter";
   int Variant = 0;
+  bit isMCAsmWriter = 1;
 }
 def IntelAsmWriter : AsmWriter {
   string AsmWriterClassName  = "IntelInstPrinter";
   int Variant = 1;
+  bit isMCAsmWriter = 1;
 }
 
 def X86 : Target {

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86AsmBackend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86AsmBackend.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86AsmBackend.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86AsmBackend.cpp Tue Oct 26 19:48:03 2010
@@ -11,13 +11,16 @@
 #include "X86.h"
 #include "X86FixupKinds.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/MC/ELFObjectWriter.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFormat.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MachObjectWriter.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -35,6 +38,8 @@
   case X86::reloc_pcrel_4byte:
   case X86::reloc_riprel_4byte:
   case X86::reloc_riprel_4byte_movq_load:
+  case X86::reloc_signed_4byte:
+  case X86::reloc_global_offset_table:
   case FK_Data_4: return 2;
   case FK_Data_8: return 3;
   }
@@ -62,9 +67,9 @@
 
   bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
 };
-} // end anonymous namespace 
+} // end anonymous namespace
 
-static unsigned getRelaxedOpcode(unsigned Op) {
+static unsigned getRelaxedOpcodeBranch(unsigned Op) {
   switch (Op) {
   default:
     return Op;
@@ -89,16 +94,101 @@
   }
 }
 
+static unsigned getRelaxedOpcodeArith(unsigned Op) {
+  switch (Op) {
+  default:
+    return Op;
+
+    // IMUL
+  case X86::IMUL16rri8: return X86::IMUL16rri;
+  case X86::IMUL16rmi8: return X86::IMUL16rmi;
+  case X86::IMUL32rri8: return X86::IMUL32rri;
+  case X86::IMUL32rmi8: return X86::IMUL32rmi;
+  case X86::IMUL64rri8: return X86::IMUL64rri32;
+  case X86::IMUL64rmi8: return X86::IMUL64rmi32;
+
+    // AND
+  case X86::AND16ri8: return X86::AND16ri;
+  case X86::AND16mi8: return X86::AND16mi;
+  case X86::AND32ri8: return X86::AND32ri;
+  case X86::AND32mi8: return X86::AND32mi;
+  case X86::AND64ri8: return X86::AND64ri32;
+  case X86::AND64mi8: return X86::AND64mi32;
+
+    // OR
+  case X86::OR16ri8: return X86::OR16ri;
+  case X86::OR16mi8: return X86::OR16mi;
+  case X86::OR32ri8: return X86::OR32ri;
+  case X86::OR32mi8: return X86::OR32mi;
+  case X86::OR64ri8: return X86::OR64ri32;
+  case X86::OR64mi8: return X86::OR64mi32;
+
+    // XOR
+  case X86::XOR16ri8: return X86::XOR16ri;
+  case X86::XOR16mi8: return X86::XOR16mi;
+  case X86::XOR32ri8: return X86::XOR32ri;
+  case X86::XOR32mi8: return X86::XOR32mi;
+  case X86::XOR64ri8: return X86::XOR64ri32;
+  case X86::XOR64mi8: return X86::XOR64mi32;
+
+    // ADD
+  case X86::ADD16ri8: return X86::ADD16ri;
+  case X86::ADD16mi8: return X86::ADD16mi;
+  case X86::ADD32ri8: return X86::ADD32ri;
+  case X86::ADD32mi8: return X86::ADD32mi;
+  case X86::ADD64ri8: return X86::ADD64ri32;
+  case X86::ADD64mi8: return X86::ADD64mi32;
+
+    // SUB
+  case X86::SUB16ri8: return X86::SUB16ri;
+  case X86::SUB16mi8: return X86::SUB16mi;
+  case X86::SUB32ri8: return X86::SUB32ri;
+  case X86::SUB32mi8: return X86::SUB32mi;
+  case X86::SUB64ri8: return X86::SUB64ri32;
+  case X86::SUB64mi8: return X86::SUB64mi32;
+
+    // CMP
+  case X86::CMP16ri8: return X86::CMP16ri;
+  case X86::CMP16mi8: return X86::CMP16mi;
+  case X86::CMP32ri8: return X86::CMP32ri;
+  case X86::CMP32mi8: return X86::CMP32mi;
+  case X86::CMP64ri8: return X86::CMP64ri32;
+  case X86::CMP64mi8: return X86::CMP64mi32;
+  }
+}
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+  unsigned R = getRelaxedOpcodeArith(Op);
+  if (R != Op)
+    return R;
+  return getRelaxedOpcodeBranch(Op);
+}
+
 bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+  // Branches can always be relaxed.
+  if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
+    return true;
+
   // Check if this instruction is ever relaxable.
-  if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+  if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
     return false;
 
-  // If so, just assume it can be relaxed. Once we support relaxing more complex
-  // instructions we should check that the instruction actually has symbolic
-  // operands before doing this, but we need to be careful about things like
-  // PCrel.
-  return true;
+
+  // Check if it has an expression and is not RIP relative.
+  bool hasExp = false;
+  bool hasRIP = false;
+  for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+    const MCOperand &Op = Inst.getOperand(i);
+    if (Op.isExpr())
+      hasExp = true;
+
+    if (Op.isReg() && Op.getReg() == X86::RIP)
+      hasRIP = true;
+  }
+
+  // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
+  // how we do relaxations?
+  return hasExp && !hasRIP;
 }
 
 // FIXME: Can tblgen help at all here to verify there aren't other instructions
@@ -184,44 +274,89 @@
 
 namespace {
 class ELFX86AsmBackend : public X86AsmBackend {
+  MCELFObjectFormat Format;
+
 public:
-  ELFX86AsmBackend(const Target &T)
-    : X86AsmBackend(T) {
-    HasAbsolutizedSet = true;
+  Triple::OSType OSType;
+  ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
+    : X86AsmBackend(T), OSType(_OSType) {
     HasScatteredSymbols = true;
+    HasReliableSymbolDifference = true;
   }
 
-  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return 0;
+  virtual const MCObjectFormat &getObjectFormat() const {
+    return Format;
+  }
+
+  virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+    const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section);
+    return ES.getFlags() & MCSectionELF::SHF_MERGE;
   }
 
   bool isVirtualSection(const MCSection &Section) const {
     const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
-    return SE.getType() == MCSectionELF::SHT_NOBITS;;
+    return SE.getType() == MCSectionELF::SHT_NOBITS;
   }
 };
 
 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
 public:
-  ELFX86_32AsmBackend(const Target &T)
-    : ELFX86AsmBackend(T) {}
+  ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
+    : ELFX86AsmBackend(T, OSType) {}
+
+  unsigned getPointerSize() const {
+    return 4;
+  }
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return new ELFObjectWriter(OS, /*Is64Bit=*/false,
+                               OSType, ELF::EM_386,
+                               /*IsLittleEndian=*/true,
+                               /*HasRelocationAddend=*/false);
+  }
 };
 
 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
 public:
-  ELFX86_64AsmBackend(const Target &T)
-    : ELFX86AsmBackend(T) {}
+  ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
+    : ELFX86AsmBackend(T, OSType) {}
+
+  unsigned getPointerSize() const {
+    return 8;
+  }
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return new ELFObjectWriter(OS, /*Is64Bit=*/true,
+                               OSType, ELF::EM_X86_64,
+                               /*IsLittleEndian=*/true,
+                               /*HasRelocationAddend=*/true);
+  }
 };
 
 class WindowsX86AsmBackend : public X86AsmBackend {
+  bool Is64Bit;
+  MCCOFFObjectFormat Format;
+
 public:
-  WindowsX86AsmBackend(const Target &T)
-    : X86AsmBackend(T) {
+  WindowsX86AsmBackend(const Target &T, bool is64Bit)
+    : X86AsmBackend(T)
+    , Is64Bit(is64Bit) {
     HasScatteredSymbols = true;
   }
 
+  virtual const MCObjectFormat &getObjectFormat() const {
+    return Format;
+  }
+
+  unsigned getPointerSize() const {
+    if (Is64Bit)
+      return 8;
+    else
+      return 4;
+  }
+
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return createWinCOFFObjectWriter (OS);
+    return createWinCOFFObjectWriter(OS, Is64Bit);
   }
 
   bool isVirtualSection(const MCSection &Section) const {
@@ -231,13 +366,18 @@
 };
 
 class DarwinX86AsmBackend : public X86AsmBackend {
+  MCMachOObjectFormat Format;
+
 public:
   DarwinX86AsmBackend(const Target &T)
     : X86AsmBackend(T) {
-    HasAbsolutizedSet = true;
     HasScatteredSymbols = true;
   }
 
+  virtual const MCObjectFormat &getObjectFormat() const {
+    return Format;
+  }
+
   bool isVirtualSection(const MCSection &Section) const {
     const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
     return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
@@ -251,6 +391,10 @@
   DarwinX86_32AsmBackend(const Target &T)
     : DarwinX86AsmBackend(T) {}
 
+  unsigned getPointerSize() const {
+    return 4;
+  }
+
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
     return new MachObjectWriter(OS, /*Is64Bit=*/false);
   }
@@ -263,6 +407,10 @@
     HasReliableSymbolDifference = true;
   }
 
+  unsigned getPointerSize() const {
+    return 8;
+  }
+
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
     return new MachObjectWriter(OS, /*Is64Bit=*/true);
   }
@@ -301,17 +449,19 @@
   }
 };
 
-} // end anonymous namespace 
+} // end anonymous namespace
 
 TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                                const std::string &TT) {
   switch (Triple(TT).getOS()) {
   case Triple::Darwin:
     return new DarwinX86_32AsmBackend(T);
+  case Triple::MinGW32:
+  case Triple::Cygwin:
   case Triple::Win32:
-    return new WindowsX86AsmBackend(T);
+    return new WindowsX86AsmBackend(T, false);
   default:
-    return new ELFX86_32AsmBackend(T);
+    return new ELFX86_32AsmBackend(T, Triple(TT).getOS());
   }
 }
 
@@ -320,7 +470,11 @@
   switch (Triple(TT).getOS()) {
   case Triple::Darwin:
     return new DarwinX86_64AsmBackend(T);
+  case Triple::MinGW64:
+  case Triple::Cygwin:
+  case Triple::Win32:
+    return new WindowsX86AsmBackend(T, true);
   default:
-    return new ELFX86_64AsmBackend(T);
+    return new ELFX86_64AsmBackend(T, Triple(TT).getOS());
   }
 }

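The MayNeedRelaxation rewrite above encodes a two-part policy: relaxable
branches are always candidates, while imm8 arithmetic only needs its
32-bit-immediate form when some operand is still a symbolic expression and
the instruction is not RIP-relative. A toy illustration of that decision
shape, with made-up Inst/Operand types (this is not the MC API):

#include <vector>

// Purely illustrative stand-ins for MCInst/MCOperand.
struct Operand { bool isExpr; bool isRIPReg; };
struct Inst {
  bool isRelaxableBranch;   // e.g. a short jcc/jmp
  bool hasImm8Form;         // e.g. ADD32ri8, CMP64ri8
  std::vector<Operand> ops;
};

bool mayNeedRelaxation(const Inst &I) {
  // Branches always start in their short form and may be widened later.
  if (I.isRelaxableBranch)
    return true;
  // Only instructions that actually have a wider twin can be relaxed.
  if (!I.hasImm8Form)
    return false;
  // Relax only if an operand is an unresolved expression and the
  // instruction is not RIP-relative.
  bool hasExpr = false, hasRIP = false;
  for (const Operand &Op : I.ops) {
    if (Op.isExpr)   hasExpr = true;
    if (Op.isRIPReg) hasRIP = true;
  }
  return hasExpr && !hasRIP;
}
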
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86AsmPrinter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86AsmPrinter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86AsmPrinter.cpp Tue Oct 26 19:48:03 2010
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86AsmPrinter.h"
-#include "AsmPrinter/X86ATTInstPrinter.h"
-#include "AsmPrinter/X86IntelInstPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
 #include "X86MCInstLower.h"
 #include "X86.h"
 #include "X86COFFMachineModuleInfo.h"
@@ -36,6 +36,7 @@
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetOptions.h"
@@ -61,7 +62,7 @@
   if (Subtarget->isTargetCOFF()) {
     bool Intrn = MF.getFunction()->hasInternalLinkage();
     OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
-    OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC 
+    OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
                                               : COFF::IMAGE_SYM_CLASS_EXTERNAL);
     OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
                                                << COFF::SCT_COMPLEX_TYPE_SHIFT);
@@ -94,7 +95,7 @@
     break;
   case MachineOperand::MO_GlobalAddress: {
     const GlobalValue *GV = MO.getGlobal();
-    
+
     MCSymbol *GVSym;
     if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
       GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
@@ -108,11 +109,11 @@
     // Handle dllimport linkage.
     if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
       GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
-    
+
     if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
         MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
       MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-      MachineModuleInfoImpl::StubValueTy &StubSym = 
+      MachineModuleInfoImpl::StubValueTy &StubSym =
         MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
       if (StubSym.getPointer() == 0)
         StubSym = MachineModuleInfoImpl::
@@ -132,7 +133,7 @@
         StubSym = MachineModuleInfoImpl::
           StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
     }
-    
+
     // If the name begins with a dollar-sign, enclose it in parens.  We do this
     // to avoid having it look like an integer immediate to the assembler.
     if (GVSym->getName()[0] != '$')
@@ -148,7 +149,7 @@
       SmallString<128> TempNameStr;
       TempNameStr += StringRef(MO.getSymbolName());
       TempNameStr += StringRef("$stub");
-      
+
       MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
       MachineModuleInfoImpl::StubValueTy &StubSym =
         MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
@@ -162,17 +163,17 @@
     } else {
       SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
     }
-    
+
     // If the name begins with a dollar-sign, enclose it in parens.  We do this
     // to avoid having it look like an integer immediate to the assembler.
-    if (SymToPrint->getName()[0] != '$') 
+    if (SymToPrint->getName()[0] != '$')
       O << *SymToPrint;
     else
       O << '(' << *SymToPrint << '(';
     break;
   }
   }
-  
+
   switch (MO.getTargetFlags()) {
   default:
     llvm_unreachable("Unknown target flag on GV operand");
@@ -187,7 +188,7 @@
     O << " + [.-";
     PrintPICBaseSymbol(O);
     O << ']';
-    break;      
+    break;
   case X86II::MO_PIC_BASE_OFFSET:
   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
   case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
@@ -261,7 +262,7 @@
 
   case MachineOperand::MO_JumpTableIndex:
   case MachineOperand::MO_ConstantPoolIndex:
-  case MachineOperand::MO_GlobalAddress: 
+  case MachineOperand::MO_GlobalAddress:
   case MachineOperand::MO_ExternalSymbol: {
     O << '$';
     printSymbolOperand(MO, O);
@@ -297,10 +298,10 @@
   if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
       BaseReg.getReg() == X86::RIP)
     HasBaseReg = false;
-  
+
   // HasParenPart - True if we will print out the () part of the mem ref.
   bool HasParenPart = IndexReg.getReg() || HasBaseReg;
-  
+
   if (DispSpec.isImm()) {
     int DispVal = DispSpec.getImm();
     if (DispVal || !HasParenPart)
@@ -385,14 +386,14 @@
     if (ExtraCode[1] != 0) return true; // Unknown modifier.
 
     const MachineOperand &MO = MI->getOperand(OpNo);
-    
+
     switch (ExtraCode[0]) {
     default: return true;  // Unknown modifier.
     case 'a': // This is an address.  Currently only 'i' and 'r' are expected.
       if (MO.isImm()) {
         O << MO.getImm();
         return false;
-      } 
+      }
       if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
         printSymbolOperand(MO, O);
         if (Subtarget->isPICStyleRIPRel())
@@ -489,13 +490,13 @@
     // All darwin targets use mach-o.
     MachineModuleInfoMachO &MMIMacho =
       MMI->getObjFileInfo<MachineModuleInfoMachO>();
-    
+
     // Output stubs for dynamically-linked functions.
     MachineModuleInfoMachO::SymbolListTy Stubs;
 
     Stubs = MMIMacho.GetFnStubList();
     if (!Stubs.empty()) {
-      const MCSection *TheSection = 
+      const MCSection *TheSection =
         OutContext.getMachOSection("__IMPORT", "__jump_table",
                                    MCSectionMachO::S_SYMBOL_STUBS |
                                    MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
@@ -513,7 +514,7 @@
         const char HltInsts[] = { -12, -12, -12, -12, -12 };
         OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
       }
-      
+
       Stubs.clear();
       OutStreamer.AddBlankLine();
     }
@@ -521,7 +522,7 @@
     // Output stubs for external and common global variables.
     Stubs = MMIMacho.GetGVStubList();
     if (!Stubs.empty()) {
-      const MCSection *TheSection = 
+      const MCSection *TheSection =
         OutContext.getMachOSection("__IMPORT", "__pointers",
                                    MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
                                    SectionKind::getMetadata());
@@ -579,6 +580,14 @@
     OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
   }
 
+  if (Subtarget->isTargetWindows()
+   && !Subtarget->isTargetCygMing()
+   && MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
+    StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
+    MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
+    OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+  }
+
   if (Subtarget->isTargetCOFF()) {
     X86COFFMachineModuleInfo &COFFMMI =
       MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
@@ -660,14 +669,17 @@
   }
 }
 
-MachineLocation 
+MachineLocation
 X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
   MachineLocation Location;
   assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
   // Frame address.  Currently handles register +- offset only.
-  
+
   if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
     Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+  else {
+    DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+  }
   return Location;
 }
 
@@ -686,9 +698,9 @@
   O << V.getName();
   O << " <- ";
   // Frame address.  Currently handles register +- offset only.
-  O << '['; 
+  O << '[';
   if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
-    printOperand(MI, 0, O); 
+    printOperand(MI, 0, O);
   else
     O << "undef";
   O << '+'; printOperand(MI, 3, O);
@@ -714,10 +726,10 @@
 }
 
 // Force static initialization.
-extern "C" void LLVMInitializeX86AsmPrinter() { 
+extern "C" void LLVMInitializeX86AsmPrinter() {
   RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
   RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
-  
+
   TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
   TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
 }

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86CallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86CallingConv.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86CallingConv.td Tue Oct 26 19:48:03 2010
@@ -33,16 +33,22 @@
   CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
   CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
   CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
-  
-  // Vector types are returned in XMM0 and XMM1, when they fit.  XMMM2 and XMM3
+
+  // Vector types are returned in XMM0 and XMM1, when they fit.  XMM2 and XMM3
   // can only be used by ABI non-compliant code. If the target doesn't have XMM
   // registers, it won't have vector types.
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
             CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
 
+  // 256-bit vectors are returned in YMM0 and YMM1, when they fit. YMM2 and YMM3
+  // can only be used by ABI non-compliant code. This vector type is only
+  // supported while using the AVX target feature.
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+            CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>,
+
   // MMX vector types are always returned in MM0. If the target doesn't have
   // MM0, it doesn't support these vector types.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>,
+  CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>,
 
   // Long double types are always returned in ST0 (even with SSE).
   CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
@@ -89,14 +95,14 @@
   // returned in RAX. This disagrees with ABI documentation but is bug
   // compatible with gcc.
   CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
-  CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>,
+  CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
   CCDelegateTo<RetCC_X86Common>
 ]>;
 
 // X86-Win64 C return-value convention.
 def RetCC_X86_Win64_C : CallingConv<[
   // The X86-Win64 calling convention always returns __m64 values in RAX.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>,
+  CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
 
   // And FP in XMM0 only.
   CCIfType<[f32], CCAssignToReg<[XMM0]>>,
@@ -155,7 +161,7 @@
 
   // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
   // registers on Darwin.
-  CCIfType<[v8i8, v4i16, v2i32],
+  CCIfType<[x86mmx],
             CCIfSubtarget<"isTargetDarwin()",
             CCIfSubtarget<"hasSSE2()",
             CCPromoteToType<v2i64>>>>,
@@ -164,11 +170,16 @@
   CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
             CCIfSubtarget<"hasSSE1()",
             CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
- 
+
+  // The first 8 256-bit vector arguments are passed in YMM registers.
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+            CCIfSubtarget<"hasAVX()",
+            CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
   // 8-byte aligned if there are no more registers to hold them.
   CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-  
+
   // Long doubles get stack slots whose size and alignment depends on the
   // subtarget.
   CCIfType<[f80], CCAssignToStack<0, 0>>,
@@ -176,8 +187,12 @@
   // Vectors get 16-byte stack slots that are 16-byte aligned.
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
 
+  // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+           CCAssignToStack<32, 32>>,
+
   // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+  CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
 ]>;
 
 // Calling convention used on Win64
@@ -195,8 +210,7 @@
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
 
   // The first 4 MMX vector arguments are passed in GPRs.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64],
-           CCBitConvertToType<i64>>,
+  CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
 
   // The first 4 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
@@ -218,7 +232,7 @@
   CCIfType<[f80], CCAssignToStack<0, 0>>,
 
   // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+  CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
 ]>;
 
 def CC_X86_64_GHC : CallingConv<[
@@ -254,7 +268,7 @@
 
   // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
   // registers if the call is not a vararg call.
-  CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32],
+  CCIfNotVarArg<CCIfType<[x86mmx],
                 CCAssignToReg<[MM0, MM1, MM2]>>>,
 
   // Integer/Float values get stored in stack slots that are 4 bytes in
@@ -271,12 +285,21 @@
   CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                 CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
 
+  // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
+  CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+                CCIfSubtarget<"hasAVX()",
+                CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
   // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
 
+  // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+           CCAssignToStack<32, 32>>,
+
   // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
   // passed in the parameter area.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>;
+  CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>;
 
 def CC_X86_32_C : CallingConv<[
   // Promote i8/i16 arguments to i32.

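The YMM entries added to the calling-convention table above mean a 256-bit
vector argument travels in ymm0-ymm7 (ymm0-ymm3 for returns) once AVX is in
play. A small way to observe this, assuming a compiler with AVX support
(build with -mavx -S and inspect the output); add4 is this note's own
example, not code from the tree:

#include <immintrin.h>

// With AVX enabled, a and b should arrive in ymm0/ymm1 and the sum should
// come back in ymm0, matching the register lists in the CC table above.
__m256d add4(__m256d a, __m256d b) {
  return _mm256_add_pd(a, b);
}

int main() {
  __m256d x = _mm256_set1_pd(1.0);
  __m256d y = _mm256_set1_pd(2.0);
  double out[4];
  _mm256_storeu_pd(out, add4(x, y));
  return out[0] == 3.0 ? 0 : 1;   // exit code 0 on success
}
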
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86CodeEmitter.cpp Tue Oct 26 19:48:03 2010
@@ -53,12 +53,12 @@
   public:
     static char ID;
     explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
-      : MachineFunctionPass(&ID), II(0), TD(0), TM(tm), 
+      : MachineFunctionPass(ID), II(0), TD(0), TM(tm), 
       MCE(mce), PICBaseOffset(0), Is64BitMode(false),
       IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
     Emitter(X86TargetMachine &tm, CodeEmitter &mce,
             const X86InstrInfo &ii, const TargetData &td, bool is64)
-      : MachineFunctionPass(&ID), II(&ii), TD(&td), TM(tm), 
+      : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm), 
       MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
       IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
 
@@ -68,8 +68,7 @@
       return "X86 Machine Code Emitter";
     }
 
-    void emitInstruction(const MachineInstr &MI,
-                         const TargetInstrDesc *Desc);
+    void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc);
     
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -131,7 +130,7 @@
     for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); 
          MBB != E; ++MBB) {
       MCE.StartMachineBasicBlock(MBB);
-      for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
            I != E; ++I) {
         const TargetInstrDesc &Desc = I->getDesc();
         emitInstruction(*I, &Desc);
@@ -598,9 +597,23 @@
 }
 
 template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
                                            const TargetInstrDesc *Desc) {
   DEBUG(dbgs() << MI);
+  
+  // If this is a pseudo instruction, lower it.
+  switch (Desc->getOpcode()) {
+  case X86::ADD16rr_DB:   Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break;
+  case X86::ADD32rr_DB:   Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break;
+  case X86::ADD64rr_DB:   Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break;
+  case X86::ADD16ri_DB:   Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break;
+  case X86::ADD32ri_DB:   Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break;
+  case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break;
+  case X86::ADD16ri8_DB:  Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break;
+  case X86::ADD32ri8_DB:  Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break;
+  case X86::ADD64ri8_DB:  Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break;
+  }
+  
 
   MCE.processDebugLoc(MI.getDebugLoc(), true);
 
@@ -702,9 +715,15 @@
     // base address.
     switch (Opcode) {
     default: 
-      llvm_unreachable("psuedo instructions should be removed before code"
+      llvm_unreachable("pseudo instructions should be removed before code"
                        " emission");
       break;
+    // Do nothing for Int_MemBarrier - it's just a comment.  Add a debug
+    // message to make it slightly easier to see.
+    case X86::Int_MemBarrier:
+      DEBUG(dbgs() << "#MEMBARRIER\n");
+      break;
+    
     case TargetOpcode::INLINEASM:
       // We allow inline assembler nodes with empty bodies - they can
       // implicitly define registers, which is ok for JIT.
@@ -716,7 +735,7 @@
     case TargetOpcode::EH_LABEL:
       MCE.emitLabel(MI.getOperand(0).getMCSymbol());
       break;
-        
+    
     case TargetOpcode::IMPLICIT_DEF:
     case TargetOpcode::KILL:
       break;
@@ -770,7 +789,8 @@
     }
     
     assert(MO.isImm() && "Unknown RawFrm operand!");
-    if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
+    if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 ||
+        Opcode == X86::WINCALL64pcrel32) {
       // Fix up immediate operand for pc relative calls.
       intptr_t Imm = (intptr_t)MO.getImm();
       Imm = Imm - MCE.getCurrentPCValue() - 4;

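The *_DB ("disjoint bits") pseudos lowered to OR above rely on the identity
a + b == a | b whenever a & b == 0, since no bit position can carry; the
selector only forms these pseudos when it can prove the operands share no
bits. A quick exhaustive check of the identity for 8-bit values:

#include <assert.h>
#include <stdint.h>

int main() {
  // If a and b share no set bits, addition cannot carry, so ADD and OR
  // produce the same result -- which is why the ADD*_DB pseudos can be
  // emitted as plain ORs.
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 0; b < 256; ++b)
      if ((a & b) == 0)
        assert(a + b == (a | b));
  return 0;
}
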
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ELFWriterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ELFWriterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ELFWriterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ELFWriterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -24,8 +24,8 @@
 //  Implementation of the X86ELFWriterInfo class
 //===----------------------------------------------------------------------===//
 
-X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM)
-  : TargetELFWriterInfo(TM) {
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
+  : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
     EMachine = is64Bit ? EM_X86_64 : EM_386;
   }
 

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ELFWriterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ELFWriterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ELFWriterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ELFWriterInfo.h Tue Oct 26 19:48:03 2010
@@ -38,7 +38,7 @@
     };
 
   public:
-    X86ELFWriterInfo(TargetMachine &TM);
+    X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
     virtual ~X86ELFWriterInfo();
 
     /// getRelocationType - Returns the target specific ELF Relocation type.

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86FastISel.cpp Tue Oct 26 19:48:03 2010
@@ -63,6 +63,13 @@
 
   virtual bool TargetSelectInstruction(const Instruction *I);
 
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction.  If possible,
+  /// try to fold the load as an operand to the instruction, returning true on
+  /// success.
+  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                             const LoadInst *LI);
+  
 #include "X86GenFastISel.inc"
 
 private:
@@ -960,9 +967,11 @@
   MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 
-  // Fold the common case of a conditional branch with a comparison.
+  // Fold the common case of a conditional branch with a comparison
+  // in the same block (values defined on other blocks may not have
+  // initialized registers).
   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
-    if (CI->hasOneUse()) {
+    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
       EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
 
       // Try to take advantage of fallthrough opportunities.
@@ -1185,6 +1194,9 @@
   if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
     return false;
   
+  // We only use cmov here, if we don't have a cmov instruction bail.
+  if (!Subtarget->hasCMov()) return false;
+  
   unsigned Opc = 0;
   const TargetRegisterClass *RC = NULL;
   if (VT.getSimpleVT() == MVT::i16) {
@@ -1590,6 +1602,9 @@
       break;
     }
     case CCValAssign::AExt: {
+      // We don't handle MMX parameters yet.
+      if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128)
+        return false;
       bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                        Arg, ArgVT, Arg);
       if (!Emitted)
@@ -1939,6 +1954,34 @@
   return ResultReg;
 }
 
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction.  If possible,
+/// try to fold the load as an operand to the instruction, returning true on
+/// success.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                                const LoadInst *LI) {
+  X86AddressMode AM;
+  if (!X86SelectAddress(LI->getOperand(0), AM))
+    return false;
+  
+  X86InstrInfo &XII = (X86InstrInfo&)TII;
+  
+  unsigned Size = TD.getTypeAllocSize(LI->getType());
+  unsigned Alignment = LI->getAlignment();
+
+  SmallVector<MachineOperand, 8> AddrOps;
+  AM.getFullAddress(AddrOps);
+  
+  MachineInstr *Result =
+    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+  if (Result == 0) return false;
+  
+  MI->getParent()->insert(MI, Result);
+  MI->eraseFromParent();
+  return true;
+}
+
+
 namespace llvm {
   llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
     return new X86FastISel(funcInfo);

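TryToFoldLoad above lets fast-isel merge a single-use load into its consumer
instead of materializing the loaded value in its own register first. A tiny
function where that folding applies (illustrative only; the expected
assembly is a guess at typical x86-64 output):

#include <stdio.h>

// x86 can use the memory operand directly, e.g. "addl (%rdi), %esi",
// rather than a separate mov + add; folding the single-use load into the
// add is the transformation TryToFoldLoad enables during fast-isel.
int add_through_pointer(int *p, int x) {
  return x + *p;
}

int main() {
  int v = 40;
  printf("%d\n", add_through_pointer(&v, 2));  // prints 42
  return 0;
}
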
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86FixupKinds.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86FixupKinds.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86FixupKinds.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86FixupKinds.h Tue Oct 26 19:48:03 2010
@@ -19,7 +19,13 @@
   reloc_pcrel_1byte,                         // 8-bit pcrel, e.g. branch_1
   reloc_pcrel_2byte,                         // 16-bit pcrel, e.g. callw
   reloc_riprel_4byte,                        // 32-bit rip-relative
-  reloc_riprel_4byte_movq_load               // 32-bit rip-relative in movq
+  reloc_riprel_4byte_movq_load,              // 32-bit rip-relative in movq
+  reloc_signed_4byte,                        // 32-bit signed. Unlike FK_Data_4
+                                             // this will be sign extended at
+                                             // runtime.
+  reloc_global_offset_table                  // 32-bit, relative to the start
+                                             // of the instruction. Used only
+                                             // for _GLOBAL_OFFSET_TABLE_.
 };
 }
 }

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86FloatingPoint.cpp Tue Oct 26 19:48:03 2010
@@ -50,7 +50,7 @@
 namespace {
   struct FPS : public MachineFunctionPass {
     static char ID;
-    FPS() : MachineFunctionPass(&ID) {
+    FPS() : MachineFunctionPass(ID) {
       // This is really only to keep valgrind quiet.
       // The logic in isLive() is too much for it.
       memset(Stack, 0, sizeof(Stack));
@@ -144,11 +144,6 @@
       dbgs() << "\n";
     }
 
-    /// isStackEmpty - Return true if the FP stack is empty.
-    bool isStackEmpty() const {
-      return StackTop == 0;
-    }
-
     /// getSlot - Return the stack slot number a particular register number is
     /// in.
     unsigned getSlot(unsigned RegNo) const {
@@ -172,7 +167,8 @@
 
     /// getStackEntry - Return the X86::FP<n> register in register ST(i).
     unsigned getStackEntry(unsigned STi) const {
-      assert(STi < StackTop && "Access past stack top!");
+      if (STi >= StackTop)
+        report_fatal_error("Access past stack top!");
       return Stack[StackTop-1-STi];
     }
 
@@ -185,7 +181,8 @@
     // pushReg - Push the specified FP<n> register onto the stack.
     void pushReg(unsigned Reg) {
       assert(Reg < 8 && "Register number out of range!");
-      assert(StackTop < 8 && "Stack overflow!");
+      if (StackTop >= 8)
+        report_fatal_error("Stack overflow!");
       Stack[StackTop] = Reg;
       RegMap[Reg] = StackTop++;
     }
@@ -202,7 +199,8 @@
       std::swap(RegMap[RegNo], RegMap[RegOnTop]);
 
       // Swap stack slot contents.
-      assert(RegMap[RegOnTop] < StackTop);
+      if (RegMap[RegOnTop] >= StackTop)
+        report_fatal_error("Access past stack top!");
       std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
 
       // Emit an fxch to update the runtime processors version of the state.
@@ -577,7 +575,8 @@
     friend bool operator<(const TableEntry &TE, unsigned V) {
       return TE.from < V;
     }
-    friend bool operator<(unsigned V, const TableEntry &TE) {
+    friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
+                                              const TableEntry &TE) {
       return V < TE.from;
     }
   };
@@ -829,7 +828,8 @@
   MachineInstr* MI = I;
   DebugLoc dl = MI->getDebugLoc();
   ASSERT_SORTED(PopTable);
-  assert(StackTop > 0 && "Cannot pop empty stack!");
+  if (StackTop == 0)
+    report_fatal_error("Cannot pop empty stack!");
   RegMap[Stack[--StackTop]] = ~0;     // Update state
 
   // Check to see if there is a popping version of this instruction...
@@ -1021,7 +1021,8 @@
       MI->getOpcode() == X86::ISTT_FP32m ||
       MI->getOpcode() == X86::ISTT_FP64m ||
       MI->getOpcode() == X86::ST_FP80m) {
-    assert(StackTop > 0 && "Stack empty??");
+    if (StackTop == 0)
+      report_fatal_error("Stack empty??");
     --StackTop;
   } else if (KillsSrc) { // Last use of operand?
     popStackAfter(I);
@@ -1052,7 +1053,8 @@
     // If this is the last use of the source register, just make sure it's on
     // the top of the stack.
     moveToTop(Reg, I);
-    assert(StackTop > 0 && "Stack cannot be empty!");
+    if (StackTop == 0)
+      report_fatal_error("Stack cannot be empty!");
     --StackTop;
     pushReg(getFPReg(MI->getOperand(0)));
   } else {
@@ -1305,7 +1307,6 @@
 ///
 void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
   MachineInstr *MI = I;
-  DebugLoc dl = MI->getDebugLoc();
   switch (MI->getOpcode()) {
   default: llvm_unreachable("Unknown SpecialFP instruction!");
   case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
@@ -1346,7 +1347,8 @@
     std::swap(RegMap[RegNo], RegMap[RegOnTop]);
     
     // Swap stack slot contents.
-    assert(RegMap[RegOnTop] < StackTop);
+    if (RegMap[RegOnTop] >= StackTop)
+      report_fatal_error("Access past stack top!");
     std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
     break;
   }

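The X86FloatingPoint changes above replace stack-model assertions with
report_fatal_error so that impossible FP-stack states (which inline asm can
provoke) still abort in release builds, where assert() compiles away under
NDEBUG. The shape of that change in miniature; report_fatal_error here is a
local stand-in for LLVM's, and FPStackModel is invented for this sketch:

#include <cstdio>
#include <cstdlib>

// Stand-in for llvm::report_fatal_error: always active, unlike assert().
static void report_fatal_error(const char *Msg) {
  std::fprintf(stderr, "fatal error: %s\n", Msg);
  std::abort();
}

struct FPStackModel {
  unsigned Stack[8];
  unsigned StackTop = 0;

  void pushReg(unsigned Reg) {
    // Before: assert(StackTop < 8 && "Stack overflow!"), which vanishes in
    // release builds.  After: a check that survives NDEBUG.
    if (StackTop >= 8)
      report_fatal_error("Stack overflow!");
    Stack[StackTop++] = Reg;
  }
};

int main() {
  FPStackModel M;
  for (unsigned i = 0; i < 8; ++i) M.pushReg(i);  // fills the stack
  // M.pushReg(8);  // would now die with "Stack overflow!" even with NDEBUG
  return 0;
}
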
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -171,6 +171,17 @@
 
     virtual void PreprocessISelDAG();
 
+    inline bool immSext8(SDNode *N) const {
+      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
+    }
+
+    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+    // sign extended field.
+    inline bool i64immSExt32(SDNode *N) const {
+      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
+      return (int64_t)v == (int32_t)v;
+    }
+
 // Include the pieces autogenerated from the target description.
 #include "X86GenDAGISel.inc"
 
@@ -179,20 +190,19 @@
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
     SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
 
-    bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
-    bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
+    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
     bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
     bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
     bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                  unsigned Depth);
     bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
-    bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                     SDValue &Scale, SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
-    bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
+    bool SelectLEAAddr(SDValue N, SDValue &Base,
                        SDValue &Scale, SDValue &Index, SDValue &Disp,
                        SDValue &Segment);
-    bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
+    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
     bool SelectScalarSSELoad(SDNode *Root, SDValue N,
@@ -253,12 +263,6 @@
       return CurDAG->getTargetConstant(Imm, MVT::i8);
     }
 
-    /// getI16Imm - Return a target constant with the specified value, of type
-    /// i16.
-    inline SDValue getI16Imm(unsigned Imm) {
-      return CurDAG->getTargetConstant(Imm, MVT::i16);
-    }
-
     /// getI32Imm - Return a target constant with the specified value, of type
     /// i32.
     inline SDValue getI32Imm(unsigned Imm) {
@@ -500,10 +504,11 @@
     // FIXME: optimize the case where the src/dest is a load or store?
     SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                           N->getOperand(0),
-                                          MemTmp, NULL, 0, MemVT,
+                                          MemTmp, MachinePointerInfo(), MemVT,
                                           false, false, 0);
     SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
-                                        NULL, 0, MemVT, false, false, 0);
+                                        MachinePointerInfo(),
+                                        MemVT, false, false, 0);
 
     // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
     // extload we created.  This will cause general havoc on the dag because
@@ -538,29 +543,27 @@
 }
 
 
-bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
-                                              X86ISelAddressMode &AM) {
-  assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
-  SDValue Segment = N.getOperand(0);
-
-  if (AM.Segment.getNode() == 0) {
-    AM.Segment = Segment;
-    return false;
-  }
-
-  return true;
-}
-
-bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+  SDValue Address = N->getOperand(1);
+  
+  // load gs:0 -> GS segment register.
+  // load fs:0 -> FS segment register.
+  //
   // This optimization is valid because the GNU TLS model defines that
   // gs:0 (or fs:0 on X86-64) contains its own address.
   // For more information see http://people.redhat.com/drepper/tls.pdf
-
-  SDValue Address = N.getOperand(1);
-  if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
-      !MatchSegmentBaseAddress (Address, AM))
-    return false;
-
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+    if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+        Subtarget->isTargetELF())
+      switch (N->getPointerInfo().getAddrSpace()) {
+      case 256:
+        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+        return false;
+      case 257:
+        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+        return false;
+      }
+  
   return true;
 }
 
@@ -745,11 +748,6 @@
     break;
   }
 
-  case X86ISD::SegmentBaseAddress:
-    if (!MatchSegmentBaseAddress(N, AM))
-      return false;
-    break;
-
   case X86ISD::Wrapper:
   case X86ISD::WrapperRIP:
     if (!MatchWrapper(N, AM))
@@ -757,7 +755,7 @@
     break;
 
   case ISD::LOAD:
-    if (!MatchLoad(N, AM))
+    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
       return false;
     break;
 
@@ -1137,10 +1135,30 @@
 /// SelectAddr - returns true if it is able pattern match an addressing mode.
 /// It returns the operands which make up the maximal addressing mode it can
 /// match by reference.
-bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+///
+/// Parent is the parent node of the addr operand that is being matched.  It
+/// is always a load, store, atomic node, or null.  It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                  SDValue &Scale, SDValue &Index,
                                  SDValue &Disp, SDValue &Segment) {
   X86ISelAddressMode AM;
+  
+  if (Parent &&
+      // These opcodes are all the nodes that have an "addr:$ptr" operand but
+      // are not MemSDNodes, and thus don't have proper addrspace info.
+      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+      Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+    unsigned AddrSpace =
+      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+    // AddrSpace 256 -> GS, 257 -> FS.
+    if (AddrSpace == 256)
+      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+    if (AddrSpace == 257)
+      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+  }
+  
   if (MatchAddress(N, AM))
     return false;
 
@@ -1176,7 +1194,7 @@
         IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
         IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
-      if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment))
+      if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
         return false;
       return true;
     }
@@ -1194,7 +1212,7 @@
       IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
     // Okay, this is a zero extending load.  Fold it.
     LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
-    if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
       return false;
     PatternNodeWithChain = SDValue(LD, 0);
     return true;
@@ -1205,7 +1223,7 @@
 
 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
                                     SDValue &Base, SDValue &Scale,
                                     SDValue &Index, SDValue &Disp,
                                     SDValue &Segment) {
@@ -1267,7 +1285,7 @@
 }
 
 /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) {
   assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
@@ -1300,7 +1318,8 @@
       !IsLegalToFold(N, P, P, OptLevel))
     return false;
   
-  return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
+  return SelectAddr(N.getNode(),
+                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
 }
 
 /// getGlobalBaseReg - Return an SDNode that returns the value of
@@ -1312,20 +1331,13 @@
   return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
 }
 
-static SDNode *FindCallStartFromCall(SDNode *Node) {
-  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
-    assert(Node->getOperand(0).getValueType() == MVT::Other &&
-         "Node doesn't have a token chain argument!");
-  return FindCallStartFromCall(Node->getOperand(0).getNode());
-}
-
 SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   SDValue Chain = Node->getOperand(0);
   SDValue In1 = Node->getOperand(1);
   SDValue In2L = Node->getOperand(2);
   SDValue In2H = Node->getOperand(3);
   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
-  if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+  if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
     return NULL;
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
@@ -1351,7 +1363,7 @@
   SDValue Ptr = Node->getOperand(1);
   SDValue Val = Node->getOperand(2);
   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
-  if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+  if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
     return 0;
 
   bool isInc = false, isDec = false, isSub = false, isCN = false;
@@ -1403,7 +1415,7 @@
       Opc = X86::LOCK_DEC16m;
     else if (isSub) {
       if (isCN) {
-        if (Predicate_immSext8(Val.getNode()))
+        if (immSext8(Val.getNode()))
           Opc = X86::LOCK_SUB16mi8;
         else
           Opc = X86::LOCK_SUB16mi;
@@ -1411,7 +1423,7 @@
         Opc = X86::LOCK_SUB16mr;
     } else {
       if (isCN) {
-        if (Predicate_immSext8(Val.getNode()))
+        if (immSext8(Val.getNode()))
           Opc = X86::LOCK_ADD16mi8;
         else
           Opc = X86::LOCK_ADD16mi;
@@ -1426,7 +1438,7 @@
       Opc = X86::LOCK_DEC32m;
     else if (isSub) {
       if (isCN) {
-        if (Predicate_immSext8(Val.getNode()))
+        if (immSext8(Val.getNode()))
           Opc = X86::LOCK_SUB32mi8;
         else
           Opc = X86::LOCK_SUB32mi;
@@ -1434,7 +1446,7 @@
         Opc = X86::LOCK_SUB32mr;
     } else {
       if (isCN) {
-        if (Predicate_immSext8(Val.getNode()))
+        if (immSext8(Val.getNode()))
           Opc = X86::LOCK_ADD32mi8;
         else
           Opc = X86::LOCK_ADD32mi;
@@ -1450,17 +1462,17 @@
     else if (isSub) {
       Opc = X86::LOCK_SUB64mr;
       if (isCN) {
-        if (Predicate_immSext8(Val.getNode()))
+        if (immSext8(Val.getNode()))
           Opc = X86::LOCK_SUB64mi8;
-        else if (Predicate_i64immSExt32(Val.getNode()))
+        else if (i64immSExt32(Val.getNode()))
           Opc = X86::LOCK_SUB64mi32;
       }
     } else {
       Opc = X86::LOCK_ADD64mr;
       if (isCN) {
-        if (Predicate_immSext8(Val.getNode()))
+        if (immSext8(Val.getNode()))
           Opc = X86::LOCK_ADD64mi8;
-        else if (Predicate_i64immSExt32(Val.getNode()))
+        else if (i64immSExt32(Val.getNode()))
           Opc = X86::LOCK_ADD64mi32;
       }
     }
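
The Predicate_immSext8 / Predicate_i64immSExt32 renames above keep the same
test: whether the constant fits a sign-extended 8-bit (or 32-bit) immediate,
which is what picks the short LOCK_ADD/LOCK_SUB encodings. A hedged sketch of
what the predicates decide (hypothetical standalone helpers; the real ones
are generated from the .td patterns):

  #include <stdint.h>
  static bool fitsSExt8(int64_t V)  { return V == (int64_t)(int8_t)V;  }
  static bool fitsSExt32(int64_t V) { return V == (int64_t)(int32_t)V; }
  // e.g. LOCK_SUB64mi8 when fitsSExt8(C), LOCK_SUB64mi32 when
  // fitsSExt32(C), otherwise the register form LOCK_SUB64mr.
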
@@ -1841,7 +1853,8 @@
 
     // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
     // use a smaller encoding.
-    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
+    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+        HasNoSignedComparisonUses(Node))
       // Look past the truncate if CMP is the only use of it.
       N0 = N0.getOperand(0);
     if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
@@ -1966,7 +1979,7 @@
   case 'v':   // not offsetable    ??
   default: return true;
   case 'm':   // memory
-    if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
+    if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
       return true;
     break;
   }

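The null Parent above is deliberate: inline-asm memory operands have no
MemSDNode behind them, so there is no address-space information from which a
segment could be inferred. An illustration of the kind of operand that takes
this path (hypothetical code, GCC/Clang asm syntax):

  static int counter;
  void bump() {
    __asm__ __volatile__("incl %0" : "+m"(counter));  // "m" constraint,
  }                                                   // Parent == 0
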
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -16,6 +16,7 @@
 #include "X86.h"
 #include "X86InstrBuilder.h"
 #include "X86ISelLowering.h"
+#include "X86ShuffleDecode.h"
 #include "X86TargetMachine.h"
 #include "X86TargetObjectFile.h"
 #include "llvm/CallingConv.h"
@@ -62,9 +63,9 @@
                        SDValue V2);
 
 static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
-  
+
   bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-  
+
   if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
     if (is64Bit) return new X8664_MachoTargetObjectFile();
     return new TargetLoweringObjectFileMachO();
@@ -73,7 +74,7 @@
     return new X8632_ELFTargetObjectFile(TM);
   } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
     return new TargetLoweringObjectFileCOFF();
-  }  
+  }
   llvm_unreachable("unknown subtarget type");
 }
 
@@ -95,6 +96,18 @@
   setSchedulingPreference(Sched::RegPressure);
   setStackPointerRegisterToSaveRestore(X86StackPtr);
 
+  if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+    // Set up Windows compiler runtime calls.
+    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+    setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
+    setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
+    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::X86_StdCall);
+  }
+
   if (Subtarget->isTargetDarwin()) {
     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
     setUseUnderscoreSetJmp(false);
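
These registrations mean that a plain 64-bit division on a 32-bit MSVC
target becomes a call into the compiler runtime. A hedged example
(hypothetical function):

  long long div64(long long a, long long b) {
    return a / b;  // on x86-32 Windows: lowered to a call to _alldiv,
  }                // using the stdcall convention registered above
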
@@ -212,16 +225,13 @@
   }
 
   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
-  if (!X86ScalarSSEf64) { 
+  if (!X86ScalarSSEf64) {
     setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
     setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
     if (Subtarget->is64Bit()) {
       setOperationAction(ISD::BIT_CONVERT    , MVT::f64  , Expand);
-      // Without SSE, i64->f64 goes through memory; i64->MMX is Legal.
-      if (Subtarget->hasMMX() && !DisableMMX)
-        setOperationAction(ISD::BIT_CONVERT    , MVT::i64  , Custom);
-      else 
-        setOperationAction(ISD::BIT_CONVERT    , MVT::i64  , Expand);
+      // Without SSE, i64->f64 goes through memory.
+      setOperationAction(ISD::BIT_CONVERT    , MVT::i64  , Expand);
     }
   }
 
@@ -345,7 +355,7 @@
 
   // We may not have a libcall for MEMBARRIER so we should lower this.
   setOperationAction(ISD::MEMBARRIER    , MVT::Other, Custom);
-  
+
   // On X86 and X86-64, atomic operations are lowered to locked instructions.
   // Locked instructions, in turn, have implicit fence semantics (all memory
   // operations are flushed before issuing the locked instruction, and they
@@ -414,7 +424,7 @@
   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
   if (Subtarget->is64Bit())
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
-  if (Subtarget->isTargetCygMing())
+  if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
   else
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
@@ -613,89 +623,42 @@
   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
   // with -msoft-float, disable use of MMX as well.
   if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
-    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass, false);
-    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false);
-    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false);
-    
-    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false);
-
-    setOperationAction(ISD::ADD,                MVT::v8i8,  Legal);
-    setOperationAction(ISD::ADD,                MVT::v4i16, Legal);
-    setOperationAction(ISD::ADD,                MVT::v2i32, Legal);
-    setOperationAction(ISD::ADD,                MVT::v1i64, Legal);
-
-    setOperationAction(ISD::SUB,                MVT::v8i8,  Legal);
-    setOperationAction(ISD::SUB,                MVT::v4i16, Legal);
-    setOperationAction(ISD::SUB,                MVT::v2i32, Legal);
-    setOperationAction(ISD::SUB,                MVT::v1i64, Legal);
-
-    setOperationAction(ISD::MULHS,              MVT::v4i16, Legal);
-    setOperationAction(ISD::MUL,                MVT::v4i16, Legal);
-
-    setOperationAction(ISD::AND,                MVT::v8i8,  Promote);
-    AddPromotedToType (ISD::AND,                MVT::v8i8,  MVT::v1i64);
-    setOperationAction(ISD::AND,                MVT::v4i16, Promote);
-    AddPromotedToType (ISD::AND,                MVT::v4i16, MVT::v1i64);
-    setOperationAction(ISD::AND,                MVT::v2i32, Promote);
-    AddPromotedToType (ISD::AND,                MVT::v2i32, MVT::v1i64);
-    setOperationAction(ISD::AND,                MVT::v1i64, Legal);
-
-    setOperationAction(ISD::OR,                 MVT::v8i8,  Promote);
-    AddPromotedToType (ISD::OR,                 MVT::v8i8,  MVT::v1i64);
-    setOperationAction(ISD::OR,                 MVT::v4i16, Promote);
-    AddPromotedToType (ISD::OR,                 MVT::v4i16, MVT::v1i64);
-    setOperationAction(ISD::OR,                 MVT::v2i32, Promote);
-    AddPromotedToType (ISD::OR,                 MVT::v2i32, MVT::v1i64);
-    setOperationAction(ISD::OR,                 MVT::v1i64, Legal);
-
-    setOperationAction(ISD::XOR,                MVT::v8i8,  Promote);
-    AddPromotedToType (ISD::XOR,                MVT::v8i8,  MVT::v1i64);
-    setOperationAction(ISD::XOR,                MVT::v4i16, Promote);
-    AddPromotedToType (ISD::XOR,                MVT::v4i16, MVT::v1i64);
-    setOperationAction(ISD::XOR,                MVT::v2i32, Promote);
-    AddPromotedToType (ISD::XOR,                MVT::v2i32, MVT::v1i64);
-    setOperationAction(ISD::XOR,                MVT::v1i64, Legal);
-
-    setOperationAction(ISD::LOAD,               MVT::v8i8,  Promote);
-    AddPromotedToType (ISD::LOAD,               MVT::v8i8,  MVT::v1i64);
-    setOperationAction(ISD::LOAD,               MVT::v4i16, Promote);
-    AddPromotedToType (ISD::LOAD,               MVT::v4i16, MVT::v1i64);
-    setOperationAction(ISD::LOAD,               MVT::v2i32, Promote);
-    AddPromotedToType (ISD::LOAD,               MVT::v2i32, MVT::v1i64);
-    setOperationAction(ISD::LOAD,               MVT::v1i64, Legal);
-
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i8,  Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i16, Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i32, Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v1i64, Custom);
-
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i8,  Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i16, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i32, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v1i64, Custom);
-
-    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Custom);
-    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Custom);
-    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Custom);
-
-    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i16, Custom);
-
-    setOperationAction(ISD::SELECT,             MVT::v8i8, Promote);
-    setOperationAction(ISD::SELECT,             MVT::v4i16, Promote);
-    setOperationAction(ISD::SELECT,             MVT::v2i32, Promote);
-    setOperationAction(ISD::SELECT,             MVT::v1i64, Custom);
-    setOperationAction(ISD::VSETCC,             MVT::v8i8, Custom);
-    setOperationAction(ISD::VSETCC,             MVT::v4i16, Custom);
-    setOperationAction(ISD::VSETCC,             MVT::v2i32, Custom);
-
-    if (!X86ScalarSSEf64 && Subtarget->is64Bit()) {
-      setOperationAction(ISD::BIT_CONVERT,        MVT::v8i8,  Custom);
-      setOperationAction(ISD::BIT_CONVERT,        MVT::v4i16, Custom);
-      setOperationAction(ISD::BIT_CONVERT,        MVT::v2i32, Custom);
-      setOperationAction(ISD::BIT_CONVERT,        MVT::v1i64, Custom);
-    }
+    addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
+    // No operations on x86mmx are supported; everything uses intrinsics.
   }
 
+  // MMX-sized vectors (other than x86mmx) are expected to be expanded
+  // into smaller operations.
+  setOperationAction(ISD::MULHS,              MVT::v8i8,  Expand);
+  setOperationAction(ISD::MULHS,              MVT::v4i16, Expand);
+  setOperationAction(ISD::MULHS,              MVT::v2i32, Expand);
+  setOperationAction(ISD::MULHS,              MVT::v1i64, Expand);
+  setOperationAction(ISD::AND,                MVT::v8i8,  Expand);
+  setOperationAction(ISD::AND,                MVT::v4i16, Expand);
+  setOperationAction(ISD::AND,                MVT::v2i32, Expand);
+  setOperationAction(ISD::AND,                MVT::v1i64, Expand);
+  setOperationAction(ISD::OR,                 MVT::v8i8,  Expand);
+  setOperationAction(ISD::OR,                 MVT::v4i16, Expand);
+  setOperationAction(ISD::OR,                 MVT::v2i32, Expand);
+  setOperationAction(ISD::OR,                 MVT::v1i64, Expand);
+  setOperationAction(ISD::XOR,                MVT::v8i8,  Expand);
+  setOperationAction(ISD::XOR,                MVT::v4i16, Expand);
+  setOperationAction(ISD::XOR,                MVT::v2i32, Expand);
+  setOperationAction(ISD::XOR,                MVT::v1i64, Expand);
+  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Expand);
+  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Expand);
+  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v2i32, Expand);
+  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Expand);
+  setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);
+  setOperationAction(ISD::SELECT,             MVT::v8i8,  Expand);
+  setOperationAction(ISD::SELECT,             MVT::v4i16, Expand);
+  setOperationAction(ISD::SELECT,             MVT::v2i32, Expand);
+  setOperationAction(ISD::SELECT,             MVT::v1i64, Expand);
+  setOperationAction(ISD::BIT_CONVERT,        MVT::v8i8,  Expand);
+  setOperationAction(ISD::BIT_CONVERT,        MVT::v4i16, Expand);
+  setOperationAction(ISD::BIT_CONVERT,        MVT::v2i32, Expand);
+  setOperationAction(ISD::BIT_CONVERT,        MVT::v1i64, Expand);
+
   if (!UseSoftFloat && Subtarget->hasSSE1()) {
     addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
 
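With the generic 64-bit vector operations now marked Expand, MMX is reachable
only through intrinsics, which produce the new x86mmx type directly. A hedged
illustration (requires an MMX-capable x86 target):

  #include <mmintrin.h>
  // Maps onto x86mmx; generic <4 x i16> arithmetic is expanded instead.
  __m64 add16(__m64 a, __m64 b) { return _mm_add_pi16(a, b); }
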
@@ -794,7 +757,7 @@
       // Do not attempt to promote non-128-bit vectors
       if (!VT.is128BitVector())
         continue;
-      
+
       setOperationAction(ISD::AND,    SVT, Promote);
       AddPromotedToType (ISD::AND,    SVT, MVT::v2i64);
       setOperationAction(ISD::OR,     SVT, Promote);
@@ -817,10 +780,6 @@
 
     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
-    if (!DisableMMX && Subtarget->hasMMX()) {
-      setOperationAction(ISD::FP_TO_SINT,         MVT::v2i32, Custom);
-      setOperationAction(ISD::SINT_TO_FP,         MVT::v2i32, Custom);
-    }
   }
 
   if (Subtarget->hasSSE41()) {
@@ -871,6 +830,7 @@
     addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
     addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
     addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+    addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
 
     setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
     setOperationAction(ISD::LOAD,               MVT::v8i32, Legal);
@@ -882,7 +842,7 @@
     setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
     setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
     setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
-    //setOperationAction(ISD::BUILD_VECTOR,       MVT::v8f32, Custom);
+    setOperationAction(ISD::BUILD_VECTOR,       MVT::v8f32, Custom);
     //setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8f32, Custom);
     //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
     //setOperationAction(ISD::SELECT,             MVT::v8f32, Custom);
@@ -1137,7 +1097,7 @@
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
       Subtarget->isPICStyleGOT())
     return MachineJumpTableInfo::EK_Custom32;
-  
+
   // Otherwise, use the normal jump table encoding heuristics.
   return TargetLowering::getJumpTableEncoding();
 }
@@ -1205,8 +1165,7 @@
     RRC = (Subtarget->is64Bit()
            ? X86::GR64RegisterClass : X86::GR32RegisterClass);
     break;
-  case MVT::v8i8: case MVT::v4i16:
-  case MVT::v2i32: case MVT::v1i64: 
+  case MVT::x86mmx:
     RRC = X86::VR64RegisterClass;
     break;
   case MVT::f32: case MVT::f64:
@@ -1265,7 +1224,7 @@
 
 #include "X86GenCallingConv.inc"
 
-bool 
+bool
 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         LLVMContext &Context) const {
@@ -1310,18 +1269,19 @@
     SDValue ValToCopy = OutVals[i];
     EVT ValVT = ValToCopy.getValueType();
 
-    // If this is x86-64, and we disabled SSE, we can't return FP values
-    if ((ValVT == MVT::f32 || ValVT == MVT::f64) &&
-        (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+    // If this is x86-64 and we disabled SSE, we can't return FP values,
+    // nor can we return SSE or MMX vectors.
+    if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+         VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+          (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
       report_fatal_error("SSE register return with SSE disabled");
     }
     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
     // llvm-gcc has never done it right and no one has noticed, so this
     // should be OK for now.
     if (ValVT == MVT::f64 &&
-        (Subtarget->is64Bit() && !Subtarget->hasSSE2())) {
+        (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
       report_fatal_error("SSE2 register return with SSE2 disabled");
-    }
 
     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
     // the RET instruction and handled by the FP Stackifier.
@@ -1339,11 +1299,16 @@
     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
     // which is returned in RAX / RDX.
     if (Subtarget->is64Bit()) {
-      if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
-        ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
-        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
+      if (ValVT == MVT::x86mmx) {
+        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+          ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                   ValToCopy);
+          // If we don't have SSE2 available, convert to v4f32 so the generated
+          // register is legal.
+          if (!Subtarget->hasSSE2())
+            ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy);
+        }
       }
     }
 
@@ -1360,7 +1325,7 @@
     MachineFunction &MF = DAG.getMachineFunction();
     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
     unsigned Reg = FuncInfo->getSRetReturnReg();
-    assert(Reg && 
+    assert(Reg &&
            "SRetReturnReg should have been set in LowerFormalArguments().");
     SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
 
@@ -1524,10 +1489,11 @@
 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                           DebugLoc dl) {
-  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                        /*isVolatile*/false, /*AlwaysInline=*/true,
-                       NULL, 0, NULL, 0);
+                       MachinePointerInfo(), MachinePointerInfo());
 }
 
 /// IsTailCallConvention - Return true if the calling convention is one that
@@ -1576,7 +1542,7 @@
                                     VA.getLocMemOffset(), isImmutable);
     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
     return DAG.getLoad(ValVT, dl, Chain, FIN,
-                       PseudoSourceValue::getFixedStack(FI), 0,
+                       MachinePointerInfo::getFixedStack(FI),
                        false, false, 0);
   }
 }
@@ -1633,9 +1599,11 @@
         RC = X86::FR32RegisterClass;
       else if (RegVT == MVT::f64)
         RC = X86::FR64RegisterClass;
+      else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+        RC = X86::VR256RegisterClass;
       else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
         RC = X86::VR128RegisterClass;
-      else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+      else if (RegVT == MVT::x86mmx)
         RC = X86::VR64RegisterClass;
       else
         llvm_unreachable("Unknown argument type!");
@@ -1658,9 +1626,8 @@
       if (VA.isExtInLoc()) {
         // Handle MMX values passed in XMM regs.
         if (RegVT.isVector()) {
-          ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
-                                 ArgValue, DAG.getConstant(0, MVT::i64));
-          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+          ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
+                                 ArgValue);
         } else
           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
       }
@@ -1671,8 +1638,8 @@
 
     // If value is passed via pointer - do a load.
     if (VA.getLocInfo() == CCValAssign::Indirect)
-      ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0,
-                             false, false, 0);
+      ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+                             MachinePointerInfo(), false, false, 0);
 
     InVals.push_back(ArgValue);
   }
@@ -1699,8 +1666,8 @@
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
   if (isVarArg) {
-    if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
-                    CallConv != CallingConv::X86_ThisCall)) {
+    if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+                    CallConv != CallingConv::X86_ThisCall))) {
       FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
     }
     if (Is64Bit) {
@@ -1710,9 +1677,6 @@
       static const unsigned GPR64ArgRegsWin64[] = {
         X86::RCX, X86::RDX, X86::R8,  X86::R9
       };
-      static const unsigned XMMArgRegsWin64[] = {
-        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
-      };
       static const unsigned GPR64ArgRegs64Bit[] = {
         X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
       };
@@ -1720,21 +1684,23 @@
         X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
         X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
       };
-      const unsigned *GPR64ArgRegs, *XMMArgRegs;
+      const unsigned *GPR64ArgRegs;
+      unsigned NumXMMRegs = 0;
 
       if (IsWin64) {
-        TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+        // The XMM registers which might contain vararg parameters are shadowed
+        // by their paired GPRs, so we only need to save the GPRs to their home
+        // slots.
+        TotalNumIntRegs = 4;
         GPR64ArgRegs = GPR64ArgRegsWin64;
-        XMMArgRegs = XMMArgRegsWin64;
       } else {
         TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
         GPR64ArgRegs = GPR64ArgRegs64Bit;
-        XMMArgRegs = XMMArgRegs64Bit;
+
+        NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
       }
       unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
                                                        TotalNumIntRegs);
-      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
-                                                       TotalNumXMMRegs);
 
       bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
       assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
@@ -1746,14 +1712,24 @@
         // on the stack.
         TotalNumXMMRegs = 0;
 
-      // For X86-64, if there are vararg parameters that are passed via
-      // registers, then we must store them to their spots on the stack so they
-      // may be loaded by deferencing the result of va_next.
-      FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
-      FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
-      FuncInfo->setRegSaveFrameIndex(
-        MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+      if (IsWin64) {
+        const TargetFrameInfo &TFI = *getTargetMachine().getFrameInfo();
+        // Get to the caller-allocated home save location.  Add 8 to account
+        // for the return address.
+        int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+        FuncInfo->setRegSaveFrameIndex(
+          MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+        FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+      } else {
+        // For X86-64, if there are vararg parameters that are passed via
+        // registers, then we must store them to their spots on the stack so they
+        // may be loaded by dereferencing the result of va_next.
+        FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+        FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+        FuncInfo->setRegSaveFrameIndex(
+          MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
                                false));
+      }
 
       // Store the integer parameter registers.
       SmallVector<SDValue, 8> MemOps;
@@ -1768,9 +1744,9 @@
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                       PseudoSourceValue::getFixedStack(
-                         FuncInfo->getRegSaveFrameIndex()),
-                       Offset, false, false, 0);
+                       MachinePointerInfo::getFixedStack(
+                         FuncInfo->getRegSaveFrameIndex(), Offset),
+                       false, false, 0);
         MemOps.push_back(Store);
         Offset += 8;
       }
@@ -1790,7 +1766,7 @@
                                FuncInfo->getVarArgsFPOffset()));
 
         for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
-          unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
+          unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
                                        X86::VR128RegisterClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
           SaveXMMOps.push_back(Val);
@@ -1838,11 +1814,11 @@
   unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
-  if (Flags.isByVal()) {
+  if (Flags.isByVal())
     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
-  }
+
   return DAG.getStore(Chain, dl, Arg, PtrOff,
-                      PseudoSourceValue::getStack(), LocMemOffset,
+                      MachinePointerInfo::getStack(LocMemOffset),
                       false, false, 0);
 }
 
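This hunk shows the API migration repeated throughout the file: the old
(PseudoSourceValue, offset) pair on loads and stores collapses into a single
MachinePointerInfo. Schematically:

  // before:
  DAG.getStore(Chain, dl, Arg, PtrOff,
               PseudoSourceValue::getStack(), LocMemOffset, false, false, 0);
  // after:
  DAG.getStore(Chain, dl, Arg, PtrOff,
               MachinePointerInfo::getStack(LocMemOffset), false, false, 0);
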
@@ -1858,7 +1834,8 @@
   OutRetAddr = getReturnAddressFrameIndex(DAG);
 
   // Load the "old" Return address.
-  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0);
+  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+                           false, false, 0);
   return SDValue(OutRetAddr.getNode(), 1);
 }
 
@@ -1877,7 +1854,7 @@
   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
-                       PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0,
+                       MachinePointerInfo::getFixedStack(NewReturnAddrFI),
                        false, false, 0);
   return Chain;
 }
@@ -1991,7 +1968,7 @@
       SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
       Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
-                           PseudoSourceValue::getFixedStack(FI), 0,
+                           MachinePointerInfo::getFixedStack(FI),
                            false, false, 0);
       Arg = SpillSlot;
       break;
@@ -2000,6 +1977,19 @@
 
     if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      if (isVarArg && Subtarget->isTargetWin64()) {
+        // The Win64 ABI requires an argument in an XMM register to be copied
+        // to its corresponding shadow GPR if the callee is a varargs function.
+        unsigned ShadowReg = 0;
+        switch (VA.getLocReg()) {
+        case X86::XMM0: ShadowReg = X86::RCX; break;
+        case X86::XMM1: ShadowReg = X86::RDX; break;
+        case X86::XMM2: ShadowReg = X86::R8; break;
+        case X86::XMM3: ShadowReg = X86::R9; break;
+        }
+        if (ShadowReg)
+          RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+      }
     } else if (!IsSibcall && (!isTailCall || isByVal)) {
       assert(VA.isMemLoc());
       if (StackPtr.getNode() == 0)
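
For reference, the Win64 rule the new block implements: the first four
argument slots map to RCX, RDX, R8 and R9, and a floating-point vararg must
also be copied into the integer register of its slot so the callee's va_arg
machinery can find it. A hypothetical helper mirroring the switch above (not
part of the patch):

  static unsigned getWin64ShadowGPR(unsigned XMMReg) {
    switch (XMMReg) {
    default:        return 0;  // not one of the four parameter XMM regs
    case X86::XMM0: return X86::RCX;
    case X86::XMM1: return X86::RDX;
    case X86::XMM2: return X86::R8;
    case X86::XMM3: return X86::R9;
    }
  }
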
@@ -2121,7 +2111,7 @@
           // Store relative to framepointer.
           MemOpChains2.push_back(
             DAG.getStore(ArgChain, dl, Arg, FIN,
-                         PseudoSourceValue::getFixedStack(FI), 0,
+                         MachinePointerInfo::getFixedStack(FI),
                          false, false, 0));
         }
       }
@@ -2170,8 +2160,8 @@
           GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
         OpFlags = X86II::MO_PLT;
       } else if (Subtarget->isPICStyleStubAny() &&
-               (GV->isDeclaration() || GV->isWeakForLinker()) &&
-               Subtarget->getDarwinVers() < 9) {
+                 (GV->isDeclaration() || GV->isWeakForLinker()) &&
+                 Subtarget->getDarwinVers() < 9) {
         // PC-relative references to external symbols should go through $stub,
         // unless we're building with the leopard linker or later, which
         // automatically synthesizes these stubs.
@@ -2190,7 +2180,7 @@
         getTargetMachine().getRelocationModel() == Reloc::PIC_) {
       OpFlags = X86II::MO_PLT;
     } else if (Subtarget->isPICStyleStubAny() &&
-             Subtarget->getDarwinVers() < 9) {
+               Subtarget->getDarwinVers() < 9) {
       // PC-relative references to external symbols should go through $stub,
       // unless we're building with the leopard linker or later, which
       // automatically synthesizes these stubs.
@@ -2227,8 +2217,8 @@
   if (!isTailCall && Subtarget->isPICStyleGOT())
     Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
 
-  // Add an implicit use of AL for x86 vararg functions.
-  if (Is64Bit && isVarArg)
+  // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
+  if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
     Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
 
   if (InFlag.getNode())
@@ -2542,6 +2532,11 @@
     }
   }
 
+  // An stdcall caller is expected to clean up its arguments; the callee
+  // isn't going to do that. PR8461.
+  if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
+    return false;
+
   return true;
 }
 
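Concretely: an stdcall function pops its own incoming arguments on return,
so if it tail-calls a callee with a different convention, nobody performs
that pop. A hedged illustration (hypothetical functions, Windows-only
attribute syntax):

  int __cdecl helper(int a, int b);
  int __stdcall wrapper(int a, int b) {
    return helper(a, b);  // must not become a sibling call: helper's ret
  }                       // would leave wrapper's arguments unpopped
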
@@ -2555,6 +2550,116 @@
 //                           Other Lowering Hooks
 //===----------------------------------------------------------------------===//
 
+static bool MayFoldLoad(SDValue Op) {
+  return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+}
+
+static bool MayFoldIntoStore(SDValue Op) {
+  return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
+}
+
+static bool isTargetShuffle(unsigned Opcode) {
+  switch(Opcode) {
+  default: return false;
+  case X86ISD::PSHUFD:
+  case X86ISD::PSHUFHW:
+  case X86ISD::PSHUFLW:
+  case X86ISD::SHUFPD:
+  case X86ISD::PALIGN:
+  case X86ISD::SHUFPS:
+  case X86ISD::MOVLHPS:
+  case X86ISD::MOVLHPD:
+  case X86ISD::MOVHLPS:
+  case X86ISD::MOVLPS:
+  case X86ISD::MOVLPD:
+  case X86ISD::MOVSHDUP:
+  case X86ISD::MOVSLDUP:
+  case X86ISD::MOVDDUP:
+  case X86ISD::MOVSS:
+  case X86ISD::MOVSD:
+  case X86ISD::UNPCKLPS:
+  case X86ISD::UNPCKLPD:
+  case X86ISD::PUNPCKLWD:
+  case X86ISD::PUNPCKLBW:
+  case X86ISD::PUNPCKLDQ:
+  case X86ISD::PUNPCKLQDQ:
+  case X86ISD::UNPCKHPS:
+  case X86ISD::UNPCKHPD:
+  case X86ISD::PUNPCKHWD:
+  case X86ISD::PUNPCKHBW:
+  case X86ISD::PUNPCKHDQ:
+  case X86ISD::PUNPCKHQDQ:
+    return true;
+  }
+  return false;
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+                                               SDValue V1, SelectionDAG &DAG) {
+  switch(Opc) {
+  default: llvm_unreachable("Unknown x86 shuffle node");
+  case X86ISD::MOVSHDUP:
+  case X86ISD::MOVSLDUP:
+  case X86ISD::MOVDDUP:
+    return DAG.getNode(Opc, dl, VT, V1);
+  }
+
+  return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+                          SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+  switch(Opc) {
+  default: llvm_unreachable("Unknown x86 shuffle node");
+  case X86ISD::PSHUFD:
+  case X86ISD::PSHUFHW:
+  case X86ISD::PSHUFLW:
+    return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
+  }
+
+  return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+               SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+  switch(Opc) {
+  default: llvm_unreachable("Unknown x86 shuffle node");
+  case X86ISD::PALIGN:
+  case X86ISD::SHUFPD:
+  case X86ISD::SHUFPS:
+    return DAG.getNode(Opc, dl, VT, V1, V2,
+                       DAG.getConstant(TargetMask, MVT::i8));
+  }
+  return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+                                    SDValue V1, SDValue V2, SelectionDAG &DAG) {
+  switch(Opc) {
+  default: llvm_unreachable("Unknown x86 shuffle node");
+  case X86ISD::MOVLHPS:
+  case X86ISD::MOVLHPD:
+  case X86ISD::MOVHLPS:
+  case X86ISD::MOVLPS:
+  case X86ISD::MOVLPD:
+  case X86ISD::MOVSS:
+  case X86ISD::MOVSD:
+  case X86ISD::UNPCKLPS:
+  case X86ISD::UNPCKLPD:
+  case X86ISD::PUNPCKLWD:
+  case X86ISD::PUNPCKLBW:
+  case X86ISD::PUNPCKLDQ:
+  case X86ISD::PUNPCKLQDQ:
+  case X86ISD::UNPCKHPS:
+  case X86ISD::UNPCKHPD:
+  case X86ISD::PUNPCKHWD:
+  case X86ISD::PUNPCKHBW:
+  case X86ISD::PUNPCKHDQ:
+  case X86ISD::PUNPCKHQDQ:
+    return DAG.getNode(Opc, dl, VT, V1, V2);
+  }
+  return SDValue();
+}
 
 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
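
A hedged usage sketch of the new getTargetShuffleNode overloads (hypothetical
helper, assuming this file's context for the X86ISD opcodes):

  static SDValue emitPSHUFD(SelectionDAG &DAG, DebugLoc dl, SDValue V1,
                            unsigned Imm8) {
    // Single-input shuffle with an immediate mask operand.
    return getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32, V1, Imm8, DAG);
  }
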
@@ -2737,7 +2842,7 @@
 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
 /// the second operand.
 static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
-  if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
+  if (VT == MVT::v4f32 || VT == MVT::v4i32)
     return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
   if (VT == MVT::v2f64 || VT == MVT::v2i64)
     return (Mask[0] < 2 && Mask[1] < 2);
@@ -2805,15 +2910,15 @@
 static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
                           bool hasSSSE3) {
   int i, e = VT.getVectorNumElements();
-  
+
   // Do not handle v2i64 / v2f64 shuffles with palignr.
   if (e < 4 || !hasSSSE3)
     return false;
-  
+
   for (i = 0; i != e; ++i)
     if (Mask[i] >= 0)
       break;
-  
+
   // All undef, not a palignr.
   if (i == e)
     return false;
@@ -2824,13 +2929,13 @@
   bool NeedsUnary = false;
 
   int s = Mask[i] - i;
-  
+
   // Check the rest of the elements to see if they are consecutive.
   for (++i; i != e; ++i) {
     int m = Mask[i];
-    if (m < 0) 
+    if (m < 0)
       continue;
-    
+
     Unary = Unary && (m < (int)e);
     NeedsUnary = NeedsUnary || (m < s);
 
@@ -2918,10 +3023,10 @@
 /// <2, 3, 2, 3>
 bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
   unsigned NumElems = N->getValueType(0).getVectorNumElements();
-  
+
   if (NumElems != 4)
     return false;
-  
+
   return isUndefOrEqual(N->getMaskElt(0), 2) &&
   isUndefOrEqual(N->getMaskElt(1), 3) &&
   isUndefOrEqual(N->getMaskElt(2), 2) &&
@@ -3409,18 +3514,24 @@
                              DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
 
-  // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
-  // type.  This ensures they get CSE'd.
+  // Always build SSE zero vectors as <4 x i32> bitcasted
+  // to their dest type. This ensures they get CSE'd.
   SDValue Vec;
-  if (VT.getSizeInBits() == 64) { // MMX
-    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
-  } else if (HasSSE2) {  // SSE2
-    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
-  } else { // SSE1
+  if (VT.getSizeInBits() == 128) {  // SSE
+    if (HasSSE2) {  // SSE2
+      SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+    } else { // SSE1
+      SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+    }
+  } else if (VT.getSizeInBits() == 256) { // AVX
+    // 256-bit logical and arithmetic instructions in AVX are all
+    // floating-point; there is no support for integer ops. So default
+    // to emitting FP zero vectors.
     SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
   }
   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
 }
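
The payoff of the canonical element types: every zero vector, whatever its
nominal type, hashes to the same BUILD_VECTOR node and gets CSE'd. A hedged
sketch within this file's context:

  // With SSE2, a v2i64 zero comes back as
  //   (v2i64 (bit_convert (v4i32 build_vector 0,0,0,0)))
  SDValue Zero = getZeroVector(MVT::v2i64, /*HasSSE2=*/true, DAG, dl);
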
@@ -3434,10 +3545,7 @@
   // type.  This ensures they get CSE'd.
   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
   SDValue Vec;
-  if (VT.getSizeInBits() == 64)  // MMX
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
-  else                                              // SSE
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
 }
 
@@ -3501,12 +3609,8 @@
   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
 }
 
-/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
-                            bool HasSSE2) {
-  if (SV->getValueType(0).getVectorNumElements() <= 4)
-    return SDValue(SV, 0);
-
+/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
   EVT PVT = MVT::v4f32;
   EVT VT = SV->getValueType(0);
   DebugLoc dl = SV->getDebugLoc();
@@ -3550,68 +3654,253 @@
   return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
 }
 
-/// getNumOfConsecutiveZeros - Return the number of elements in a result of
-/// a shuffle that is zero.
-static
-unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
-                                  bool Low, SelectionDAG &DAG) {
-  unsigned NumZeros = 0;
-  for (int i = 0; i < NumElems; ++i) {
-    unsigned Index = Low ? i : NumElems-i-1;
-    int Idx = SVOp->getMaskElt(Index);
-    if (Idx < 0) {
-      ++NumZeros;
-      continue;
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+                            unsigned Depth) {
+  if (Depth == 6)
+    return SDValue();  // Limit search depth.
+
+  SDValue V = SDValue(N, 0);
+  EVT VT = V.getValueType();
+  unsigned Opcode = V.getOpcode();
+
+  // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
+  if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+    Index = SV->getMaskElt(Index);
+
+    if (Index < 0)
+      return DAG.getUNDEF(VT.getVectorElementType());
+
+    int NumElems = VT.getVectorNumElements();
+    SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+    return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+  }
+
+  // Recurse into target specific vector shuffles to find scalars.
+  if (isTargetShuffle(Opcode)) {
+    int NumElems = VT.getVectorNumElements();
+    SmallVector<unsigned, 16> ShuffleMask;
+    SDValue ImmN;
+
+    switch(Opcode) {
+    case X86ISD::SHUFPS:
+    case X86ISD::SHUFPD:
+      ImmN = N->getOperand(N->getNumOperands()-1);
+      DecodeSHUFPSMask(NumElems,
+                       cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                       ShuffleMask);
+      break;
+    case X86ISD::PUNPCKHBW:
+    case X86ISD::PUNPCKHWD:
+    case X86ISD::PUNPCKHDQ:
+    case X86ISD::PUNPCKHQDQ:
+      DecodePUNPCKHMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::UNPCKHPS:
+    case X86ISD::UNPCKHPD:
+      DecodeUNPCKHPMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::PUNPCKLBW:
+    case X86ISD::PUNPCKLWD:
+    case X86ISD::PUNPCKLDQ:
+    case X86ISD::PUNPCKLQDQ:
+      DecodePUNPCKLMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::UNPCKLPS:
+    case X86ISD::UNPCKLPD:
+      DecodeUNPCKLPMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::MOVHLPS:
+      DecodeMOVHLPSMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::MOVLHPS:
+      DecodeMOVLHPSMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::PSHUFD:
+      ImmN = N->getOperand(N->getNumOperands()-1);
+      DecodePSHUFMask(NumElems,
+                      cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                      ShuffleMask);
+      break;
+    case X86ISD::PSHUFHW:
+      ImmN = N->getOperand(N->getNumOperands()-1);
+      DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                        ShuffleMask);
+      break;
+    case X86ISD::PSHUFLW:
+      ImmN = N->getOperand(N->getNumOperands()-1);
+      DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                        ShuffleMask);
+      break;
+    case X86ISD::MOVSS:
+    case X86ISD::MOVSD: {
+      // The index 0 always comes from the first element of the second source,
+      // this is why MOVSS and MOVSD are used in the first place. The other
+      // elements come from the other positions of the first source vector.
+      unsigned OpNum = (Index == 0) ? 1 : 0;
+      return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
+                                 Depth+1);
     }
-    SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
-    if (Elt.getNode() && X86::isZeroNode(Elt))
-      ++NumZeros;
-    else
-      break;
+    default:
+      assert("not implemented for target shuffle node");
+      return SDValue();
+    }
+
+    Index = ShuffleMask[Index];
+    if (Index < 0)
+      return DAG.getUNDEF(VT.getVectorElementType());
+
+    SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+    return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+                               Depth+1);
+  }
+
+  // Actual nodes that may contain scalar elements
+  if (Opcode == ISD::BIT_CONVERT) {
+    V = V.getOperand(0);
+    EVT SrcVT = V.getValueType();
+    unsigned NumElems = VT.getVectorNumElements();
+
+    if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
+      return SDValue();
   }
-  return NumZeros;
+
+  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    return (Index == 0) ? V.getOperand(0)
+                          : DAG.getUNDEF(VT.getVectorElementType());
+
+  if (V.getOpcode() == ISD::BUILD_VECTOR)
+    return V.getOperand(Index);
+
+  return SDValue();
 }
 
-/// isVectorShift - Returns true if the shuffle can be implemented as a
-/// logical left or right shift of a vector.
-/// FIXME: split into pslldqi, psrldqi, palignr variants.
-static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
-                          bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
-  unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+/// getNumOfConsecutiveZeros - Return the number of consecutive zero elements
+/// at one end of the result of a vector shuffle. The search can start from
+/// either direction, left or right.
+static
+unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+                                  bool ZerosFromLeft, SelectionDAG &DAG) {
+  int i = 0;
 
-  isLeft = true;
-  unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
-  if (!NumZeros) {
-    isLeft = false;
-    NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG);
-    if (!NumZeros)
-      return false;
+  while (i < NumElems) {
+    unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
+    SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+    if (!(Elt.getNode() &&
+         (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
+      break;
+    ++i;
   }
+
+  return i;
+}
+
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices from MaskI to
+/// MaskE correspond consecutively to elements from one of the vector operands,
+/// starting from its index OpIdx. Also record in OpNum which operand was used.
+static
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
+                              int OpIdx, int NumElems, unsigned &OpNum) {
   bool SeenV1 = false;
   bool SeenV2 = false;
-  for (unsigned i = NumZeros; i < NumElems; ++i) {
-    unsigned Val = isLeft ? (i - NumZeros) : i;
-    int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
-    if (Idx_ < 0)
+
+  for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+    int Idx = SVOp->getMaskElt(i);
+    // Ignore undef indices
+    if (Idx < 0)
       continue;
-    unsigned Idx = (unsigned) Idx_;
+
     if (Idx < NumElems)
       SeenV1 = true;
-    else {
-      Idx -= NumElems;
+    else
       SeenV2 = true;
-    }
-    if (Idx != Val)
+
+    // Only accept consecutive elements from the same vector
+    if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
       return false;
   }
-  if (SeenV1 && SeenV2)
+
+  OpNum = SeenV1 ? 0 : 1;
+  return true;
+}
+
+/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
+/// logical right shift of a vector.
+static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+                               bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+  unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+  unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+              false /* check zeros from right */, DAG);
+  unsigned OpSrc;
+
+  if (!NumZeros)
     return false;
 
-  ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1);
+  // Considering the elements in the mask that are not consecutive zeros,
+  // check if they consecutively come from only one of the source vectors.
+  //
+  //               V1 = {X, A, B, C}     0
+  //                         \  \  \    /
+  //   vector_shuffle V1, V2 <1, 2, 3, X>
+  //
+  if (!isShuffleMaskConsecutive(SVOp,
+            0,                   // Mask Start Index
+            NumElems-NumZeros-1, // Mask End Index
+            NumZeros,            // Where to start looking in the src vector
+            NumElems,            // Number of elements in vector
+            OpSrc))              // Which source operand ?
+    return false;
+
+  isLeft = false;
   ShAmt = NumZeros;
+  ShVal = SVOp->getOperand(OpSrc);
   return true;
 }
 
+/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+                              bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+  unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+  unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+              true /* check zeros from left */, DAG);
+  unsigned OpSrc;
+
+  if (!NumZeros)
+    return false;
+
+  // Considering the elements in the mask that are not consecutive zeros,
+  // check if they consecutively come from only one of the source vectors.
+  //
+  //                           0    { A, B, X, X } = V2
+  //                          / \    /  /
+  //   vector_shuffle V1, V2 <X, X, 4, 5>
+  //
+  if (!isShuffleMaskConsecutive(SVOp,
+            NumZeros,     // Mask Start Index
+            NumElems-1,   // Mask End Index
+            0,            // Where to start looking in the src vector
+            NumElems,     // Number of elements in vector
+            OpSrc))       // Which source operand ?
+    return false;
+
+  isLeft = true;
+  ShAmt = NumZeros;
+  ShVal = SVOp->getOperand(OpSrc);
+  return true;
+}
+
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+                          bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+  if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
+      isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
+    return true;
+
+  return false;
+}
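
A worked example of the split helpers (values illustrative): take
NumElems = 4 and vector_shuffle V1, V2 <X, X, 4, 5>, where the first two
result elements are known to be zero per getShuffleScalarElt. Searching from
the left, getNumOfConsecutiveZeros returns 2; isShuffleMaskConsecutive(SVOp,
2, 3, 0, 4, OpSrc) sees mask values 4 and 5, i.e. elements 0 and 1 of V2,
and sets OpSrc to 1. The net result is isLeft = true, ShVal = V2 and
ShAmt = 2, so the shuffle can be emitted as a whole-register shift
(pslldq $8 for 32-bit elements).
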
 
 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
 ///
@@ -3696,8 +3985,7 @@
 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
                          unsigned NumBits, SelectionDAG &DAG,
                          const TargetLowering &TLI, DebugLoc dl) {
-  bool isMMX = VT.getSizeInBits() == 64;
-  EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+  EVT ShVT = MVT::v2i64;
   unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
   SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
   return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
@@ -3708,7 +3996,7 @@
 SDValue
 X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
                                           SelectionDAG &DAG) const {
-  
+
   // Check if the scalar load can be widened into a vector load. And if
   // the address is "base + cst" see if the cst can be "absorbed" into
   // the shuffle mask.
@@ -3763,41 +4051,42 @@
     int EltNo = (Offset - StartOffset) >> 2;
     int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
     EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
-    SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0,
+    SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+                             LD->getPointerInfo().getWithOffset(StartOffset),
                              false, false, 0);
     // Canonicalize it to a v4i32 shuffle.
     V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
     return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                        DAG.getVectorShuffle(MVT::v4i32, dl, V1,
-                                            DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+                                            DAG.getUNDEF(MVT::v4i32),&Mask[0]));
   }
 
   return SDValue();
 }
 
-/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a 
-/// vector of type 'VT', see if the elements can be replaced by a single large 
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
 /// load which has the same value as a build_vector whose operands are 'elts'.
 ///
 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
-/// 
+///
 /// FIXME: we'd also like to handle the case where the last elements are zero
 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
 /// There's even a handy isZeroNode for that purpose.
 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
-                                        DebugLoc &dl, SelectionDAG &DAG) {
+                                        DebugLoc &DL, SelectionDAG &DAG) {
   EVT EltVT = VT.getVectorElementType();
   unsigned NumElems = Elts.size();
-  
+
   LoadSDNode *LDBase = NULL;
   unsigned LastLoadedElt = -1U;
-  
+
   // For each element in the initializer, see if we've found a load or an undef.
-  // If we don't find an initial load element, or later load elements are 
+  // If we don't find an initial load element, or later load elements are
   // non-consecutive, bail out.
   for (unsigned i = 0; i < NumElems; ++i) {
     SDValue Elt = Elts[i];
-    
+
     if (!Elt.getNode() ||
         (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
       return SDValue();
@@ -3822,18 +4111,20 @@
   // consecutive loads for the low half, generate a vzext_load node.
   if (LastLoadedElt == NumElems - 1) {
     if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
-      return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
-                         LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+      return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+                         LDBase->getPointerInfo(),
                          LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
-    return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
-                       LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+    return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+                       LDBase->getPointerInfo(),
                        LDBase->isVolatile(), LDBase->isNonTemporal(),
                        LDBase->getAlignment());
   } else if (NumElems == 4 && LastLoadedElt == 1) {
     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
-    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+    SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+                                              Ops, 2, MVT::i32,
+                                              LDBase->getMemOperand());
+    return DAG.getNode(ISD::BIT_CONVERT, DL, VT, ResNode);
   }
   return SDValue();
 }
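
At the C level, this is the kind of initializer the routine folds into one
wide load (a hedged example using the GCC/Clang vector_size extension):

  typedef int v4si __attribute__((vector_size(16)));
  v4si gather(const int *a) {
    // Four adjacent scalar loads feeding a build_vector; replaced by a
    // single 16-byte load when the pointer's alignment allows it.
    v4si r = { a[0], a[1], a[2], a[3] };
    return r;
  }
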
@@ -3841,13 +4132,17 @@
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
-  // All zero's are handled with pxor, all one's are handled with pcmpeqd.
-  if (ISD::isBuildVectorAllZeros(Op.getNode())
-      || ISD::isBuildVectorAllOnes(Op.getNode())) {
-    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+  // All zeros are handled with pxor in SSE2 and above, xorps in SSE1.
+  // All ones are handled with pcmpeqd. In AVX, zeros are handled with
+  // vpxor in 128-bit and xor{pd,ps} in 256-bit, but there is no 256-bit
+  // version of pcmpeqd, so AllOnes is ignored there.
+  if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+      (Op.getValueType().getSizeInBits() != 256 &&
+       ISD::isBuildVectorAllOnes(Op.getNode()))) {
+    // Canonicalize this to <4 x i32> (SSE) to
     // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
     // eliminated on x86-32 hosts.
-    if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+    if (Op.getValueType() == MVT::v4i32)
       return Op;
 
     if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -3881,10 +4176,9 @@
     }
   }
 
-  if (NumNonZero == 0) {
-    // All undef vector. Return an UNDEF.  All zero vectors were handled above.
+  // An all-undef vector: return an UNDEF. All-zero vectors were handled above.
+  if (NumNonZero == 0)
     return DAG.getUNDEF(VT);
-  }
 
   // Special case for single non-zero, non-undef, element.
   if (NumNonZero == 1) {
@@ -3899,9 +4193,10 @@
     if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
         (!IsAllConstants || Idx == 0)) {
       if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
-        // Handle MMX and SSE both.
-        EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
-        unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
+        // Handle SSE only.
+        assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+        EVT VecVT = MVT::v4i32;
+        unsigned VecElts = 4;
 
         // Truncate the value (which may itself be a constant) to i32, and
         // convert it to a vector with movd (S2V+shuffle to zero extend).
@@ -3940,7 +4235,8 @@
                                            DAG);
       } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
         Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
-        EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+        assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+        EVT MiddleVT = MVT::v4i32;
         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
         Item = getShuffleVectorZeroOrUndef(Item, 0, true,
                                            Subtarget->hasSSE2(), DAG);
@@ -4022,7 +4318,7 @@
 
   if (EVTBits == 16 && NumElems == 8) {
     SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
-                                        *this);
+                                      *this);
     if (V.getNode()) return V;
   }
 
@@ -4070,34 +4366,57 @@
     // Check for a build vector of consecutive loads.
     for (unsigned i = 0; i < NumElems; ++i)
       V[i] = Op.getOperand(i);
-    
+
     // Check for elements which are consecutive loads.
     SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
     if (LD.getNode())
       return LD;
-    
-    // For SSE 4.1, use inserts into undef.  
+
+    // For SSE 4.1, use insertps to put the high elements into the low element.
     if (getSubtarget()->hasSSE41()) {
-      V[0] = DAG.getUNDEF(VT);
-      for (unsigned i = 0; i < NumElems; ++i)
-        if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
-          V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
+      SDValue Result;
+      if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+        Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+      else
+        Result = DAG.getUNDEF(VT);
+
+      for (unsigned i = 1; i < NumElems; ++i) {
+        if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
                              Op.getOperand(i), DAG.getIntPtrConstant(i));
-      return V[0];
+      }
+      return Result;
     }
-    
-    // Otherwise, expand into a number of unpckl*
-    // e.g. for v4f32
+
+    // Otherwise, expand into a number of unpckl*. Start by extending each of
+    // our (non-undef) elements to the full vector width with the element in
+    // the bottom slot of the vector (which generates no code for SSE).
+    for (unsigned i = 0; i < NumElems; ++i) {
+      if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+      else
+        V[i] = DAG.getUNDEF(VT);
+    }
+
+    // Next, we iteratively mix elements, e.g. for v4f32:
     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
-    for (unsigned i = 0; i < NumElems; ++i)
-      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
-    NumElems >>= 1;
-    while (NumElems != 0) {
-      for (unsigned i = 0; i < NumElems; ++i)
-        V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
-      NumElems >>= 1;
+    unsigned EltStride = NumElems >> 1;
+    while (EltStride != 0) {
+      for (unsigned i = 0; i < EltStride; ++i) {
+        // If V[i+EltStride] is undef and this is the first round of mixing,
+        // then it is safe to just drop this shuffle: V[i] is already in the
+        // right place, and the single element being merged in is undef, so
+        // the shuffle can be dropped.  This isn't safe for successive rounds
+        // because they will permute elements within both vectors.
+        if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
+            EltStride == NumElems/2)
+          continue;
+
+        V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+      }
+      EltStride >>= 1;
     }
     return V[0];
   }
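
To see why the EltStride loop above ends with <3,2,1,0> in V[0], here is a
small self-contained simulation of the merge tree for NumElems == 4 (a toy
model only: lanes are listed low to high and -1 marks an unknown lane):

    #include <cstdio>

    int main() {
      // V[i] starts with element i in the bottom slot, as built by the
      // SCALAR_TO_VECTOR loop; the inner loop interleaves the low halves,
      // which is what unpckl does on 4 x 32-bit lanes.
      int V[4][4] = {{0,-1,-1,-1}, {1,-1,-1,-1}, {2,-1,-1,-1}, {3,-1,-1,-1}};
      for (unsigned Stride = 4 >> 1; Stride != 0; Stride >>= 1)
        for (unsigned i = 0; i < Stride; ++i) {
          int Tmp[4];
          for (unsigned j = 0; j < 2; ++j) { // interleave the low halves
            Tmp[2*j]   = V[i][j];
            Tmp[2*j+1] = V[i+Stride][j];
          }
          for (unsigned j = 0; j < 4; ++j) V[i][j] = Tmp[j];
        }
      printf("<%d,%d,%d,%d>\n", V[0][3], V[0][2], V[0][1], V[0][0]);
      return 0;
    }

This prints <3,2,1,0>, matching Step 2 in the comment.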
@@ -4136,10 +4455,10 @@
 // 2. [ssse3] 1 x pshufb
 // 3. [ssse3] 2 x pshufb + 1 x por
 // 4. [all]   mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
-static
-SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
-                                 SelectionDAG &DAG,
-                                 const X86TargetLowering &TLI) {
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
   DebugLoc dl = SVOp->getDebugLoc();
@@ -4190,7 +4509,7 @@
   // quads, disable the next transformation since it does not help SSSE3.
   bool V1Used = InputQuads[0] || InputQuads[1];
   bool V2Used = InputQuads[2] || InputQuads[3];
-  if (TLI.getSubtarget()->hasSSSE3()) {
+  if (Subtarget->hasSSSE3()) {
     if (InputQuads.count() == 2 && V1Used && V2Used) {
       BestLoQuad = InputQuads.find_first();
       BestHiQuad = InputQuads.find_next(BestLoQuad);
@@ -4249,15 +4568,21 @@
     // If we've eliminated the use of V2, and the new mask is a pshuflw or
     // pshufhw, that's as cheap as it gets.  Return the new shuffle.
     if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
-      return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+      unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
+      unsigned TargetMask = 0;
+      NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
                                   DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+      TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()):
+                             X86::getShufflePSHUFLWImmediate(NewV.getNode());
+      V1 = NewV.getOperand(0);
+      return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
     }
   }
 
   // If we have SSSE3, and all words of the result are from 1 input vector,
   // case 2 is generated, otherwise case 3 is generated.  If no SSSE3
   // is present, fall back to case 4.
-  if (TLI.getSubtarget()->hasSSSE3()) {
+  if (Subtarget->hasSSSE3()) {
     SmallVector<SDValue,16> pshufbMask;
 
     // If we have elements from both input vectors, set the high bit of the
@@ -4324,6 +4649,12 @@
       MaskV.push_back(i);
     NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
                                 &MaskV[0]);
+
+    if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+      NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
+                               NewV.getOperand(0),
+                               X86::getShufflePSHUFLWImmediate(NewV.getNode()),
+                               DAG);
   }
 
   // If BestHi >= 0, generate a pshufhw to put the high elements in order,
@@ -4346,6 +4677,12 @@
     }
     NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
                                 &MaskV[0]);
+
+    if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+      NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
+                              NewV.getOperand(0),
+                              X86::getShufflePSHUFHWImmediate(NewV.getNode()),
+                              DAG);
   }
 
   // In case BestHi & BestLo were both -1, which means each quadword has a word
@@ -4522,21 +4859,19 @@
 }
 
 /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
 /// done when every pair / quad of shuffle mask elements point to elements in
 /// the right sequence. e.g.
-/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
 static
 SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
-                                 SelectionDAG &DAG,
-                                 const TargetLowering &TLI, DebugLoc dl) {
+                                 SelectionDAG &DAG, DebugLoc dl) {
   EVT VT = SVOp->getValueType(0);
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
   unsigned NumElems = VT.getVectorNumElements();
   unsigned NewWidth = (NumElems == 4) ? 2 : 4;
-  EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
-  EVT NewVT = MaskVT;
+  EVT NewVT;
   switch (VT.getSimpleVT().SimpleTy) {
   default: assert(false && "Unexpected!");
   case MVT::v4f32: NewVT = MVT::v2f64; break;
@@ -4545,12 +4880,6 @@
   case MVT::v16i8: NewVT = MVT::v4i32; break;
   }
 
-  if (NewWidth == 2) {
-    if (VT.isInteger())
-      NewVT = MVT::v2i64;
-    else
-      NewVT = MVT::v2f64;
-  }
   int Scale = NumElems / NewWidth;
   SmallVector<int, 8> MaskVec;
   for (unsigned i = 0; i < NumElems; i += Scale) {
@@ -4759,58 +5088,376 @@
   return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
 }
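
A worked instance of the RewriteAsNarrowerShuffle scaling above, as a
standalone sketch (it omits the undef handling the real code performs):

    #include <cassert>

    int main() {
      // The v8i16 mask <2,3, 10,11, 0,1, 14,15> narrows to the v4i32 mask
      // <1,5,0,7>: each aligned group <k, k+1> becomes the index k / Scale.
      const int Wide[8] = {2, 3, 10, 11, 0, 1, 14, 15};
      const int Scale = 2; // NumElems / NewWidth
      int Narrow[4];
      for (int i = 0; i < 8; i += Scale) {
        assert(Wide[i] % Scale == 0 && Wide[i + 1] == Wide[i] + 1);
        Narrow[i / Scale] = Wide[i] / Scale;
      }
      assert(Narrow[0] == 1 && Narrow[1] == 5 &&
             Narrow[2] == 0 && Narrow[3] == 7);
      return 0;
    }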
 
-SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
-  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+static bool MayFoldVectorLoad(SDValue V) {
+  if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+    V = V.getOperand(0);
+  if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    V = V.getOperand(0);
+  if (MayFoldLoad(V))
+    return true;
+  return false;
+}
+
+// FIXME: the version above should always be used. Since there's
+// a bug where several vector shuffles can't be folded because the
+// DAG is not updated during lowering and a node claims to have two
+// uses while it only has one, use this version, and let isel match
+// another instruction if the load really happens to have more than
+// one use. Remove this version after this bug gets fixed.
+// rdar://8434668, PR8156
+static bool RelaxedMayFoldVectorLoad(SDValue V) {
+  if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+    V = V.getOperand(0);
+  if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    V = V.getOperand(0);
+  if (ISD::isNormalLoad(V.getNode()))
+    return true;
+  return false;
+}
+
+/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
+/// a vector extract, and if both can be later optimized into a single load.
+/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
+/// here because otherwise a target specific shuffle node is going to be
+/// emitted for this shuffle, and the optimization not done.
+/// FIXME: This is probably not the best approach, but it fixes the problem
+/// until the right path is decided.
+static
+bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
+                                         const TargetLowering &TLI) {
+  EVT VT = V.getValueType();
+  ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
+
+  // Be sure that the vector shuffle is present in a pattern like this:
+  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
+  if (!V.hasOneUse())
+    return false;
+
+  SDNode *N = *V.getNode()->use_begin();
+  if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return false;
+
+  SDValue EltNo = N->getOperand(1);
+  if (!isa<ConstantSDNode>(EltNo))
+    return false;
+
+  // If the bit convert changed the number of elements, it is unsafe
+  // to examine the mask.
+  bool HasShuffleIntoBitcast = false;
+  if (V.getOpcode() == ISD::BIT_CONVERT) {
+    EVT SrcVT = V.getOperand(0).getValueType();
+    if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
+      return false;
+    V = V.getOperand(0);
+    HasShuffleIntoBitcast = true;
+  }
+
+  // Select the input vector, guarding against an out-of-range extract index.
+  unsigned NumElems = VT.getVectorNumElements();
+  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+  int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
+  V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
+
+  // Skip one more bit_convert if necessary
+  if (V.getOpcode() == ISD::BIT_CONVERT)
+    V = V.getOperand(0);
+
+  if (ISD::isNormalLoad(V.getNode())) {
+    // Is the original load suitable?
+    LoadSDNode *LN0 = cast<LoadSDNode>(V);
+
+    // FIXME: avoid the multi-use bug that is preventing lots of
+    // foldings from being detected; this is still wrong of course, but
+    // it gives the temporarily desired behavior, and if it happens that
+    // the load really has more uses, isel will not fold it and will
+    // generate poor code.
+    if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
+      return false;
+
+    if (!HasShuffleIntoBitcast)
+      return true;
+
+    // If there's a bitcast before the shuffle, check that the load's type
+    // and alignment are valid.
+    unsigned Align = LN0->getAlignment();
+    unsigned NewAlign =
+      TLI.getTargetData()->getABITypeAlignment(
+                                    VT.getTypeForEVT(*DAG.getContext()));
+
+    if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+      return false;
+  }
+
+  return true;
+}
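
At the source level, the guarded pattern corresponds to reading one lane of
a shuffled vector load, which folds to a single scalar load (an illustrative
example, not part of the patch):

    // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0)
    //   ==> (f32 load $addr+4)
    float LaneOneOf(const float *Addr) {
      return Addr[1]; // one 4-byte load once the transformation fires
    }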
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+
+  // Canonicalize to v2f64.
+  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, V1);
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                     getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+                                          V1, DAG));
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+                        bool HasSSE2) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+
+  assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+  if (HasSSE2 && VT == MVT::v2f64)
+    return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+  // v4f32 or v4i32
+  return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+
+  assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+         "unsupported shuffle type");
+
+  if (V2.getOpcode() == ISD::UNDEF)
+    V2 = V1;
+
+  // v4i32 or v4f32
+  return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
   SDValue V1 = Op.getOperand(0);
   SDValue V2 = Op.getOperand(1);
   EVT VT = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
   unsigned NumElems = VT.getVectorNumElements();
-  bool isMMX = VT.getSizeInBits() == 64;
-  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
-  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
-  bool V1IsSplat = false;
-  bool V2IsSplat = false;
+
+  // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
+  // operand of these instructions can only be memory, so check whether there's
+  // a potential load folding here; otherwise use SHUFPS or MOVSD to match the
+  // same masks.
+  bool CanFoldLoad = false;
+
+  // Trivial case, when V2 comes from a load.
+  if (MayFoldVectorLoad(V2))
+    CanFoldLoad = true;
+
+  // When V1 is a load, it can be folded later into a store in isel, example:
+  //  (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+  //    turns into:
+  //  (MOVLPSmr addr:$src1, VR128:$src2)
+  // So, recognize this potential and also use MOVLPS or MOVLPD
+  if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+    CanFoldLoad = true;
+
+  if (CanFoldLoad) {
+    if (HasSSE2 && NumElems == 2)
+      return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+    if (NumElems == 4)
+      return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+  }
+
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  // movl and movlp will both match v2i64, but v2i64 is never matched by
+  // movl earlier because we make it strict to avoid messing with the movlp
+  // load folding logic (see the code above the getMOVLP call). So match it
+  // here; this is horrible, but it will stay like this until we move all
+  // shuffle matching to x86-specific nodes. Note that for the 1st condition
+  // all types are matched with movsd.
+  if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
+    return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+  else if (HasSSE2)
+    return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+
+
+  assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+  // Invert the operand order and use SHUFPS to match it.
+  return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+                              X86::getShuffleSHUFImmediate(SVOp), DAG);
+}
+
+static inline unsigned getUNPCKLOpcode(EVT VT) {
+  switch(VT.getSimpleVT().SimpleTy) {
+  case MVT::v4i32: return X86ISD::PUNPCKLDQ;
+  case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+  case MVT::v4f32: return X86ISD::UNPCKLPS;
+  case MVT::v2f64: return X86ISD::UNPCKLPD;
+  case MVT::v16i8: return X86ISD::PUNPCKLBW;
+  case MVT::v8i16: return X86ISD::PUNPCKLWD;
+  default:
+    llvm_unreachable("Unknow type for unpckl");
+  }
+  return 0;
+}
+
+static inline unsigned getUNPCKHOpcode(EVT VT) {
+  switch(VT.getSimpleVT().SimpleTy) {
+  case MVT::v4i32: return X86ISD::PUNPCKHDQ;
+  case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+  case MVT::v4f32: return X86ISD::UNPCKHPS;
+  case MVT::v2f64: return X86ISD::UNPCKHPD;
+  case MVT::v16i8: return X86ISD::PUNPCKHBW;
+  case MVT::v8i16: return X86ISD::PUNPCKHWD;
+  default:
+    llvm_unreachable("Unknow type for unpckh");
+  }
+  return 0;
+}
+
+static
+SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
+                               const TargetLowering &TLI,
+                               const X86Subtarget *Subtarget) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
 
   if (isZeroShuffle(SVOp))
     return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
 
-  // Promote splats to v4f32.
+  // Handle splat operations
   if (SVOp->isSplat()) {
-    if (isMMX || NumElems < 4)
+    // Special case: this is the only place now where it's
+    // allowed to return a vector_shuffle operation without
+    // using a target-specific node, because *hopefully* it
+    // will be optimized away by the DAG combiner.
+    if (VT.getVectorNumElements() <= 4 &&
+        CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
       return Op;
-    return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
+
+    // Handle splats by matching through known masks
+    if (VT.getVectorNumElements() <= 4)
+      return SDValue();
+
+    // Canonicalize all of the remaining to v4f32.
+    return PromoteSplat(SVOp, DAG);
   }
 
   // If the shuffle can be profitably rewritten as a narrower shuffle, then
   // do it!
   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
-    SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+    SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
     if (NewOp.getNode())
-      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
-                         LowerVECTOR_SHUFFLE(NewOp, DAG));
+      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, NewOp);
   } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
     // FIXME: Figure out a cleaner way to do this.
     // Try to make use of movq to zero out the top part.
     if (ISD::isBuildVectorAllZeros(V2.getNode())) {
-      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
       if (NewOp.getNode()) {
         if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
           return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
                               DAG, Subtarget, dl);
       }
     } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
-      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
       if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
         return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
                             DAG, Subtarget, dl);
     }
   }
+  return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned NumElems = VT.getVectorNumElements();
+  bool isMMX = VT.getSizeInBits() == 64;
+  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+  bool V1IsSplat = false;
+  bool V2IsSplat = false;
+  bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+  bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+  bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
 
-  if (X86::isPSHUFDMask(SVOp))
+  // Shuffle operations on MMX are not supported.
+  if (isMMX)
     return Op;
 
+  // Vector shuffle lowering takes 3 steps:
+  //
+  // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+  //    narrowing and commutation of operands should be handled.
+  // 2) Matching of shuffles with known shuffle masks to x86 target specific
+  //    shuffle nodes.
+  // 3) Rewriting of unmatched masks into new generic shuffle operations,
+  //    so the shuffle can be broken into other shuffles and the legalizer can
+  //    try the lowering again.
+  //
+  // The general idea is that no vector_shuffle operation should be left to
+  // be matched during isel; all of them must be converted to a target
+  // specific node here.
+
+  // Normalize the input vectors. Here splats, zeroed vectors, profitable
+  // narrowing and commutation of operands should be handled. The actual code
+  // doesn't include all of those yet; work in progress...
+  SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+  if (NewOp.getNode())
+    return NewOp;
+
+  // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+  // unpckh_undef). Only use pshufd if speed is more important than size.
+  if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
+    if (VT != MVT::v2i64 && VT != MVT::v2f64)
+      return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+  if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
+    if (VT != MVT::v2i64 && VT != MVT::v2f64)
+      return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+  if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
+      RelaxedMayFoldVectorLoad(V1))
+    return getMOVDDup(Op, dl, V1, DAG);
+
+  if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+    return getMOVHighToLow(Op, dl, DAG);
+
+  // Used to match splats
+  if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+      (VT == MVT::v2f64 || VT == MVT::v2i64))
+    return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+  if (X86::isPSHUFDMask(SVOp)) {
+    // The actual implementation will match the mask in the if above, and then
+    // during isel it can be matched to several different instructions, not
+    // only pshufd as its name says. Sad but true; emulate the behavior for
+    // now...
+    if (X86::isMOVDDUPMask(SVOp) && (VT == MVT::v4f32 || VT == MVT::v2i64))
+      return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+    unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+
+    if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+      return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+
+    if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+      return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1,
+                                  TargetMask, DAG);
+
+    if (VT == MVT::v4f32)
+      return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1,
+                                  TargetMask, DAG);
+  }
+
   // Check if this can be converted into a logical shift.
   bool isLeft = false;
   unsigned ShAmt = 0;
@@ -4830,17 +5477,30 @@
       return V2;
     if (ISD::isBuildVectorAllZeros(V1.getNode()))
       return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
-    if (!isMMX)
-      return Op;
+    if (!X86::isMOVLPMask(SVOp)) {
+      if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+        return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+
+      if (VT == MVT::v4i32 || VT == MVT::v4f32)
+        return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+    }
   }
 
   // FIXME: fold these into legal mask.
-  if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
-                 X86::isMOVSLDUPMask(SVOp) ||
-                 X86::isMOVHLPSMask(SVOp) ||
-                 X86::isMOVLHPSMask(SVOp) ||
-                 X86::isMOVLPMask(SVOp)))
-    return Op;
+  if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+    return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+
+  if (X86::isMOVHLPSMask(SVOp))
+    return getMOVHighToLow(Op, dl, DAG);
+
+  if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+    return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+
+  if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+    return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+
+  if (X86::isMOVLPMask(SVOp))
+    return getMOVLP(Op, dl, DAG, HasSSE2);
 
   if (ShouldXformToMOVHLPS(SVOp) ||
       ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
@@ -4880,11 +5540,11 @@
     return getMOVL(DAG, dl, VT, V2, V1);
   }
 
-  if (X86::isUNPCKL_v_undef_Mask(SVOp) ||
-      X86::isUNPCKH_v_undef_Mask(SVOp) ||
-      X86::isUNPCKLMask(SVOp) ||
-      X86::isUNPCKHMask(SVOp))
-    return Op;
+  if (X86::isUNPCKLMask(SVOp))
+    return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+
+  if (X86::isUNPCKHMask(SVOp))
+    return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
 
   if (V2IsSplat) {
     // Normalize mask so all entries that point to V2 points to its first
@@ -4906,28 +5566,67 @@
     // FIXME: this seems wrong.
     SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
     ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
-    if (X86::isUNPCKL_v_undef_Mask(NewSVOp) ||
-        X86::isUNPCKH_v_undef_Mask(NewSVOp) ||
-        X86::isUNPCKLMask(NewSVOp) ||
-        X86::isUNPCKHMask(NewSVOp))
-      return NewOp;
-  }
 
-  // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
+    if (X86::isUNPCKLMask(NewSVOp))
+      return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+
+    if (X86::isUNPCKHMask(NewSVOp))
+      return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+  }
 
   // Normalize the node to match x86 shuffle ops if needed
-  if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
     return CommuteVectorShuffle(SVOp, DAG);
 
-  // Check for legal shuffle and return?
-  SmallVector<int, 16> PermMask;
-  SVOp->getMask(PermMask);
-  if (isShuffleMaskLegal(PermMask, VT))
-    return Op;
+  // The checks below are all present in isShuffleMaskLegal, but they are
+  // inlined here right now to let us directly emit target-specific nodes;
+  // they will be removed one by one until none of them returns Op anymore.
+  SmallVector<int, 16> M;
+  SVOp->getMask(M);
+
+  if (isPALIGNRMask(M, VT, HasSSSE3))
+    return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+                                X86::getShufflePALIGNRImmediate(SVOp),
+                                DAG);
+
+  if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
+      SVOp->getSplatIndex() == 0 && V2IsUndef) {
+    if (VT == MVT::v2f64)
+      return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+    if (VT == MVT::v2i64)
+      return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+  }
+
+  if (isPSHUFHWMask(M, VT))
+    return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
+                                X86::getShufflePSHUFHWImmediate(SVOp),
+                                DAG);
+
+  if (isPSHUFLWMask(M, VT))
+    return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
+                                X86::getShufflePSHUFLWImmediate(SVOp),
+                                DAG);
+
+  if (isSHUFPMask(M, VT)) {
+    unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+    if (VT == MVT::v4f32 || VT == MVT::v4i32)
+      return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2,
+                                  TargetMask, DAG);
+    if (VT == MVT::v2f64 || VT == MVT::v2i64)
+      return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2,
+                                  TargetMask, DAG);
+  }
+
+  if (X86::isUNPCKL_v_undef_Mask(SVOp))
+    if (VT != MVT::v2i64 && VT != MVT::v2f64)
+      return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+  if (X86::isUNPCKH_v_undef_Mask(SVOp))
+    if (VT != MVT::v2i64 && VT != MVT::v2f64)
+      return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
 
   // Handle v8i16 specifically since SSE can do byte extraction and insertion.
   if (VT == MVT::v8i16) {
-    SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
+    SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
     if (NewOp.getNode())
       return NewOp;
   }
@@ -4938,8 +5637,8 @@
       return NewOp;
   }
 
-  // Handle all 4 wide cases with a number of shuffles except for MMX.
-  if (NumElems == 4 && !isMMX)
+  // Handle all 4 wide cases with a number of shuffles.
+  if (NumElems == 4)
     return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
 
   return SDValue();
@@ -5081,8 +5780,6 @@
     unsigned Opc;
     if (VT == MVT::v8i16)
       Opc = X86ISD::PINSRW;
-    else if (VT == MVT::v4i16)
-      Opc = X86ISD::MMX_PINSRW;
     else if (VT == MVT::v16i8)
       Opc = X86ISD::PINSRB;
     else
@@ -5138,8 +5835,7 @@
       N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
     if (N2.getValueType() != MVT::i32)
       N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
-    return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW,
-                       dl, VT, N0, N1, N2);
+    return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
   }
   return SDValue();
 }
@@ -5147,22 +5843,16 @@
 SDValue
 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
-  
+
   if (Op.getValueType() == MVT::v1i64 &&
       Op.getOperand(0).getValueType() == MVT::i64)
     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
 
   SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
-  EVT VT = MVT::v2i32;
-  switch (Op.getValueType().getSimpleVT().SimpleTy) {
-  default: break;
-  case MVT::v16i8:
-  case MVT::v8i16:
-    VT = MVT::v4i32;
-    break;
-  }
+  assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
+         "Expected an SSE type!");
   return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
-                     DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
+                     DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
 }
 
 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
@@ -5337,7 +6027,7 @@
   // load.
   if (isGlobalStubReference(OpFlags))
     Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
-                         PseudoSourceValue::getGOT(), 0, false, false, 0);
+                         MachinePointerInfo::getGOT(), false, false, 0);
 
   // If there was a non-zero offset that we didn't fold, create an explicit
   // addition for it.
@@ -5409,14 +6099,14 @@
                                    const EVT PtrVT, TLSModel::Model model,
                                    bool is64Bit) {
   DebugLoc dl = GA->getDebugLoc();
-  // Get the Thread Pointer
-  SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
-                             DebugLoc(), PtrVT,
-                             DAG.getRegister(is64Bit? X86::FS : X86::GS,
-                                             MVT::i32));
 
-  SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
-                                      NULL, 0, false, false, 0);
+  // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+  Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
+                                                         is64Bit ? 257 : 256));
+
+  SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+                                      DAG.getIntPtrConstant(0),
+                                      MachinePointerInfo(Ptr), false, false, 0);
 
   unsigned char OperandFlags = 0;
   // Most TLS accesses are not RIP relative, even on x86-64.  One exception is
@@ -5435,14 +6125,14 @@
 
   // emit "addl x at ntpoff,%eax" (local exec) or "addl x at indntpoff,%eax" (initial
   // exec)
-  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, 
+  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                            GA->getValueType(0),
                                            GA->getOffset(), OperandFlags);
   SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
 
   if (model == TLSModel::InitialExec)
     Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
-                         PseudoSourceValue::getGOT(), 0, false, false, 0);
+                         MachinePointerInfo::getGOT(), false, false, 0);
 
   // The address of the thread local variable is the add of the thread
   // pointer with the offset of the variable.
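
The 256/257 address spaces above are the x86 backend's convention for %gs-
and %fs-relative memory. As an illustration (not part of the patch), clang
exposes the same convention at source level, e.g. reading the i386 Linux
stack-protector cookie that lives at %gs:0x14:

    typedef unsigned long __attribute__((address_space(256))) *gs_ptr;

    unsigned long ReadStackGuard(void) {
      return *(gs_ptr)0x14; // compiles to: movl %gs:20, %eax
    }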
@@ -5451,29 +6141,29 @@
 
 SDValue
 X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
-  
+
   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = GA->getGlobal();
 
   if (Subtarget->isTargetELF()) {
     // TODO: implement the "local dynamic" model
     // TODO: implement the "initial exec"model for pic executables
-    
+
     // If GV is an alias then use the aliasee for determining
     // thread-localness.
     if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
       GV = GA->resolveAliasedGlobal(false);
-    
-    TLSModel::Model model 
+
+    TLSModel::Model model
       = getTLSModel(GV, getTargetMachine().getRelocationModel());
-    
+
     switch (model) {
       case TLSModel::GeneralDynamic:
       case TLSModel::LocalDynamic: // not implemented
         if (Subtarget->is64Bit())
           return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
         return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-        
+
       case TLSModel::InitialExec:
       case TLSModel::LocalExec:
         return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
@@ -5484,7 +6174,7 @@
     unsigned char OpFlag = 0;
     unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
                            X86ISD::WrapperRIP : X86ISD::Wrapper;
-    
+
     // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
     // global base reg.
     bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
@@ -5493,24 +6183,24 @@
       OpFlag = X86II::MO_TLVP_PIC_BASE;
     else
       OpFlag = X86II::MO_TLVP;
-    DebugLoc DL = Op.getDebugLoc();    
+    DebugLoc DL = Op.getDebugLoc();
     SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                                 getPointerTy(),
                                                 GA->getOffset(), OpFlag);
     SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-  
+
     // With PIC32, the address is actually $g + Offset.
     if (PIC32)
       Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                            DAG.getNode(X86ISD::GlobalBaseReg,
                                        DebugLoc(), getPointerTy()),
                            Offset);
-    
+
     // Lowering the machine isd will make sure everything is in the right
     // location.
     SDValue Args[] = { Offset };
     SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1);
-    
+
     // TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
     MFI->setAdjustsStack(true);
@@ -5520,7 +6210,7 @@
     unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
     return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
   }
-  
+
   assert(false &&
          "TLS not implemented for this target.");
 
@@ -5579,12 +6269,8 @@
                                            SelectionDAG &DAG) const {
   EVT SrcVT = Op.getOperand(0).getValueType();
 
-  if (SrcVT.isVector()) {
-    if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
-      return Op;
-    }
+  if (SrcVT.isVector())
     return SDValue();
-  }
 
   assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
          "Unknown SINT_TO_FP to lower!");
@@ -5605,25 +6291,36 @@
   SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
   SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
                                StackSlot,
-                               PseudoSourceValue::getFixedStack(SSFI), 0,
+                               MachinePointerInfo::getFixedStack(SSFI),
                                false, false, 0);
   return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
 }
 
 SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
-                                     SDValue StackSlot, 
+                                     SDValue StackSlot,
                                      SelectionDAG &DAG) const {
   // Build the FILD
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
   SDVTList Tys;
   bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
   if (useSSE)
     Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
   else
     Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+
+  unsigned ByteSize = SrcVT.getSizeInBits()/8;
+
+  int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+  MachineMemOperand *MMO =
+    DAG.getMachineFunction()
+    .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                          MachineMemOperand::MOLoad, ByteSize, ByteSize);
+
   SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
-  SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
-                               Tys, Ops, array_lengthof(Ops));
+  SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
+                                           X86ISD::FILD, DL,
+                                           Tys, Ops, array_lengthof(Ops),
+                                           SrcVT, MMO);
 
   if (useSSE) {
     Chain = Result.getValue(1);
@@ -5633,15 +6330,23 @@
     // shouldn't be necessary except that RFP cannot be live across
     // multiple blocks. When stackifier is fixed, they can be uncoupled.
     MachineFunction &MF = DAG.getMachineFunction();
-    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+    unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
+    int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
     SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
     Tys = DAG.getVTList(MVT::Other);
     SDValue Ops[] = {
       Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
     };
-    Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops));
-    Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
-                         PseudoSourceValue::getFixedStack(SSFI), 0,
+    MachineMemOperand *MMO =
+      DAG.getMachineFunction()
+      .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                            MachineMemOperand::MOStore, SSFISize, SSFISize);
+
+    Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
+                                    Ops, array_lengthof(Ops),
+                                    Op.getValueType(), MMO);
+    Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
+                         MachinePointerInfo::getFixedStack(SSFI),
                          false, false, 0);
   }
 
@@ -5715,12 +6420,12 @@
                                         DAG.getIntPtrConstant(0)));
   SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
   SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
-                              PseudoSourceValue::getConstantPool(), 0,
+                              MachinePointerInfo::getConstantPool(),
                               false, false, 16);
   SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
   SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
   SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
-                              PseudoSourceValue::getConstantPool(), 0,
+                              MachinePointerInfo::getConstantPool(),
                               false, false, 16);
   SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
 
@@ -5805,24 +6510,34 @@
     SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
                                      getPointerTy(), StackSlot, WordOff);
     SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
-                                  StackSlot, NULL, 0, false, false, 0);
+                                  StackSlot, MachinePointerInfo(),
+                                  false, false, 0);
     SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
-                                  OffsetSlot, NULL, 0, false, false, 0);
+                                  OffsetSlot, MachinePointerInfo(),
+                                  false, false, 0);
     SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
     return Fild;
   }
 
   assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
-                                StackSlot, NULL, 0, false, false, 0);
+                                StackSlot, MachinePointerInfo(),
+                               false, false, 0);
   // For i64 source, we need to add the appropriate power of 2 if the input
   // was negative.  This is the same as the optimization in
  // DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
   // we must be careful to do the computation in x87 extended precision, not
   // in SSE. (The generic code can't know it's OK to do this, or how to.)
+  int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+  MachineMemOperand *MMO =
+    DAG.getMachineFunction()
+    .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                          MachineMemOperand::MOLoad, 8, 8);
+
   SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
   SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
-  SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
+  SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
+                                         MVT::i64, MMO);
 
   APInt FF(32, 0x5F800000ULL);
 
@@ -5846,8 +6561,8 @@
   // Load the value out, extending it from f32 to f80.
   // FIXME: Avoid the extend by constructing the right constant pool?
   SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(),
-                                 FudgePtr, PseudoSourceValue::getConstantPool(),
-                                 0, MVT::f32, false, false, 4);
+                                 FudgePtr, MachinePointerInfo::getConstantPool(),
+                                 MVT::f32, false, false, 4);
   // Extend everything to 80 bits to force it to be done on x87.
   SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
   return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
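
For reference on the magic constant above: 0x5F800000 is 2^64 encoded as an
IEEE-754 single (sign 0, biased exponent 64 + 127 = 191 = 0xBF, mantissa 0),
the value added back when the i64 input was negative. A self-contained check:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      const uint32_t Bits = 0x5F800000u; // 0xBF << 23
      float F;
      std::memcpy(&F, &Bits, sizeof F);
      assert(F == 18446744073709551616.0f); // exactly 2^64
      return 0;
    }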
@@ -5855,7 +6570,7 @@
 
 std::pair<SDValue,SDValue> X86TargetLowering::
 FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
 
   EVT DstTy = Op.getValueType();
 
@@ -5884,6 +6599,8 @@
   int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
 
+
+
   unsigned Opc;
   switch (DstTy.getSimpleVT().SimpleTy) {
   default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
@@ -5894,37 +6611,43 @@
 
   SDValue Chain = DAG.getEntryNode();
   SDValue Value = Op.getOperand(0);
-  if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
+  EVT TheVT = Op.getOperand(0).getValueType();
+  if (isScalarFPTypeInSSEReg(TheVT)) {
     assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
-    Chain = DAG.getStore(Chain, dl, Value, StackSlot,
-                         PseudoSourceValue::getFixedStack(SSFI), 0,
+    Chain = DAG.getStore(Chain, DL, Value, StackSlot,
+                         MachinePointerInfo::getFixedStack(SSFI),
                          false, false, 0);
     SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
     SDValue Ops[] = {
-      Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
+      Chain, StackSlot, DAG.getValueType(TheVT)
     };
-    Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
+
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                              MachineMemOperand::MOLoad, MemSize, MemSize);
+    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
+                                    DstTy, MMO);
     Chain = Value.getValue(1);
     SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
     StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
   }
 
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                            MachineMemOperand::MOStore, MemSize, MemSize);
+
   // Build the FP_TO_INT*_IN_MEM
   SDValue Ops[] = { Chain, Value, StackSlot };
-  SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3);
+  SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+                                         Ops, 3, DstTy, MMO);
 
   return std::make_pair(FIST, StackSlot);
 }
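
In C terms, the helper above reflects the fact that x87 fistp can only write
to memory: the conversion goes through a stack slot that the caller re-loads.
A rough sketch under that reading (the real lowering also swaps the FPU
control word to round toward zero, which the FP_TO*INT*_IN_MEM nodes handle):

    long long FPToSIntViaSlot(double D) {
      long long Slot;      // the stack object created above
      Slot = (long long)D; // fldl D; fistpll Slot
      return Slot;         // the DAG.getLoad in LowerFP_TO_SINT below
    }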
 
 SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
                                            SelectionDAG &DAG) const {
-  if (Op.getValueType().isVector()) {
-    if (Op.getValueType() == MVT::v2i32 &&
-        Op.getOperand(0).getValueType() == MVT::v2f64) {
-      return Op;
-    }
+  if (Op.getValueType().isVector())
     return SDValue();
-  }
 
   std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
   SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -5933,7 +6656,7 @@
 
   // Load the result.
   return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
-                     FIST, StackSlot, NULL, 0, false, false, 0);
+                     FIST, StackSlot, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
@@ -5944,7 +6667,7 @@
 
   // Load the result.
   return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
-                     FIST, StackSlot, NULL, 0, false, false, 0);
+                     FIST, StackSlot, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue X86TargetLowering::LowerFABS(SDValue Op,
@@ -5970,7 +6693,7 @@
   Constant *C = ConstantVector::get(CV);
   SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
   SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                             PseudoSourceValue::getConstantPool(), 0,
+                             MachinePointerInfo::getConstantPool(),
                              false, false, 16);
   return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
 }
@@ -5997,7 +6720,7 @@
   Constant *C = ConstantVector::get(CV);
   SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
   SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                             PseudoSourceValue::getConstantPool(), 0,
+                             MachinePointerInfo::getConstantPool(),
                              false, false, 16);
   if (VT.isVector()) {
     return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
@@ -6046,7 +6769,7 @@
   Constant *C = ConstantVector::get(CV);
   SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
   SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
-                              PseudoSourceValue::getConstantPool(), 0,
+                              MachinePointerInfo::getConstantPool(),
                               false, false, 16);
   SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
 
@@ -6075,7 +6798,7 @@
   C = ConstantVector::get(CV);
   CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
   SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                              PseudoSourceValue::getConstantPool(), 0,
+                              MachinePointerInfo::getConstantPool(),
                               false, false, 16);
   SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
 
@@ -6427,11 +7150,8 @@
 
   switch (VT.getSimpleVT().SimpleTy) {
   default: break;
-  case MVT::v8i8:
   case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
-  case MVT::v4i16:
   case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
-  case MVT::v2i32:
   case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
   case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
   }
@@ -6533,7 +7253,7 @@
   if (Cond.getOpcode() == ISD::AND &&
       Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
-    if (C && C->getAPIntValue() == 1) 
+    if (C && C->getAPIntValue() == 1)
       Cond = Cond.getOperand(0);
   }
 
@@ -6566,7 +7286,7 @@
 
     // We know the result of AND is compared against zero. Try to match
     // it to BT.
-    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { 
+    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
       SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
       if (NewSetCC.getNode()) {
         CC = NewSetCC.getOperand(0);
@@ -6640,7 +7360,7 @@
   if (Cond.getOpcode() == ISD::AND &&
       Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
-    if (C && C->getAPIntValue() == 1) 
+    if (C && C->getAPIntValue() == 1)
       Cond = Cond.getOperand(0);
   }
 
@@ -6741,7 +7461,7 @@
 
     // We know the result of AND is compared against zero. Try to match
     // it to BT.
-    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { 
+    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
       SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
       if (NewSetCC.getNode()) {
         CC = NewSetCC.getOperand(0);
@@ -6768,8 +7488,8 @@
 SDValue
 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                            SelectionDAG &DAG) const {
-  assert(Subtarget->isTargetCygMing() &&
-         "This should be used only on Cygwin/Mingw targets");
+  assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
+         "This should be used only on Windows targets");
   DebugLoc dl = Op.getDebugLoc();
 
   // Get the inputs.
@@ -6786,7 +7506,7 @@
 
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
 
-  Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag);
+  Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
   Flag = Chain.getValue(1);
 
   Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
@@ -6800,15 +7520,15 @@
   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
 
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
 
-  if (!Subtarget->is64Bit()) {
+  if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
     // vastart just stores the address of the VarArgsFrameIndex slot into the
     // memory location argument.
     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                    getPointerTy());
-    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
-                        false, false, 0);
+    return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+                        MachinePointerInfo(SV), false, false, 0);
   }
 
   // __va_list_tag:
@@ -6819,48 +7539,107 @@
   SmallVector<SDValue, 8> MemOps;
   SDValue FIN = Op.getOperand(1);
   // Store gp_offset
-  SDValue Store = DAG.getStore(Op.getOperand(0), dl,
+  SDValue Store = DAG.getStore(Op.getOperand(0), DL,
                                DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
                                                MVT::i32),
-                               FIN, SV, 0, false, false, 0);
+                               FIN, MachinePointerInfo(SV), false, false, 0);
   MemOps.push_back(Store);
 
   // Store fp_offset
-  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+  FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                     FIN, DAG.getIntPtrConstant(4));
-  Store = DAG.getStore(Op.getOperand(0), dl,
+  Store = DAG.getStore(Op.getOperand(0), DL,
                        DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
                                        MVT::i32),
-                       FIN, SV, 4, false, false, 0);
+                       FIN, MachinePointerInfo(SV, 4), false, false, 0);
   MemOps.push_back(Store);
 
   // Store ptr to overflow_arg_area
-  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+  FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                     FIN, DAG.getIntPtrConstant(4));
   SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                     getPointerTy());
-  Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8,
+  Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
+                       MachinePointerInfo(SV, 8),
                        false, false, 0);
   MemOps.push_back(Store);
 
   // Store ptr to reg_save_area.
-  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+  FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                     FIN, DAG.getIntPtrConstant(8));
   SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
                                     getPointerTy());
-  Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16,
-                       false, false, 0);
+  Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
+                       MachinePointerInfo(SV, 16), false, false, 0);
   MemOps.push_back(Store);
-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                      &MemOps[0], MemOps.size());
 }
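
For readers tracking the MachinePointerInfo offsets above (0, 4, 8 and 16), this
is the SysV AMD64 va_list record being initialized. A minimal C++ sketch of the
layout (field names follow the ABI document, not this patch):

    #include <cstdint>

    // SysV AMD64 __va_list_tag: 24 bytes, 8-byte aligned.
    struct VAListTag {
      uint32_t gp_offset;         // byte 0:  next GPR slot, 0..48
      uint32_t fp_offset;         // byte 4:  next XMM slot, 48..176
      void    *overflow_arg_area; // byte 8:  arguments passed on the stack
      void    *reg_save_area;     // byte 16: spilled argument registers
    };

    static_assert(sizeof(VAListTag) == 24, "matches the stores above");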
 
 SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
-  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
-  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+  assert(Subtarget->is64Bit() &&
+         "LowerVAARG only handles 64-bit va_arg!");
+  assert((Subtarget->isTargetLinux() ||
+          Subtarget->isTargetDarwin()) &&
+          "Unhandled target in LowerVAARG");
+  assert(Op.getNode()->getNumOperands() == 4);
+  SDValue Chain = Op.getOperand(0);
+  SDValue SrcPtr = Op.getOperand(1);
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  unsigned Align = Op.getConstantOperandVal(3);
+  DebugLoc dl = Op.getDebugLoc();
 
-  report_fatal_error("VAArgInst is not yet implemented for x86-64!");
-  return SDValue();
+  EVT ArgVT = Op.getNode()->getValueType(0);
+  const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+  uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+  uint8_t ArgMode;
+
+  // Decide which area this value should be read from.
+  // TODO: Implement the AMD64 ABI in its entirety. This simple
+  // selection mechanism works only for the basic types.
+  if (ArgVT == MVT::f80) {
+    llvm_unreachable("va_arg for f80 not yet implemented");
+  } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+    ArgMode = 2;  // Argument passed in XMM register. Use fp_offset.
+  } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+    ArgMode = 1;  // Argument passed in GPR64 register(s). Use gp_offset.
+  } else {
+    llvm_unreachable("Unhandled argument type in LowerVAARG");
+  }
+
+  if (ArgMode == 2) {
+    // Sanity Check: Make sure using fp_offset makes sense.
+    assert(!UseSoftFloat &&
+           !(DAG.getMachineFunction()
+                .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+           Subtarget->hasSSE1());
+  }
+
+  // Insert VAARG_64 node into the DAG
+  // VAARG_64 returns two values: Variable Argument Address, Chain
+  SmallVector<SDValue, 11> InstOps;
+  InstOps.push_back(Chain);
+  InstOps.push_back(SrcPtr);
+  InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+  InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+  InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+  SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+  SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+                                          VTs, &InstOps[0], InstOps.size(),
+                                          MVT::i64,
+                                          MachinePointerInfo(SV),
+                                          /*Align=*/0,
+                                          /*Volatile=*/false,
+                                          /*ReadMem=*/true,
+                                          /*WriteMem=*/true);
+  Chain = VAARG.getValue(1);
+
+  // Load the next argument and return it
+  return DAG.getLoad(ArgVT, dl,
+                     Chain,
+                     VAARG,
+                     MachinePointerInfo(),
+                     false, false, 0);
 }
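
A rough scalar rendering of the ArgMode selection above, under the same
simplifying assumption that only basic integer and FP types reach this path
(the helper name is ours, for illustration only):

    // Mode 1 consumes gp_offset (GPR save area); mode 2 consumes fp_offset
    // (XMM save area).
    enum VAArgMode { OverflowOnly = 0, UseGPOffset = 1, UseFPOffset = 2 };

    VAArgMode classifyVAArg(bool isFP, bool isInt, unsigned argSizeBytes) {
      if (isFP && argSizeBytes <= 16)
        return UseFPOffset;  // passed in an XMM register
      if (isInt && argSizeBytes <= 32)
        return UseGPOffset;  // passed in up to four GPR64s
      return OverflowOnly;   // f80/aggregates: the lowering above asserts instead
    }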
 
 SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
@@ -6871,11 +7650,12 @@
   SDValue SrcPtr = Op.getOperand(2);
   const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
 
-  return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
+  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
                        DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
-                       false, DstSV, 0, SrcSV, 0);
+                       false,
+                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
 }
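
Since the 64-bit va_list is a plain 24-byte struct, va_copy lowers to a
fixed-size memcpy; the equivalent source-level operation, as a sketch:

    #include <cstring>

    void vaCopy(void *dst, const void *src) {
      // 24 bytes at 8-byte alignment -- the getMemcpy(..., 24, 8, ...) above.
      std::memcpy(dst, src, 24);
    }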
 
 SDValue
@@ -6984,24 +7764,58 @@
                                 DAG.getConstant(X86CC, MVT::i8), Cond);
     return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
   }
-  // ptest intrinsics. The intrinsic these come from are designed to return
-  // an integer value, not just an instruction so lower it to the ptest
-  // pattern and a setcc for the result.
+  // ptest and testp intrinsics. The intrinsics these come from are designed to
+  // return an integer value, not just an instruction, so lower it to the ptest
+  // or testp pattern and a setcc for the result.
   case Intrinsic::x86_sse41_ptestz:
   case Intrinsic::x86_sse41_ptestc:
-  case Intrinsic::x86_sse41_ptestnzc:{
+  case Intrinsic::x86_sse41_ptestnzc:
+  case Intrinsic::x86_avx_ptestz_256:
+  case Intrinsic::x86_avx_ptestc_256:
+  case Intrinsic::x86_avx_ptestnzc_256:
+  case Intrinsic::x86_avx_vtestz_ps:
+  case Intrinsic::x86_avx_vtestc_ps:
+  case Intrinsic::x86_avx_vtestnzc_ps:
+  case Intrinsic::x86_avx_vtestz_pd:
+  case Intrinsic::x86_avx_vtestc_pd:
+  case Intrinsic::x86_avx_vtestnzc_pd:
+  case Intrinsic::x86_avx_vtestz_ps_256:
+  case Intrinsic::x86_avx_vtestc_ps_256:
+  case Intrinsic::x86_avx_vtestnzc_ps_256:
+  case Intrinsic::x86_avx_vtestz_pd_256:
+  case Intrinsic::x86_avx_vtestc_pd_256:
+  case Intrinsic::x86_avx_vtestnzc_pd_256: {
+    bool IsTestPacked = false;
     unsigned X86CC = 0;
     switch (IntNo) {
     default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+    case Intrinsic::x86_avx_vtestz_ps:
+    case Intrinsic::x86_avx_vtestz_pd:
+    case Intrinsic::x86_avx_vtestz_ps_256:
+    case Intrinsic::x86_avx_vtestz_pd_256:
+      IsTestPacked = true; // Fallthrough
     case Intrinsic::x86_sse41_ptestz:
+    case Intrinsic::x86_avx_ptestz_256:
       // ZF = 1
       X86CC = X86::COND_E;
       break;
+    case Intrinsic::x86_avx_vtestc_ps:
+    case Intrinsic::x86_avx_vtestc_pd:
+    case Intrinsic::x86_avx_vtestc_ps_256:
+    case Intrinsic::x86_avx_vtestc_pd_256:
+      IsTestPacked = true; // Fallthrough
     case Intrinsic::x86_sse41_ptestc:
+    case Intrinsic::x86_avx_ptestc_256:
       // CF = 1
       X86CC = X86::COND_B;
       break;
+    case Intrinsic::x86_avx_vtestnzc_ps:
+    case Intrinsic::x86_avx_vtestnzc_pd:
+    case Intrinsic::x86_avx_vtestnzc_ps_256:
+    case Intrinsic::x86_avx_vtestnzc_pd_256:
+      IsTestPacked = true; // Fallthrough
     case Intrinsic::x86_sse41_ptestnzc:
+    case Intrinsic::x86_avx_ptestnzc_256:
       // ZF and CF = 0
       X86CC = X86::COND_A;
       break;
@@ -7009,7 +7823,8 @@
 
     SDValue LHS = Op.getOperand(1);
     SDValue RHS = Op.getOperand(2);
-    SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
+    unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
+    SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
     SDValue CC = DAG.getConstant(X86CC, MVT::i8);
     SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
     return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
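
The condition-code mapping is shared by PTEST and the new AVX TESTP forms. A
scalar model of what each setcc materializes (the real operands are 128- or
256-bit vectors; this only pins down the EFLAGS semantics):

    #include <cstdint>

    // ptestz:   ZF = ((LHS & RHS) == 0)   -> COND_E
    // ptestc:   CF = ((~LHS & RHS) == 0)  -> COND_B
    // ptestnzc: ZF == 0 and CF == 0       -> COND_A
    bool ptestz(uint64_t lhs, uint64_t rhs)   { return (lhs & rhs) == 0; }
    bool ptestc(uint64_t lhs, uint64_t rhs)   { return (~lhs & rhs) == 0; }
    bool ptestnzc(uint64_t lhs, uint64_t rhs) {
      return !ptestz(lhs, rhs) && !ptestc(lhs, rhs);
    }
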
@@ -7109,6 +7924,7 @@
       ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
     } else {
       ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+      // FIXME: This must be lowered to get rid of the invalid type.
     }
 
     EVT VT = Op.getValueType();
@@ -7136,13 +7952,13 @@
     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                    FrameAddr, Offset),
-                       NULL, 0, false, false, 0);
+                       MachinePointerInfo(), false, false, 0);
   }
 
   // Just load the return address.
   SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     RetAddrFI, NULL, 0, false, false, 0);
+                     RetAddrFI, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
@@ -7155,7 +7971,8 @@
   unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
   while (Depth--)
-    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+                            MachinePointerInfo(),
                             false, false, 0);
   return FrameAddr;
 }
@@ -7172,14 +7989,16 @@
   SDValue Handler   = Op.getOperand(2);
   DebugLoc dl       = Op.getDebugLoc();
 
-  SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP,
-                                  getPointerTy());
+  SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+                                     Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+                                     getPointerTy());
   unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
 
-  SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame,
-                                  DAG.getIntPtrConstant(-TD->getPointerSize()));
+  SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
+                                  DAG.getIntPtrConstant(TD->getPointerSize()));
   StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
-  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
+  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+                       false, false, 0);
   Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
   MF.getRegInfo().addLiveOut(StoreAddrReg);
 
@@ -7214,11 +8033,13 @@
     unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
     SDValue Addr = Trmp;
     OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
-                                Addr, TrmpAddr, 0, false, false, 0);
+                                Addr, MachinePointerInfo(TrmpAddr),
+                                false, false, 0);
 
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(2, MVT::i64));
-    OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2,
+    OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+                                MachinePointerInfo(TrmpAddr, 2),
                                 false, false, 2);
 
     // Load the 'nest' parameter value into R10.
@@ -7227,11 +8048,13 @@
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(10, MVT::i64));
     OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
-                                Addr, TrmpAddr, 10, false, false, 0);
+                                Addr, MachinePointerInfo(TrmpAddr, 10),
+                                false, false, 0);
 
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(12, MVT::i64));
-    OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12,
+    OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+                                MachinePointerInfo(TrmpAddr, 12),
                                 false, false, 2);
 
     // Jump to the nested function.
@@ -7239,13 +8062,15 @@
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(20, MVT::i64));
     OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
-                                Addr, TrmpAddr, 20, false, false, 0);
+                                Addr, MachinePointerInfo(TrmpAddr, 20),
+                                false, false, 0);
 
     unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(22, MVT::i64));
     OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
-                                TrmpAddr, 22, false, false, 0);
+                                MachinePointerInfo(TrmpAddr, 22),
+                                false, false, 0);
 
     SDValue Ops[] =
       { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
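
For reference, the byte image those six stores build, as I read the opcode
constants (offsets are the addDisp values; the helper below is a hypothetical
illustration that lays out the same 23-byte nest trampoline):

    #include <cstdint>
    #include <cstring>

    void writeTrampoline(uint8_t *t, uint64_t fptr, uint64_t nest) {
      const uint8_t movR11[] = { 0x49, 0xBB };       // movabsq $imm64, %r11
      const uint8_t movR10[] = { 0x49, 0xBA };       // movabsq $imm64, %r10
      const uint8_t jmpR11[] = { 0x41, 0xFF, 0xE3 }; // jmpq *%r11
      std::memcpy(t +  0, movR11, 2); std::memcpy(t +  2, &fptr, 8);
      std::memcpy(t + 10, movR10, 2); std::memcpy(t + 12, &nest, 8);
      std::memcpy(t + 20, jmpR11, 3);
    }
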
@@ -7307,22 +8132,26 @@
     const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
     OutChains[0] = DAG.getStore(Root, dl,
                                 DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
-                                Trmp, TrmpAddr, 0, false, false, 0);
+                                Trmp, MachinePointerInfo(TrmpAddr),
+                                false, false, 0);
 
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                        DAG.getConstant(1, MVT::i32));
-    OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1,
+    OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+                                MachinePointerInfo(TrmpAddr, 1),
                                 false, false, 1);
 
     const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                        DAG.getConstant(5, MVT::i32));
     OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
-                                TrmpAddr, 5, false, false, 1);
+                                MachinePointerInfo(TrmpAddr, 5),
+                                false, false, 1);
 
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                        DAG.getConstant(6, MVT::i32));
-    OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6,
+    OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+                                MachinePointerInfo(TrmpAddr, 6),
                                 false, false, 1);
 
     SDValue Ops[] =
@@ -7357,41 +8186,48 @@
   const TargetFrameInfo &TFI = *TM.getFrameInfo();
   unsigned StackAlignment = TFI.getStackAlignment();
   EVT VT = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
 
   // Save FP Control Word to stack slot
   int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
 
-  SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
-                              DAG.getEntryNode(), StackSlot);
+  MachineMemOperand *MMO =
+   MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                           MachineMemOperand::MOStore, 2, 2);
+
+  SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
+  SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+                                          DAG.getVTList(MVT::Other),
+                                          Ops, 2, MVT::i16, MMO);
 
   // Load FP Control Word from stack slot
-  SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0,
-                            false, false, 0);
+  SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
+                            MachinePointerInfo(), false, false, 0);
 
   // Transform as necessary
   SDValue CWD1 =
-    DAG.getNode(ISD::SRL, dl, MVT::i16,
-                DAG.getNode(ISD::AND, dl, MVT::i16,
+    DAG.getNode(ISD::SRL, DL, MVT::i16,
+                DAG.getNode(ISD::AND, DL, MVT::i16,
                             CWD, DAG.getConstant(0x800, MVT::i16)),
                 DAG.getConstant(11, MVT::i8));
   SDValue CWD2 =
-    DAG.getNode(ISD::SRL, dl, MVT::i16,
-                DAG.getNode(ISD::AND, dl, MVT::i16,
+    DAG.getNode(ISD::SRL, DL, MVT::i16,
+                DAG.getNode(ISD::AND, DL, MVT::i16,
                             CWD, DAG.getConstant(0x400, MVT::i16)),
                 DAG.getConstant(9, MVT::i8));
 
   SDValue RetVal =
-    DAG.getNode(ISD::AND, dl, MVT::i16,
-                DAG.getNode(ISD::ADD, dl, MVT::i16,
-                            DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2),
+    DAG.getNode(ISD::AND, DL, MVT::i16,
+                DAG.getNode(ISD::ADD, DL, MVT::i16,
+                            DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
                             DAG.getConstant(1, MVT::i16)),
                 DAG.getConstant(3, MVT::i16));
 
 
   return DAG.getNode((VT.getSizeInBits() < 16 ?
-                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+                      ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
 }
 
 SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
@@ -7517,12 +8353,12 @@
                      Op.getOperand(1), DAG.getConstant(23, MVT::i32));
 
     ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
-    
+
     std::vector<Constant*> CV(4, CI);
     Constant *C = ConstantVector::get(CV);
     SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
     SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                                 PseudoSourceValue::getConstantPool(), 0,
+                                 MachinePointerInfo::getConstantPool(),
                                  false, false, 16);
 
     Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
@@ -7544,7 +8380,7 @@
     Constant *C = ConstantVector::get(CVM1);
     SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
     SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                            PseudoSourceValue::getConstantPool(), 0,
+                            MachinePointerInfo::getConstantPool(),
                             false, false, 16);
 
     // r = pblendv(r, psllw(r & (char16)15, 4), a);
@@ -7557,12 +8393,13 @@
                     R, M, Op);
     // a += a
     Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-    
+
     C = ConstantVector::get(CVM2);
     CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
     M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                    PseudoSourceValue::getConstantPool(), 0, false, false, 16);
-    
+                    MachinePointerInfo::getConstantPool(),
+                    false, false, 16);
+
     // r = pblendv(r, psllw(r & (char16)63, 2), a);
     M = DAG.getNode(ISD::AND, dl, VT, R, M);
     M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
@@ -7573,7 +8410,7 @@
                     R, M, Op);
     // a += a
     Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-    
+
     // return pblendv(r, r+r, a);
     R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
@@ -7653,37 +8490,51 @@
 
 SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
   DebugLoc dl = Op.getDebugLoc();
-  
-  if (!Subtarget->hasSSE2())
-    return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
-                       DAG.getConstant(0, MVT::i32));
-  
+
+  if (!Subtarget->hasSSE2()) {
+    SDValue Chain = Op.getOperand(0);
+    SDValue Zero = DAG.getConstant(0,
+                                   Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+    SDValue Ops[] = {
+      DAG.getRegister(X86::ESP, MVT::i32), // Base
+      DAG.getTargetConstant(1, MVT::i8),   // Scale
+      DAG.getRegister(0, MVT::i32),        // Index
+      DAG.getTargetConstant(0, MVT::i32),  // Disp
+      DAG.getRegister(0, MVT::i32),        // Segment.
+      Zero,
+      Chain
+    };
+    SDNode *Res =
+      DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+                          array_lengthof(Ops));
+    return SDValue(Res, 0);
+  }
+
   unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
-  if(!isDev)
+  if (!isDev)
     return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
-  else {
-    unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-    unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
-    unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-    unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-    
-    // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-    if (!Op1 && !Op2 && !Op3 && Op4)
-      return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
-    
-    // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-    if (Op1 && !Op2 && !Op3 && !Op4)
-      return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
-    
-    // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), 
-    //           (MFENCE)>;
-    return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
-  }
+
+  unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+  unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+  unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+  // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+  if (!Op1 && !Op2 && !Op3 && Op4)
+    return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+  // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+  if (Op1 && !Op2 && !Op3 && !Op4)
+    return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+  // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+  //           (MFENCE)>;
+  return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
 }
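
On pre-SSE2 processors there is no mfence, which is why the code above emits a
locked no-op read-modify-write on the stack: any LOCK-prefixed instruction is
a full barrier on x86. A minimal sketch of the same fallback in GCC-style
inline asm (32-bit, matching the hard-coded ESP base):

    static inline void fenceNoSSE2() {
      // lock or $0, (%esp): serializes memory, touches nothing but EFLAGS.
      asm volatile("lock orl $0, (%%esp)" ::: "memory", "cc");
    }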
 
 SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
   EVT T = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
   unsigned Reg = 0;
   unsigned size = 0;
   switch(T.getSimpleVT().SimpleTy) {
@@ -7697,7 +8548,7 @@
     Reg = X86::RAX; size = 8;
     break;
   }
-  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg,
+  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
                                     Op.getOperand(2), SDValue());
   SDValue Ops[] = { cpIn.getValue(0),
                     Op.getOperand(1),
@@ -7705,9 +8556,11 @@
                     DAG.getTargetConstant(size, MVT::i8),
                     cpIn.getValue(1) };
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5);
+  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+  SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+                                           Ops, 5, T, MMO);
   SDValue cpOut =
-    DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1));
+    DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
   return cpOut;
 }
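
LowerCMP_SWAP pins the comparand in EAX/RAX because cmpxchg implicitly uses
the accumulator; the old value comes back in the same register, which is what
the final getCopyFromReg reads. A scalar model of the semantics (atomic in
hardware, not in this sketch):

    #include <cstdint>

    uint64_t cmpxchgModel(uint64_t *ptr, uint64_t expected /* in RAX */,
                          uint64_t desired) {
      uint64_t old = *ptr;
      if (old == expected)
        *ptr = desired;  // ZF=1 on success
      return old;        // returned in RAX
    }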
 
@@ -7734,10 +8587,10 @@
                                             SelectionDAG &DAG) const {
   EVT SrcVT = Op.getOperand(0).getValueType();
   EVT DstVT = Op.getValueType();
-  assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() && 
+  assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
           Subtarget->hasMMX() && !DisableMMX) &&
          "Unexpected custom BIT_CONVERT");
-  assert((DstVT == MVT::i64 || 
+  assert((DstVT == MVT::i64 ||
           (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
          "Unexpected custom BIT_CONVERT");
   // i64 <=> MMX conversions are Legal.
@@ -7866,8 +8719,8 @@
     if (FIST.getNode() != 0) {
       EVT VT = N->getValueType(0);
       // Return a load from the stack slot.
-      Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0,
-                                    false, false, 0));
+      Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+                                    MachinePointerInfo(), false, false, 0));
     }
     return;
   }
@@ -7909,7 +8762,9 @@
                       N->getOperand(1),
                       swapInH.getValue(1) };
     SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-    SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
+    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+    SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+                                             Ops, 3, T, MMO);
     SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
                                         MVT::i32, Result.getValue(1));
     SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
@@ -7982,7 +8837,6 @@
   case X86ISD::INSERTPS:           return "X86ISD::INSERTPS";
   case X86ISD::PINSRB:             return "X86ISD::PINSRB";
   case X86ISD::PINSRW:             return "X86ISD::PINSRW";
-  case X86ISD::MMX_PINSRW:         return "X86ISD::MMX_PINSRW";
   case X86ISD::PSHUFB:             return "X86ISD::PSHUFB";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
@@ -7990,7 +8844,6 @@
   case X86ISD::FRCP:               return "X86ISD::FRCP";
   case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
   case X86ISD::TLSCALL:            return "X86ISD::TLSCALL";
-  case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress";
   case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
   case X86ISD::TC_RETURN:          return "X86ISD::TC_RETURN";
   case X86ISD::FNSTCW16m:          return "X86ISD::FNSTCW16m";
@@ -8027,8 +8880,43 @@
   case X86ISD::AND:                return "X86ISD::AND";
   case X86ISD::MUL_IMM:            return "X86ISD::MUL_IMM";
   case X86ISD::PTEST:              return "X86ISD::PTEST";
+  case X86ISD::TESTP:              return "X86ISD::TESTP";
+  case X86ISD::PALIGN:             return "X86ISD::PALIGN";
+  case X86ISD::PSHUFD:             return "X86ISD::PSHUFD";
+  case X86ISD::PSHUFHW:            return "X86ISD::PSHUFHW";
+  case X86ISD::PSHUFHW_LD:         return "X86ISD::PSHUFHW_LD";
+  case X86ISD::PSHUFLW:            return "X86ISD::PSHUFLW";
+  case X86ISD::PSHUFLW_LD:         return "X86ISD::PSHUFLW_LD";
+  case X86ISD::SHUFPS:             return "X86ISD::SHUFPS";
+  case X86ISD::SHUFPD:             return "X86ISD::SHUFPD";
+  case X86ISD::MOVLHPS:            return "X86ISD::MOVLHPS";
+  case X86ISD::MOVLHPD:            return "X86ISD::MOVLHPD";
+  case X86ISD::MOVHLPS:            return "X86ISD::MOVHLPS";
+  case X86ISD::MOVHLPD:            return "X86ISD::MOVHLPD";
+  case X86ISD::MOVLPS:             return "X86ISD::MOVLPS";
+  case X86ISD::MOVLPD:             return "X86ISD::MOVLPD";
+  case X86ISD::MOVDDUP:            return "X86ISD::MOVDDUP";
+  case X86ISD::MOVSHDUP:           return "X86ISD::MOVSHDUP";
+  case X86ISD::MOVSLDUP:           return "X86ISD::MOVSLDUP";
+  case X86ISD::MOVSHDUP_LD:        return "X86ISD::MOVSHDUP_LD";
+  case X86ISD::MOVSLDUP_LD:        return "X86ISD::MOVSLDUP_LD";
+  case X86ISD::MOVSD:              return "X86ISD::MOVSD";
+  case X86ISD::MOVSS:              return "X86ISD::MOVSS";
+  case X86ISD::UNPCKLPS:           return "X86ISD::UNPCKLPS";
+  case X86ISD::UNPCKLPD:           return "X86ISD::UNPCKLPD";
+  case X86ISD::UNPCKHPS:           return "X86ISD::UNPCKHPS";
+  case X86ISD::UNPCKHPD:           return "X86ISD::UNPCKHPD";
+  case X86ISD::PUNPCKLBW:          return "X86ISD::PUNPCKLBW";
+  case X86ISD::PUNPCKLWD:          return "X86ISD::PUNPCKLWD";
+  case X86ISD::PUNPCKLDQ:          return "X86ISD::PUNPCKLDQ";
+  case X86ISD::PUNPCKLQDQ:         return "X86ISD::PUNPCKLQDQ";
+  case X86ISD::PUNPCKHBW:          return "X86ISD::PUNPCKHBW";
+  case X86ISD::PUNPCKHWD:          return "X86ISD::PUNPCKHWD";
+  case X86ISD::PUNPCKHDQ:          return "X86ISD::PUNPCKHDQ";
+  case X86ISD::PUNPCKHQDQ:         return "X86ISD::PUNPCKHQDQ";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
-  case X86ISD::MINGW_ALLOCA:       return "X86ISD::MINGW_ALLOCA";
+  case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";
+  case X86ISD::WIN_ALLOCA:         return "X86ISD::WIN_ALLOCA";
   }
 }
 
@@ -8038,6 +8926,7 @@
                                               const Type *Ty) const {
   // X86 supports extremely general addressing modes.
   CodeModel::Model M = getTargetMachine().getCodeModel();
+  Reloc::Model R = getTargetMachine().getRelocationModel();
 
   // X86 allows a sign-extended 32-bit immediate field as a displacement.
   if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
@@ -8057,7 +8946,8 @@
       return false;
 
     // If lower 4G is not available, then we must use rip-relative addressing.
-    if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+    if ((M != CodeModel::Small || R != Reloc::Static) &&
+        Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
       return false;
   }
 
@@ -8587,6 +9477,261 @@
 }
 
 MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+                   MachineInstr *MI,
+                   MachineBasicBlock *MBB) const {
+  // Emit va_arg instruction on X86-64.
+
+  // Operands to this pseudo-instruction:
+  // 0  ) Output        : destination address (reg)
+  // 1-5) Input         : va_list address (addr, i64mem)
+  // 6  ) ArgSize       : Size (in bytes) of vararg type
+  // 7  ) ArgMode       : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+  // 8  ) Align         : Alignment of type
+  // 9  ) EFLAGS (implicit-def)
+
+  assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+  assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+  unsigned DestReg = MI->getOperand(0).getReg();
+  MachineOperand &Base = MI->getOperand(1);
+  MachineOperand &Scale = MI->getOperand(2);
+  MachineOperand &Index = MI->getOperand(3);
+  MachineOperand &Disp = MI->getOperand(4);
+  MachineOperand &Segment = MI->getOperand(5);
+  unsigned ArgSize = MI->getOperand(6).getImm();
+  unsigned ArgMode = MI->getOperand(7).getImm();
+  unsigned Align = MI->getOperand(8).getImm();
+
+  // Memory Reference
+  assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+  // Machine Information
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+  const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+  DebugLoc DL = MI->getDebugLoc();
+
+  // struct va_list {
+  //   i32   gp_offset
+  //   i32   fp_offset
+  //   i64   overflow_area (address)
+  //   i64   reg_save_area (address)
+  // }
+  // sizeof(va_list) = 24
+  // alignment(va_list) = 8
+
+  unsigned TotalNumIntRegs = 6;
+  unsigned TotalNumXMMRegs = 8;
+  bool UseGPOffset = (ArgMode == 1);
+  bool UseFPOffset = (ArgMode == 2);
+  unsigned MaxOffset = TotalNumIntRegs * 8 +
+                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+
+  // Align ArgSize to a multiple of 8.
+  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+  bool NeedsAlign = (Align > 8);
+
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *overflowMBB;
+  MachineBasicBlock *offsetMBB;
+  MachineBasicBlock *endMBB;
+
+  unsigned OffsetDestReg = 0;    // Argument address computed by offsetMBB
+  unsigned OverflowDestReg = 0;  // Argument address computed by overflowMBB
+  unsigned OffsetReg = 0;
+
+  if (!UseGPOffset && !UseFPOffset) {
+    // If we only pull from the overflow region, we don't create a branch.
+    // We don't need to alter control flow.
+    OffsetDestReg = 0; // unused
+    OverflowDestReg = DestReg;
+
+    offsetMBB = NULL;
+    overflowMBB = thisMBB;
+    endMBB = thisMBB;
+  } else {
+    // First emit code to check if gp_offset (or fp_offset) is below the bound.
+    // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+    // If not, pull from overflow_area. (branch to overflowMBB)
+    //
+    //       thisMBB
+    //         |     .
+    //         |        .
+    //     offsetMBB   overflowMBB
+    //         |        .
+    //         |     .
+    //        endMBB
+
+    // Registers for the PHI in endMBB
+    OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+    OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+    MachineFunction *MF = MBB->getParent();
+    overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+    MachineFunction::iterator MBBIter = MBB;
+    ++MBBIter;
+
+    // Insert the new basic blocks
+    MF->insert(MBBIter, offsetMBB);
+    MF->insert(MBBIter, overflowMBB);
+    MF->insert(MBBIter, endMBB);
+
+    // Transfer the remainder of MBB and its successor edges to endMBB.
+    endMBB->splice(endMBB->begin(), thisMBB,
+                    llvm::next(MachineBasicBlock::iterator(MI)),
+                    thisMBB->end());
+    endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+    // Make offsetMBB and overflowMBB successors of thisMBB
+    thisMBB->addSuccessor(offsetMBB);
+    thisMBB->addSuccessor(overflowMBB);
+
+    // endMBB is a successor of both offsetMBB and overflowMBB
+    offsetMBB->addSuccessor(endMBB);
+    overflowMBB->addSuccessor(endMBB);
+
+    // Load the offset value into a register
+    OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Check if there is enough room left to pull this argument.
+    BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+      .addReg(OffsetReg)
+      .addImm(MaxOffset + 8 - ArgSizeA8);
+
+    // Branch to "overflowMBB" if offset >= max
+    // Fall through to "offsetMBB" otherwise
+    BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+      .addMBB(overflowMBB);
+  }
+
+  // In offsetMBB, emit code to use the reg_save_area.
+  if (offsetMBB) {
+    assert(OffsetReg != 0);
+
+    // Read the reg_save_area address.
+    unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, 16)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Zero-extend the offset
+    unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+      .addImm(0)
+      .addReg(OffsetReg)
+      .addImm(X86::sub_32bit);
+
+    // Add the offset to the reg_save_area to get the final address.
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+      .addReg(OffsetReg64)
+      .addReg(RegSaveReg);
+
+    // Compute the offset for the next argument
+    unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+      .addReg(OffsetReg)
+      .addImm(UseFPOffset ? 16 : 8);
+
+    // Store it back into the va_list.
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .addReg(NextOffsetReg)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Jump to endMBB
+    BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+      .addMBB(endMBB);
+  }
+
+  //
+  // Emit code to use overflow area
+  //
+
+  // Load the overflow_area address into a register.
+  unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we need to align it, do so. Otherwise, just copy the address
+  // to OverflowDestReg.
+  if (NeedsAlign) {
+    // Align the overflow address
+    assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+    unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+    // aligned_addr = (addr + (align-1)) & ~(align-1)
+    BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+      .addReg(OverflowAddrReg)
+      .addImm(Align-1);
+
+    BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+      .addReg(TmpReg)
+      .addImm(~(uint64_t)(Align-1));
+  } else {
+    BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+      .addReg(OverflowAddrReg);
+  }
+
+  // Compute the next overflow address after this argument.
+  // (the overflow address should be kept 8-byte aligned)
+  unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+    .addReg(OverflowDestReg)
+    .addImm(ArgSizeA8);
+
+  // Store the new overflow address.
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .addReg(NextAddrReg)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we branched, emit the PHI to the front of endMBB.
+  if (offsetMBB) {
+    BuildMI(*endMBB, endMBB->begin(), DL,
+            TII->get(X86::PHI), DestReg)
+      .addReg(OffsetDestReg).addMBB(offsetMBB)
+      .addReg(OverflowDestReg).addMBB(overflowMBB);
+  }
+
+  // Erase the pseudo instruction
+  MI->eraseFromParent();
+
+  return endMBB;
+}
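
The three-block diamond built above is the standard AMD64 va_arg expansion. A
C-level model of what offsetMBB and overflowMBB compute (layout as in the
va_list sketch earlier in the patch; register classes and MMOs elided):

    #include <cstdint>

    struct VAListTag {
      uint32_t gp_offset, fp_offset;
      void *overflow_arg_area, *reg_save_area;
    };

    void *vaArgAddr(VAListTag *vl, uint32_t argSize8, uint32_t align,
                    bool useFP) {
      uint32_t *off = useFP ? &vl->fp_offset : &vl->gp_offset;
      uint32_t maxOff = 6 * 8 + (useFP ? 8 * 16 : 0);
      if (*off + argSize8 <= maxOff) {                 // offsetMBB
        void *addr = (char *)vl->reg_save_area + *off;
        *off += useFP ? 16 : 8;
        return addr;
      }
      uintptr_t p = (uintptr_t)vl->overflow_arg_area;  // overflowMBB
      if (align > 8)                                   // NeedsAlign
        p = (p + align - 1) & ~(uintptr_t)(align - 1);
      vl->overflow_arg_area = (char *)p + argSize8;
      return (void *)p;
    }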
+
+MachineBasicBlock *
 X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
                                                  MachineInstr *MI,
                                                  MachineBasicBlock *MBB) const {
@@ -8641,8 +9786,8 @@
     int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
     MachineMemOperand *MMO =
       F->getMachineMemOperand(
-        PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
-        MachineMemOperand::MOStore, Offset,
+          MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+        MachineMemOperand::MOStore,
         /*Size=*/16, /*Align=*/16);
     BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
       .addFrameIndex(RegSaveFrameIndex)
@@ -8734,7 +9879,7 @@
 }
 
 MachineBasicBlock *
-X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
-                                          MachineBasicBlock *BB) const {
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
+                                        MachineBasicBlock *BB) const {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
@@ -8744,12 +9889,16 @@
   // FIXME: The code should be tweaked as soon as we try to do codegen for
   // mingw-w64.
 
+  const char *StackProbeSymbol =
+      Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
   BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
-    .addExternalSymbol("_alloca")
+    .addExternalSymbol(StackProbeSymbol)
     .addReg(X86::EAX, RegState::Implicit)
     .addReg(X86::ESP, RegState::Implicit)
     .addReg(X86::EAX, RegState::Define | RegState::Implicit)
-    .addReg(X86::ESP, RegState::Define | RegState::Implicit);
+    .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+    .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
 
   MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
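
The _chkstk/_alloca call is needed because Windows commits stack pages lazily
behind a single guard page: the stack pointer may only move past the guard one
page at a time. A simplified sketch of the probing the helper performs (the
real helpers also follow the EAX/ESP register conventions listed above):

    void probeStack(volatile char *sp, unsigned bytes) {
      const unsigned PageSize = 4096;  // x86 Windows page size
      for (unsigned probed = PageSize; probed < bytes; probed += PageSize)
        sp[-(long)probed] = 0;         // faulting here commits the next page
    }
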
@@ -8762,19 +9911,20 @@
   // our load from the relocation, sticking it in either RDI (x86-64)
   // or EAX and doing an indirect call.  The return value will then
   // be in the normal return register.
-  const X86InstrInfo *TII 
+  const X86InstrInfo *TII
     = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
   DebugLoc DL = MI->getDebugLoc();
   MachineFunction *F = BB->getParent();
-  
+
+  assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
   assert(MI->getOperand(3).isGlobal() && "This should be a global");
-  
+
   if (Subtarget->is64Bit()) {
     MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
                                       TII->get(X86::MOV64rm), X86::RDI)
     .addReg(X86::RIP)
     .addImm(0).addReg(0)
-    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, 
+    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
                       MI->getOperand(3).getTargetFlags())
     .addReg(0);
     MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
@@ -8784,7 +9934,7 @@
                                       TII->get(X86::MOV32rm), X86::EAX)
     .addReg(0)
     .addImm(0).addReg(0)
-    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, 
+    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
                       MI->getOperand(3).getTargetFlags())
     .addReg(0);
     MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
@@ -8794,13 +9944,13 @@
                                       TII->get(X86::MOV32rm), X86::EAX)
     .addReg(TII->getGlobalBaseReg(F))
     .addImm(0).addReg(0)
-    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, 
+    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
                       MI->getOperand(3).getTargetFlags())
     .addReg(0);
     MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
     addDirectMem(MIB, X86::EAX);
   }
-  
+
   MI->eraseFromParent(); // The pseudo instruction is gone now.
   return BB;
 }
@@ -8810,13 +9960,12 @@
                                                MachineBasicBlock *BB) const {
   switch (MI->getOpcode()) {
   default: assert(false && "Unexpected instr type to insert");
-  case X86::MINGW_ALLOCA:
-    return EmitLoweredMingwAlloca(MI, BB);
+  case X86::WIN_ALLOCA:
+    return EmitLoweredWinAlloca(MI, BB);
   case X86::TLSCall_32:
   case X86::TLSCall_64:
     return EmitLoweredTLSCall(MI, BB);
   case X86::CMOV_GR8:
-  case X86::CMOV_V1I64:
   case X86::CMOV_FR32:
   case X86::CMOV_FR64:
   case X86::CMOV_V4F32:
@@ -9090,6 +10239,9 @@
                                                false);
   case X86::VASTART_SAVE_XMM_REGS:
     return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+  case X86::VAARG_64:
+    return EmitVAARG64WithCustomInserter(MI, BB);
   }
 }
 
@@ -9134,6 +10286,16 @@
   }
 }
 
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                         unsigned Depth) const {
+  // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+  if (Op.getOpcode() == X86ISD::SETCC_CARRY)
+    return Op.getValueType().getScalarType().getSizeInBits();
+
+  // Fallback case.
+  return 1;
+}
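
SETCC_CARRY is the sbb trick: subtracting a register from itself with borrow
yields 0 or all ones, so every bit equals the sign bit and the override can
report the full bit width. A scalar illustration:

    #include <cstdint>

    // sbb %eax, %eax  ==>  eax = eax - eax - CF = -CF
    int32_t setccCarry(bool carry) {
      return carry ? -1 : 0;  // 0xFFFFFFFF or 0x00000000
    }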
+
 /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
 /// node is a GlobalAddress + offset.
 bool X86TargetLowering::isGAPlusOffset(SDNode *N,
@@ -9157,21 +10319,20 @@
                                      const TargetLowering &TLI) {
   DebugLoc dl = N->getDebugLoc();
   EVT VT = N->getValueType(0);
-  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
 
   if (VT.getSizeInBits() != 128)
     return SDValue();
 
   SmallVector<SDValue, 16> Elts;
   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
-    Elts.push_back(DAG.getShuffleScalarElt(SVN, i));
-  
+    Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+
   return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
 }
 
-/// PerformShuffleCombine - Detect vector gather/scatter index generation
-/// and convert it from being a bunch of shuffles and extracts to a simple
-/// store and scalar loads to extract the elements.
+/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
+/// generation and convert it from being a bunch of shuffles and extracts
+/// to a simple store and scalar loads to extract the elements.
 static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                                                 const TargetLowering &TLI) {
   SDValue InputVector = N->getOperand(0);
@@ -9221,8 +10382,8 @@
 
   // Store the value to a temporary stack slot.
   SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
-  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
-                            0, false, false, 0);
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+                            MachinePointerInfo(), false, false, 0);
 
   // Replace each use (extract) with a load of the appropriate element.
   for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
@@ -9237,11 +10398,12 @@
     SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
 
     SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
-                                     OffsetVal, StackPtr);
+                                     StackPtr, OffsetVal);
 
     // Load the scalar.
     SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
-                                     ScalarAddr, NULL, 0, false, false, 0);
+                                     ScalarAddr, MachinePointerInfo(),
+                                     false, false, 0);
 
     // Replace the extract with the load.
     DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
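
The combine above turns N scalar extracts of one vector into a single 16-byte
spill plus N cheap scalar loads, avoiding N cross-domain register moves. The
transformation at the source level, as a sketch:

    #include <cstdint>
    #include <cstring>

    uint32_t extractLane(const void *vec128, unsigned idx) {
      alignas(16) uint32_t slot[4];   // CreateStackTemporary
      std::memcpy(slot, vec128, 16);  // the one vector store (Ch)
      return slot[idx];               // each extract becomes a scalar load
    }
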
@@ -9944,9 +11106,8 @@
     // pair instead.
     if (Subtarget->is64Bit() || F64IsLegal) {
       EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
-      SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
-                                  Ld->getBasePtr(), Ld->getSrcValue(),
-                                  Ld->getSrcValueOffset(), Ld->isVolatile(),
+      SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
+                                  Ld->getPointerInfo(), Ld->isVolatile(),
                                   Ld->isNonTemporal(), Ld->getAlignment());
       SDValue NewChain = NewLd.getValue(1);
       if (TokenFactorIndex != -1) {
@@ -9955,7 +11116,7 @@
                                Ops.size());
       }
       return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
-                          St->getSrcValue(), St->getSrcValueOffset(),
+                          St->getPointerInfo(),
                           St->isVolatile(), St->isNonTemporal(),
                           St->getAlignment());
     }
@@ -9966,11 +11127,11 @@
                                  DAG.getConstant(4, MVT::i32));
 
     SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
-                               Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                               Ld->getPointerInfo(),
                                Ld->isVolatile(), Ld->isNonTemporal(),
                                Ld->getAlignment());
     SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
-                               Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+                               Ld->getPointerInfo().getWithOffset(4),
                                Ld->isVolatile(), Ld->isNonTemporal(),
                                MinAlign(Ld->getAlignment(), 4));
 
@@ -9987,12 +11148,11 @@
                          DAG.getConstant(4, MVT::i32));
 
     SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
-                                St->getSrcValue(), St->getSrcValueOffset(),
+                                St->getPointerInfo(),
                                 St->isVolatile(), St->isNonTemporal(),
                                 St->getAlignment());
     SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
-                                St->getSrcValue(),
-                                St->getSrcValueOffset() + 4,
+                                St->getPointerInfo().getWithOffset(4),
                                 St->isVolatile(),
                                 St->isNonTemporal(),
                                 MinAlign(St->getAlignment(), 4));
@@ -10092,7 +11252,6 @@
   SelectionDAG &DAG = DCI.DAG;
   switch (N->getOpcode()) {
   default: break;
-  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
   case ISD::EXTRACT_VECTOR_ELT:
                         return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
@@ -10109,6 +11268,29 @@
   case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
   case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
   case ISD::ZERO_EXTEND:    return PerformZExtCombine(N, DAG);
+  case X86ISD::SHUFPS:      // Handle all target specific shuffles
+  case X86ISD::SHUFPD:
+  case X86ISD::PALIGN:
+  case X86ISD::PUNPCKHBW:
+  case X86ISD::PUNPCKHWD:
+  case X86ISD::PUNPCKHDQ:
+  case X86ISD::PUNPCKHQDQ:
+  case X86ISD::UNPCKHPS:
+  case X86ISD::UNPCKHPD:
+  case X86ISD::PUNPCKLBW:
+  case X86ISD::PUNPCKLWD:
+  case X86ISD::PUNPCKLDQ:
+  case X86ISD::PUNPCKLQDQ:
+  case X86ISD::UNPCKLPS:
+  case X86ISD::UNPCKLPD:
+  case X86ISD::MOVHLPS:
+  case X86ISD::MOVLHPS:
+  case X86ISD::PSHUFD:
+  case X86ISD::PSHUFHW:
+  case X86ISD::PSHUFLW:
+  case X86ISD::MOVSS:
+  case X86ISD::MOVSD:
+  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
   }
 
   return SDValue();
@@ -10143,14 +11325,6 @@
   }
 }
 
-static bool MayFoldLoad(SDValue Op) {
-  return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
-}
-
-static bool MayFoldIntoStore(SDValue Op) {
-  return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
-}
-
 /// IsDesirableToPromoteOp - This method query the target whether it is
 /// beneficial for dag combiner to promote the specified node. If true, it
 /// should return the desired promotion type by reference.
@@ -10356,6 +11530,34 @@
   return TargetLowering::getConstraintType(Constraint);
 }
 
+/// Examine constraint type and operand type and determine a weight value,
+/// where: -1 = invalid match, and 0 = so-so match to 3 = good match.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+int X86TargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  int weight = -1;
+  Value *CallOperandVal = info.CallOperandVal;
+  // If we don't have a value, we can't do a match,
+  // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return 0;
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    return TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+  case 'I':
+    if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+      if (C->getZExtValue() <= 31)
+        weight = 3;
+    }
+    break;
+  // etc.
+  }
+  return weight;
+}
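
The 'I' constraint accepts an immediate in [0, 31], i.e. a 32-bit shift or
rotate count, so a constant call operand in that range scores the best
weight, 3. An illustrative use of the constraint in GCC-style inline asm:

    // "I" demands a [0,31] immediate; this weight function gives the
    // constant 8 a weight of 3 when matching that alternative.
    unsigned rotl8(unsigned x) {
      asm("roll %1, %0" : "+r"(x) : "I"(8));
      return x;
    }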
+
 /// LowerXConstraint - try to replace an X constraint, which matches anything,
 /// with another that has more specific requirements based on the type of the
 /// corresponding operand.

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86ISelLowering.h Tue Oct 26 19:48:03 2010
@@ -57,35 +57,6 @@
       /// corresponds to X86::PSRLDQ.
       FSRL,
 
-      /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
-      /// integer source in memory and FP reg result.  This corresponds to the
-      /// X86::FILD*m instructions. It has three inputs (token chain, address,
-      /// and source type) and two outputs (FP value and token chain). FILD_FLAG
-      /// also produces a flag).
-      FILD,
-      FILD_FLAG,
-
-      /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
-      /// integer destination in memory and a FP reg source.  This corresponds
-      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
-      /// has two inputs (token chain and address) and two outputs (int value
-      /// and token chain).
-      FP_TO_INT16_IN_MEM,
-      FP_TO_INT32_IN_MEM,
-      FP_TO_INT64_IN_MEM,
-
-      /// FLD - This instruction implements an extending load to FP stack slots.
-      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
-      /// operand, ptr to load from, and a ValueType node indicating the type
-      /// to load to.
-      FLD,
-
-      /// FST - This instruction implements a truncating store to FP stack
-      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
-      /// chain operand, value to store, address, and a ValueType to store it
-      /// as.
-      FST,
-
       /// CALL - These operations represent an abstract X86 call
       /// instruction, which includes a bunch of information.  In particular the
       /// operands of these node are:
@@ -105,7 +76,7 @@
       ///
       CALL,
 
-      /// RDTSC_DAG - This operation implements the lowering for 
+      /// RDTSC_DAG - This operation implements the lowering for
       /// readcyclecounter
       RDTSC_DAG,
 
@@ -157,11 +128,15 @@
       /// relative displacements.
       WrapperRIP,
 
-      /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector.
-      /// Can be used to move a vector value from a MMX register to a XMM
-      /// register.
+      /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
+      /// of an XMM vector, with the high word zero filled.
       MOVQ2DQ,
 
+      /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+      /// to an MMX vector.  If you think this is too close to the previous
+      /// mnemonic, so do I; blame Intel.
+      MOVDQ2Q,
+
       /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
       /// i32, corresponds to X86::PEXTRB.
       PEXTRB,
@@ -196,17 +171,14 @@
 
       // TLSADDR - Thread Local Storage.
       TLSADDR,
-      
+
       // TLSCALL - Thread Local Storage.  When calling to an OS provided
       // thunk at the address from an earlier relocation.
       TLSCALL,
 
-      // SegmentBaseAddress - The address segment:0
-      SegmentBaseAddress,
-
       // EH_RETURN - Exception Handling helpers.
       EH_RETURN,
-      
+
       /// TC_RETURN - Tail call return.
       ///   operand #0 chain
       ///   operand #1 callee (register or absolute)
@@ -214,26 +186,16 @@
       ///   operand #3 optional in flag
       TC_RETURN,
 
-      // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
-      LCMPXCHG_DAG,
-      LCMPXCHG8_DAG,
-
-      // FNSTCW16m - Store FP control world into i16 memory.
-      FNSTCW16m,
-
       // VZEXT_MOVL - Vector move low and zero extend.
       VZEXT_MOVL,
 
-      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
-      VZEXT_LOAD,
-
       // VSHL, VSRL - Vector logical left / right shift.
       VSHL, VSRL,
 
       // CMPPD, CMPPS - Vector double/float comparison.
       CMPPD, CMPPS,
-      
+
       // PCMP* - Vector integer comparisons.
       PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
       PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
@@ -244,20 +206,58 @@
 
       // MUL_IMM - X86 specific multiply by immediate.
       MUL_IMM,
-      
+
       // PTEST - Vector bitwise comparisons
       PTEST,
 
+      // TESTP - Vector packed fp sign bitwise comparisons
+      TESTP,
+
+      // Several flavors of instructions with vector shuffle behaviors.
+      PALIGN,
+      PSHUFD,
+      PSHUFHW,
+      PSHUFLW,
+      PSHUFHW_LD,
+      PSHUFLW_LD,
+      SHUFPD,
+      SHUFPS,
+      MOVDDUP,
+      MOVSHDUP,
+      MOVSLDUP,
+      MOVSHDUP_LD,
+      MOVSLDUP_LD,
+      MOVLHPS,
+      MOVLHPD,
+      MOVHLPS,
+      MOVHLPD,
+      MOVLPS,
+      MOVLPD,
+      MOVSD,
+      MOVSS,
+      UNPCKLPS,
+      UNPCKLPD,
+      UNPCKHPS,
+      UNPCKHPD,
+      PUNPCKLBW,
+      PUNPCKLWD,
+      PUNPCKLDQ,
+      PUNPCKLQDQ,
+      PUNPCKHBW,
+      PUNPCKHWD,
+      PUNPCKHDQ,
+      PUNPCKHQDQ,
+
       // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
       // according to %al. An operator is needed so that this can be expanded
       // with control flow.
       VASTART_SAVE_XMM_REGS,
 
-      // MINGW_ALLOCA - MingW's __alloca call to do stack probing.
-      MINGW_ALLOCA,
+      // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+      WIN_ALLOCA,
 
-      // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, 
-      // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - 
+      // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+      // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
       // Atomic 64-bit binary operations.
       ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
       ATOMSUB64_DAG,
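These ATOM*64_DAG nodes let a 32-bit target expand 64-bit atomics into a
compare-and-swap loop around LCMPXCHG8B. A sketch of the shape of that
expansion, with std::atomic standing in for the raw cmpxchg8b (illustrative
only, not the backend code):

    #include <atomic>
    #include <cstdint>

    uint64_t atomic_add64(std::atomic<uint64_t> &p, uint64_t v) {
      uint64_t old = p.load();
      // compare_exchange_weak refreshes 'old' on failure, so each retry
      // recomputes the sum against the latest value.
      while (!p.compare_exchange_weak(old, old + v)) {
      }
      return old + v;
    }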
@@ -266,12 +266,55 @@
       ATOMAND64_DAG,
       ATOMNAND64_DAG,
       ATOMSWAP64_DAG,
-      
+
       // Memory barrier
       MEMBARRIER,
       MFENCE,
       SFENCE,
-      LFENCE
+      LFENCE,
+
+      // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+      LCMPXCHG_DAG,
+      LCMPXCHG8_DAG,
+
+      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+      VZEXT_LOAD,
+
+      // FNSTCW16m - Store FP control word into i16 memory.
+      FNSTCW16m,
+
+      /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+      /// integer destination in memory and a FP reg source.  This corresponds
+      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+      /// has two inputs (token chain and address) and two outputs (int value
+      /// and token chain).
+      FP_TO_INT16_IN_MEM,
+      FP_TO_INT32_IN_MEM,
+      FP_TO_INT64_IN_MEM,
+
+      /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+      /// integer source in memory and FP reg result.  This corresponds to the
+      /// X86::FILD*m instructions. It has three inputs (token chain, address,
+      /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+      /// also produces a flag.
+      FILD,
+      FILD_FLAG,
+
+      /// FLD - This instruction implements an extending load to FP stack slots.
+      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+      /// operand, ptr to load from, and a ValueType node indicating the type
+      /// to load to.
+      FLD,
+
+      /// FST - This instruction implements a truncating store to FP stack
+      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+      /// chain operand, value to store, address, and a ValueType to store it
+      /// as.
+      FST,
+
+      /// VAARG_64 - This instruction grabs the address of the next argument
+      /// from a va_list. (reads and modifies the va_list in memory)
+      VAARG_64
 
       // WARNING: Do not add anything in the end unless you want the node to
       // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
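The warning just above is load-bearing: memop-ness is decided by comparing
the opcode against a watermark, so every value appended after ATOMADD64_DAG
is assumed to touch memory. A standalone sketch of that test (the constant
here is a placeholder; the real one is ISD::FIRST_TARGET_MEMORY_OPCODE):

    enum { FIRST_TARGET_MEMORY_OPCODE = 1 << 14 };  // placeholder value

    bool isTargetMemoryOpcode(unsigned Opc) {
      return Opc >= FIRST_TARGET_MEMORY_OPCODE;
    }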
@@ -389,14 +432,14 @@
 
     /// getPICBaseSymbol - Return the X86-32 PIC base.
     MCSymbol *getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const;
-    
+
     virtual unsigned getJumpTableEncoding() const;
 
     virtual const MCExpr *
     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                               const MachineBasicBlock *MBB, unsigned uid,
                               MCContext &Ctx) const;
-    
+
     /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
     /// jumptable.
     virtual SDValue getPICJumpTableRelocBase(SDValue Table,
@@ -404,7 +447,7 @@
     virtual const MCExpr *
     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                  unsigned JTI, MCContext &Ctx) const;
-    
+
     /// getStackPtrReg - Return the stack pointer register we are using: either
     /// ESP or RSP.
     unsigned getStackPtrReg() const { return X86StackPtr; }
@@ -448,7 +491,7 @@
     virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                                     SelectionDAG &DAG) const;
 
-    
+
     virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
     /// isTypeDesirableForOp - Return true if the target has native support for
@@ -467,7 +510,7 @@
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
 
- 
+
     /// getTargetNodeName - This method returns the name of a target specific
     /// DAG node.
     virtual const char *getTargetNodeName(unsigned Opcode) const;
@@ -475,26 +518,37 @@
     /// getSetCCResultType - Return the ISD::SETCC ValueType
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
 
-    /// computeMaskedBitsForTargetNode - Determine which of the bits specified 
-    /// in Mask are known to be either zero or one and return them in the 
+    /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+    /// in Mask are known to be either zero or one and return them in the
     /// KnownZero/KnownOne bitsets.
     virtual void computeMaskedBitsForTargetNode(const SDValue Op,
                                                 const APInt &Mask,
-                                                APInt &KnownZero, 
+                                                APInt &KnownZero,
                                                 APInt &KnownOne,
                                                 const SelectionDAG &DAG,
                                                 unsigned Depth = 0) const;
 
+    // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
+    // operation that are sign bits.
+    virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                     unsigned Depth) const;
+
     virtual bool
     isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
-    
+
     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 
     virtual bool ExpandInlineAsm(CallInst *CI) const;
-    
+
     ConstraintType getConstraintType(const std::string &Constraint) const;
-     
-    std::vector<unsigned> 
+
+    /// Examine the constraint string and operand type and determine a weight
+    /// value, where -1 = invalid match, 0 = so-so match, and 3 = good match.
+    /// The operand object must already have been set up with the operand type.
+    virtual int getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
+    std::vector<unsigned>
       getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                         EVT VT) const;
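The computeMaskedBitsForTargetNode / ComputeNumSignBitsForTargetNode hooks
above feed the generic known-bits analysis. A toy, self-contained version of
the idea for a plain AND (not the X86 override itself):

    #include <cstdint>

    struct KnownBits { uint64_t Zero, One; };

    // For AND: a result bit is known zero if it is known zero in either
    // operand, and known one only if it is known one in both.
    KnownBits knownBitsForAnd(KnownBits A, KnownBits B) {
      KnownBits R;
      R.Zero = A.Zero | B.Zero;
      R.One  = A.One & B.One;
      return R;
    }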
 
@@ -508,15 +562,15 @@
                                               char ConstraintLetter,
                                               std::vector<SDValue> &Ops,
                                               SelectionDAG &DAG) const;
-    
+
     /// getRegForInlineAsmConstraint - Given a physical register constraint
     /// (e.g. {edx}), return the register number and the register class for the
     /// register.  This should only be used for C_Register constraints.  On
     /// error, this returns a register number of 0.
-    std::pair<unsigned, const TargetRegisterClass*> 
+    std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
-    
+
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
@@ -571,7 +625,7 @@
       // shrink long double fp constant since fldt is very slow.
       return !X86ScalarSSEf64 || VT == MVT::f80;
     }
-    
+
     const X86Subtarget* getSubtarget() const {
       return Subtarget;
     }
@@ -612,8 +666,8 @@
 
     /// X86StackPtr - X86 physical register used as stack ptr.
     unsigned X86StackPtr;
-   
-    /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 
+
+    /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
     /// floating point ops.
     /// When SSE is available, use it for f32 operations.
     /// When SSE2 is available, use it for f64 operations.
@@ -731,6 +785,9 @@
     SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
 
+    // Utility functions to help LowerVECTOR_SHUFFLE
+    SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv, bool isVarArg,
@@ -792,7 +849,7 @@
                                                     unsigned immOpcL,
                                                     unsigned immOpcH,
                                                     bool invSrc = false) const;
-    
+
     /// Utility function to emit atomic min and max.  It takes the min/max
     /// instruction to expand, the associated basic block, and the associated
     /// cmov opcode for moving the min or max value.
@@ -800,6 +857,11 @@
                                                           MachineBasicBlock *BB,
                                                         unsigned cmovOpc) const;
 
+    // Utility function to emit the low-level va_arg code for X86-64.
+    MachineBasicBlock *EmitVAARG64WithCustomInserter(
+                       MachineInstr *MI,
+                       MachineBasicBlock *MBB) const;
+
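EmitVAARG64WithCustomInserter expands VAARG_64 into the offset and
overflow-area bookkeeping of the SysV x86-64 va_list. For reference, the
ABI-defined layout it manipulates (per the psABI; declaration only):

    #include <cstdint>

    struct VaList64 {
      uint32_t gp_offset;          // offset of the next saved GPR argument
      uint32_t fp_offset;          // offset of the next saved XMM argument
      void    *overflow_arg_area;  // next stack-passed argument
      void    *reg_save_area;      // block of spilled register arguments
    };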
     /// Utility function to emit the xmm reg save portion of va_start.
     MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
                                                    MachineInstr *BInstr,
@@ -808,9 +870,9 @@
     MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                          MachineBasicBlock *BB) const;
 
-    MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI,
+    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
                                               MachineBasicBlock *BB) const;
-    
+
     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                           MachineBasicBlock *BB) const;
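A note on the MINGW_ALLOCA -> WIN_ALLOCA rename running through this file:
both name the _chkstk-style stack probe, which touches each newly allocated
stack page in order so the OS guard page faults predictably. Schematically
(4096-byte pages assumed for illustration; this is not the backend code):

    // Touch each page of a large downward stack allocation, top to bottom.
    void probe_stack(volatile char *sp, unsigned long bytes) {
      const unsigned long kPageSize = 4096;  // assumption
      for (unsigned long off = 0; off < bytes; off += kPageSize)
        sp[-(long)off] = 0;
    }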
 

Removed: llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td?rev=117424&view=auto
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86Instr64bit.td (removed)
@@ -1,2241 +0,0 @@
-//====- X86Instr64bit.td - Describe X86-64 Instructions ----*- tablegen -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file describes the X86-64 instruction set, defining the instructions,
-// and properties of the instructions which are needed for code generation,
-// machine code emission, and analysis.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-//
-
-// 64-bits but only 32 bits are significant.
-def i64i32imm  : Operand<i64> {
-  let ParserMatchClass = ImmSExti64i32AsmOperand;
-}
-
-// 64-bits but only 32 bits are significant, and those bits are treated as being
-// pc relative.
-def i64i32imm_pcrel : Operand<i64> {
-  let PrintMethod = "print_pcrel_imm";
-  let ParserMatchClass = X86AbsMemAsmOperand;
-}
-
-
-// 64-bits but only 8 bits are significant.
-def i64i8imm   : Operand<i64> {
-  let ParserMatchClass = ImmSExti64i8AsmOperand;
-}
-
-def lea64_32mem : Operand<i32> {
-  let PrintMethod = "printi32mem";
-  let AsmOperandLowerMethod = "lower_lea64_32mem";
-  let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-
-
-// Special i64mem for addresses of load folding tail calls. These are not
-// allowed to use callee-saved registers since they must be scheduled
-// after the callee-saved registers are popped.
-def i64mem_TC : Operand<i64> {
-  let PrintMethod = "printi64mem";
-  let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// Complex Pattern Definitions.
-//
-def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
-                        [add, sub, mul, X86mul_imm, shl, or, frameindex,
-                         X86WrapperRIP], []>;
-
-def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
-                               [tglobaltlsaddr], []>;
-                               
-//===----------------------------------------------------------------------===//
-// Pattern fragments.
-//
-
-def i64immSExt8  : PatLeaf<(i64 immSext8)>;
-
-def GetLo32XForm : SDNodeXForm<imm, [{
-  // Transformation function: get the low 32 bits.
-  return getI32Imm((unsigned)N->getZExtValue());
-}]>;
-
-def i64immSExt32  : PatLeaf<(i64 imm), [{
-  // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
-  // sign extended field.
-  return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
-}]>;
-
-
-def i64immZExt32  : PatLeaf<(i64 imm), [{
-  // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
-  // zero-extended field.
-  return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}]>;
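Both predicates reduce to round-trip checks; the equivalent logic in plain
C++ (standalone, for illustration):

    #include <cstdint>

    // i64immSExt32: survives truncation to int32_t plus sign-extension back.
    bool fitsSExt32(uint64_t V) { return (int64_t)V == (int32_t)V; }

    // i64immZExt32: survives truncation to uint32_t plus zero-extension back.
    bool fitsZExt32(uint64_t V) { return V == (uint32_t)V; }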
-
-def sextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
-def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
-def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
-
-def zextloadi64i1  : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
-def zextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
-def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
-def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
-
-def extloadi64i1   : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
-def extloadi64i8   : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
-def extloadi64i16  : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-def extloadi64i32  : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
-
-//===----------------------------------------------------------------------===//
-// Instruction list...
-//
-
-// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
-// a stack adjustment and the codegen must know that they may modify the stack
-// pointer before prolog-epilog rewriting occurs.
-// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber EFLAGS.
-let Defs = [RSP, EFLAGS], Uses = [RSP] in {
-def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
-                           "#ADJCALLSTACKDOWN",
-                           [(X86callseq_start timm:$amt)]>,
-                          Requires<[In64BitMode]>;
-def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
-                           "#ADJCALLSTACKUP",
-                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
-                          Requires<[In64BitMode]>;
-}
-
-// Interrupt Instructions
-def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iret{q}", []>;
-
-//===----------------------------------------------------------------------===//
-//  Call Instructions...
-//
-let isCall = 1 in
-  // All calls clobber the non-callee saved registers. RSP is marked as
-  // a use to prevent stack-pointer assignments that appear immediately
-  // before calls from potentially appearing dead. Uses for argument
-  // registers are added manually.
-  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [RSP] in {
-      
-    // NOTE: this pattern doesn't match "X86call imm", because we do not know
-    // that the offset between an arbitrary immediate and the call will fit in
-    // the 32-bit pcrel field that we have.
-    def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
-                          (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
-                          "call{q}\t$dst", []>,
-                        Requires<[In64BitMode, NotWin64]>;
-    def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
-                          "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
-                        Requires<[NotWin64]>;
-    def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
-                          "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
-                        Requires<[NotWin64]>;
-                        
-    def FARCALL64   : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
-                         "lcall{q}\t{*}$dst", []>;
-  }
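On the CALL64pcrel32 note above: a direct call encodes a signed 32-bit
pc-relative displacement, and an arbitrary 64-bit immediate target cannot be
assumed to be within range. The reachability test, in standalone form
(illustrative helper, not backend code):

    #include <cstdint>

    // True if Target is reachable from NextPC via a rel32 displacement.
    bool fitsPCRel32(uint64_t Target, uint64_t NextPC) {
      int64_t Disp = (int64_t)(Target - NextPC);
      return Disp == (int32_t)Disp;
    }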
-
-  // FIXME: We need to teach codegen about a single list of call-clobbered
-  // registers.
-let isCall = 1, isCodeGenOnly = 1 in
-  // All calls clobber the non-callee saved registers. RSP is marked as
-  // a use to prevent stack-pointer assignments that appear immediately
-  // before calls from potentially appearing dead. Uses for argument
-  // registers are added manually.
-  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
-              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
-      Uses = [RSP] in {
-    def WINCALL64pcrel32 : I<0xE8, RawFrm,
-                             (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
-                             "call\t$dst", []>,
-                           Requires<[IsWin64]>;
-    def WINCALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
-                             "call\t{*}$dst",
-                             [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
-    def WINCALL64m       : I<0xFF, MRM2m, (outs), 
-                             (ins i64mem:$dst, variable_ops), "call\t{*}$dst",
-                             [(X86call (loadi64 addr:$dst))]>, 
-                           Requires<[IsWin64]>;
-  }
-
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
-    isCodeGenOnly = 1 in
-  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [RSP] in {
-  def TCRETURNdi64 : I<0, Pseudo, (outs),
-                         (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
-                       "#TC_RETURN $dst $offset", []>;
-  def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
-                                           variable_ops),
-                       "#TC_RETURN $dst $offset", []>;
-  let mayLoad = 1 in
-  def TCRETURNmi64 : I<0, Pseudo, (outs), 
-                       (ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
-                       "#TC_RETURN $dst $offset", []>;
-
-  def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
-                                      (ins i64i32imm_pcrel:$dst, variable_ops),
-                   "jmp\t$dst  # TAILCALL", []>;
-  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
-                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
-
-  let mayLoad = 1 in
-  def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
-                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
-}
-
-// Branches
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
-  def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), 
-                       "jmp{q}\t$dst", []>;
-  def JMP64r     : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
-                     [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
-  def JMP64m     : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
-                     [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
-  def FARJMP64   : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
-                      "ljmp{q}\t{*}$dst", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// EH Pseudo Instructions
-//
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
-    hasCtrlDep = 1, isCodeGenOnly = 1 in {
-def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
-                     "ret\t#eh_return, addr: $addr",
-                     [(X86ehret GR64:$addr)]>;
-
-}
-
-//===----------------------------------------------------------------------===//
-//  Miscellaneous Instructions...
-//
-
-def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                    "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
-def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                    "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-
-let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
-def LEAVE64  : I<0xC9, RawFrm,
-                 (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
-def POP64r   : I<0x58, AddRegFrm,
-                 (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
-}
-let mayStore = 1 in {
-def PUSH64r  : I<0x50, AddRegFrm,
-                 (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
-}
-}
-
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSH64i8   : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), 
-                     "push{q}\t$imm", []>;
-def PUSH64i16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), 
-                      "push{q}\t$imm", []>;
-def PUSH64i32  : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
-                      "push{q}\t$imm", []>;
-}
-
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
-def POPF64   : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
-               Requires<[In64BitMode]>;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
-def PUSHF64    : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
-                 Requires<[In64BitMode]>;
-
-def LEA64_32r : I<0x8D, MRMSrcMem,
-                  (outs GR32:$dst), (ins lea64_32mem:$src),
-                  "lea{l}\t{$src|$dst}, {$dst|$src}",
-                  [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
-
-let isReMaterializable = 1 in
-def LEA64r   : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                  "lea{q}\t{$src|$dst}, {$dst|$src}",
-                  [(set GR64:$dst, lea64addr:$src)]>;
-
-let Constraints = "$src = $dst" in
-def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
-                  "bswap{q}\t$dst", 
-                  [(set GR64:$dst, (bswap GR64:$src))]>, TB;
-
-// Bit scan instructions.
-let Defs = [EFLAGS] in {
-def BSF64rr  : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                  "bsf{q}\t{$src, $dst|$dst, $src}",
-                  [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
-def BSF64rm  : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                  "bsf{q}\t{$src, $dst|$dst, $src}",
-                  [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
-
-def BSR64rr  : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                  "bsr{q}\t{$src, $dst|$dst, $src}",
-                  [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
-def BSR64rm  : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                  "bsr{q}\t{$src, $dst|$dst, $src}",
-                  [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
-} // Defs = [EFLAGS]
-
-// Repeat string ops
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
-                   [(X86rep_movs i64)]>, REP;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
-                   [(X86rep_stos i64)]>, REP;
-
-let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in
-def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
-def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
-
-def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
-
-def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
-
-// Fast system-call instructions
-def SYSEXIT64 : RI<0x35, RawFrm,
-                   (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>;
-
-//===----------------------------------------------------------------------===//
-//  Move Instructions...
-//
-
-let neverHasSideEffects = 1 in
-def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1  in {
-def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
-                    "movabs{q}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, imm:$src)]>;
-def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
-                      "mov{q}\t{$src, $dst|$dst, $src}",
-                      [(set GR64:$dst, i64immSExt32:$src)]>;
-}
-
-// The assembler accepts movq of a 64-bit immediate as an alternate spelling of
-// movabsq.
-let isAsmParserOnly = 1 in {
-def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
-                    "mov{q}\t{$src, $dst|$dst, $src}", []>;
-}
-
-let isCodeGenOnly = 1 in {
-def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                     "mov{q}\t{$src, $dst|$dst, $src}", []>;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}",
-                 [(set GR64:$dst, (load addr:$src))]>;
-
-def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}",
-                 [(store GR64:$src, addr:$dst)]>;
-def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                      "mov{q}\t{$src, $dst|$dst, $src}",
-                      [(store i64immSExt32:$src, addr:$dst)]>;
-
-/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
-let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
-def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-let mayLoad = 1,
-    canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV64rm_TC : RI<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}",
-                []>;
-
-let mayStore = 1 in
-def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}",
-                []>;
-}
-
-def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
-                      "mov{q}\t{$src, %rax|%rax, $src}", []>;
-def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
-                       "mov{q}\t{$src, %rax|%rax, $src}", []>;
-def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
-                       "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
-def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
-                       "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
-
-// Moves to and from segment registers
-def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-// Moves to and from debug registers
-def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Moves to and from control registers
-def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Sign/Zero extenders
-
-// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
-// operand, which makes it a rare instruction with an 8-bit register
-// operand that can never access an h register. If support for h registers
-// were generalized, this would require a special register class.
-def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
-                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sext GR8:$src))]>, TB;
-def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
-                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
-def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sext GR16:$src))]>, TB;
-def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
-                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
-def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
-                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sext GR32:$src))]>;
-def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
-                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
-
-// movzbq and movzwq encodings for the disassembler
-def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
-                       "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
-                       "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                       "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
-                       "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Use movzbl instead of movzbq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
-                   "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
-def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
-                   "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
-// Use movzwl instead of movzwq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                   "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
-def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
-                   "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
-
-// There's no movzlq instruction, but movl can be used for this purpose, using
-// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
-// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
-// zero-extension, however this isn't possible when the 32-bit value is
-// defined by a truncate or is copied from something where the high bits aren't
-// necessarily all zero. In such cases, we fall back to these explicit zext
-// instructions.
-def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
-                    "", [(set GR64:$dst, (zext GR32:$src))]>;
-def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
-                    "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
-
-// Most instructions that define a 32-bit result implicitly zero the high
-// half of the 64-bit destination register, but there are exceptions:
-// Truncate can be lowered to EXTRACT_SUBREG, CopyFromReg may be copying
-// from a truncate, and x86's cmov doesn't do anything if the condition is
-// false. Any other 32-bit operation will zero-extend up to 64 bits.
-def def32 : PatLeaf<(i32 GR32:$src), [{
-  return N->getOpcode() != ISD::TRUNCATE &&
-         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
-         N->getOpcode() != ISD::CopyFromReg &&
-         N->getOpcode() != X86ISD::CMOV;
-}]>;
-
-// In the case of a 32-bit def that is known to implicitly zero-extend,
-// we can use a SUBREG_TO_REG.
-def : Pat<(i64 (zext def32:$src)),
-          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
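The def32/SUBREG_TO_REG machinery relies on the architectural rule that a
genuine 32-bit operation clears bits 63:32 of its destination. The scalar
consequence, in plain C++:

    #include <cstdint>

    uint64_t zext_after_add(uint32_t a, uint32_t b) {
      uint32_t r = a + b;  // the 32-bit add already cleared the high half
      return r;            // so this widening return is free on x86-64
    }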
-
-let neverHasSideEffects = 1 in {
-  let Defs = [RAX], Uses = [EAX] in
-  def CDQE : RI<0x98, RawFrm, (outs), (ins),
-               "{cltq|cdqe}", []>;     // RAX = signext(EAX)
-
-  let Defs = [RAX,RDX], Uses = [RAX] in
-  def CQO  : RI<0x99, RawFrm, (outs), (ins),
-                "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
-}
-
-//===----------------------------------------------------------------------===//
-//  Arithmetic Instructions...
-//
-
-let Defs = [EFLAGS] in {
-
-def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
-                     "add{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isConvertibleToThreeAddress = 1 in {
-let isCommutable = 1 in
-// Register-Register Addition
-def ADD64rr    : RI<0x01, MRMDestReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "add{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, EFLAGS,
-                          (X86add_flag GR64:$src1, GR64:$src2))]>;
-
-// These are alternate spellings for use by the disassembler; we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def ADD64rr_alt  : RI<0x03, MRMSrcReg, (outs GR64:$dst), 
-                       (ins GR64:$src1, GR64:$src2),
-                       "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Integer Addition
-def ADD64ri8  : RIi8<0x83, MRM0r, (outs GR64:$dst), 
-                     (ins GR64:$src1, i64i8imm:$src2),
-                     "add{q}\t{$src2, $dst|$dst, $src2}",
-                     [(set GR64:$dst, EFLAGS,
-                           (X86add_flag GR64:$src1, i64immSExt8:$src2))]>;
-def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), 
-                      (ins GR64:$src1, i64i32imm:$src2),
-                      "add{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86add_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isConvertibleToThreeAddress
-
-// Register-Memory Addition
-def ADD64rm     : RI<0x03, MRMSrcMem, (outs GR64:$dst), 
-                     (ins GR64:$src1, i64mem:$src2),
-                     "add{q}\t{$src2, $dst|$dst, $src2}",
-                     [(set GR64:$dst, EFLAGS,
-                           (X86add_flag GR64:$src1, (load addr:$src2)))]>;
-
-} // Constraints = "$src1 = $dst"
-
-// Memory-Register Addition
-def ADD64mr  : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                  "add{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (add (load addr:$dst), GR64:$src2), addr:$dst),
-                   (implicit EFLAGS)]>;
-def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
-                    "add{q}\t{$src2, $dst|$dst, $src2}",
-                [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst),
-                 (implicit EFLAGS)]>;
-def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
-                      "add{q}\t{$src2, $dst|$dst, $src2}",
-               [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst),
-                (implicit EFLAGS)]>;
-
-let Uses = [EFLAGS] in {
-
-def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
-                     "adc{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def ADC64rr  : RI<0x11, MRMDestReg, (outs GR64:$dst), 
-                  (ins GR64:$src1, GR64:$src2),
-                  "adc{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                    "adc{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC64rm  : RI<0x13, MRMSrcMem , (outs GR64:$dst), 
-                  (ins GR64:$src1, i64mem:$src2),
-                  "adc{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>;
-
-def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i64i8imm:$src2),
-                    "adc{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>;
-def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), 
-                      (ins GR64:$src1, i64i32imm:$src2),
-                      "adc{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def ADC64mr  : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                  "adc{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>;
-def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
-                    "adc{q}\t{$src2, $dst|$dst, $src2}",
-                 [(store (adde (load addr:$dst), i64immSExt8:$src2), 
-                  addr:$dst)]>;
-def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
-                      "adc{q}\t{$src2, $dst|$dst, $src2}",
-                 [(store (adde (load addr:$dst), i64immSExt32:$src2), 
-                  addr:$dst)]>;
-} // Uses = [EFLAGS]
-
-let Constraints = "$src1 = $dst" in {
-// Register-Register Subtraction
-def SUB64rr  : RI<0x29, MRMDestReg, (outs GR64:$dst), 
-                  (ins GR64:$src1, GR64:$src2),
-                  "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86sub_flag GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                     "sub{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB64rm  : RI<0x2B, MRMSrcMem, (outs GR64:$dst), 
-                  (ins GR64:$src1, i64mem:$src2),
-                  "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS, 
-                        (X86sub_flag GR64:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst),
-                                 (ins GR64:$src1, i64i8imm:$src2),
-                    "sub{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, EFLAGS,
-                          (X86sub_flag GR64:$src1, i64immSExt8:$src2))]>;
-def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
-                                   (ins GR64:$src1, i64i32imm:$src2),
-                      "sub{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
-                     "sub{q}\t{$src, %rax|%rax, $src}", []>;
-
-// Memory-Register Subtraction
-def SUB64mr  : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
-                  "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sub (load addr:$dst), GR64:$src2), addr:$dst),
-                   (implicit EFLAGS)]>;
-
-// Memory-Integer Subtraction
-def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2), 
-                    "sub{q}\t{$src2, $dst|$dst, $src2}",
-                    [(store (sub (load addr:$dst), i64immSExt8:$src2),
-                            addr:$dst),
-                     (implicit EFLAGS)]>;
-def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
-                      "sub{q}\t{$src2, $dst|$dst, $src2}",
-                      [(store (sub (load addr:$dst), i64immSExt32:$src2),
-                              addr:$dst),
-                       (implicit EFLAGS)]>;
-
-let Uses = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
-def SBB64rr    : RI<0x19, MRMDestReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                     "sbb{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-                     
-def SBB64rm  : RI<0x1B, MRMSrcMem, (outs GR64:$dst), 
-                  (ins GR64:$src1, i64mem:$src2),
-                  "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>;
-
-def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i64i8imm:$src2),
-                    "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>;
-def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), 
-                      (ins GR64:$src1, i64i32imm:$src2),
-                      "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
-                     "sbb{q}\t{$src, %rax|%rax, $src}", []>;
-
-def SBB64mr  : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
-                  "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
-def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2), 
-                    "sbb{q}\t{$src2, $dst|$dst, $src2}",
-               [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
-def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2), 
-                      "sbb{q}\t{$src2, $dst|$dst, $src2}",
-              [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-// Unsigned multiplication
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in {
-def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
-                "mul{q}\t$src", []>;         // RAX,RDX = RAX*GR64
-let mayLoad = 1 in
-def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
-                "mul{q}\t$src", []>;         // RAX,RDX = RAX*[mem64]
-
-// Signed multiplication
-def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src),
-                 "imul{q}\t$src", []>;         // RAX,RDX = RAX*GR64
-let mayLoad = 1 in
-def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
-                 "imul{q}\t$src", []>;         // RAX,RDX = RAX*[mem64]
-}
-
-let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-// Register-Register Signed Integer Multiplication
-def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
-                                   (ins GR64:$src1, GR64:$src2),
-                  "imul{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
-
-// Register-Memory Signed Integer Multiplication
-def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
-                                   (ins GR64:$src1, i64mem:$src2),
-                  "imul{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
-} // Constraints = "$src1 = $dst"
-
-// Surprisingly enough, these are not two-address instructions!
-
-// Register-Integer Signed Integer Multiplication
-def IMUL64rri8 : RIi8<0x6B, MRMSrcReg,                      // GR64 = GR64*I8
-                      (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
-def IMUL64rri32 : RIi32<0x69, MRMSrcReg,                    // GR64 = GR64*I32
-                        (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                       [(set GR64:$dst, EFLAGS,
-                             (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
-
-// Memory-Integer Signed Integer Multiplication
-def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
-                      (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
-                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86smul_flag (load addr:$src1),
-                                          i64immSExt8:$src2))]>;
-def IMUL64rmi32 : RIi32<0x69, MRMSrcMem,                   // GR64 = [mem64]*I32
-                        (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
-                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                        [(set GR64:$dst, EFLAGS,
-                              (X86smul_flag (load addr:$src1),
-                                            i64immSExt32:$src2))]>;
-} // Defs = [EFLAGS]
-
-// Unsigned division / remainder
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in {
-// RDX:RAX/r64 = RAX,RDX
-def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
-                "div{q}\t$src", []>;
-// Signed division / remainder
-// RDX:RAX/r64 = RAX,RDX
-def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
-                "idiv{q}\t$src", []>;
-let mayLoad = 1 in {
-// RDX:RAX/[mem64] = RAX,RDX
-def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
-                "div{q}\t$src", []>;
-// RDX:RAX/[mem64] = RAX,RDX
-def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
-                "idiv{q}\t$src", []>;
-}
-}
-
-// Unary instructions
-let Defs = [EFLAGS], CodeSize = 2 in {
-let Constraints = "$src = $dst" in
-def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
-                [(set GR64:$dst, (ineg GR64:$src)),
-                 (implicit EFLAGS)]>;
-def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
-                [(store (ineg (loadi64 addr:$dst)), addr:$dst),
-                 (implicit EFLAGS)]>;
-
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
-def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
-                [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>;
-def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
-                [(store (add (loadi64 addr:$dst), 1), addr:$dst),
-                 (implicit EFLAGS)]>;
-
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
-def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
-                [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>;
-def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
-                [(store (add (loadi64 addr:$dst), -1), addr:$dst),
-                 (implicit EFLAGS)]>;
-
-// In 64-bit mode, single byte INC and DEC cannot be encoded.
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in {
-// Can transform into LEA.
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), 
-                  "inc{w}\t$dst",
-                  [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
-                OpSize, Requires<[In64BitMode]>;
-def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), 
-                  "inc{l}\t$dst",
-                  [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
-                Requires<[In64BitMode]>;
-def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), 
-                  "dec{w}\t$dst",
-                  [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
-                OpSize, Requires<[In64BitMode]>;
-def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), 
-                  "dec{l}\t$dst",
-                  [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
-                Requires<[In64BitMode]>;
-} // Constraints = "$src = $dst", isConvertibleToThreeAddress
-
-// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
-// how to unfold them.
-def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
-                  [(store (add (loadi16 addr:$dst), 1), addr:$dst),
-                    (implicit EFLAGS)]>,
-                OpSize, Requires<[In64BitMode]>;
-def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
-                  [(store (add (loadi32 addr:$dst), 1), addr:$dst),
-                    (implicit EFLAGS)]>,
-                Requires<[In64BitMode]>;
-def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
-                  [(store (add (loadi16 addr:$dst), -1), addr:$dst),
-                    (implicit EFLAGS)]>,
-                OpSize, Requires<[In64BitMode]>;
-def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
-                  [(store (add (loadi32 addr:$dst), -1), addr:$dst),
-                    (implicit EFLAGS)]>,
-                Requires<[In64BitMode]>;
-} // Defs = [EFLAGS], CodeSize
-
-
-let Defs = [EFLAGS] in {
-// Shift instructions
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
-                  "shl{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (shl GR64:$src1, CL))]>;
-let isConvertibleToThreeAddress = 1 in   // Can transform into LEA.
-def SHL64ri  : RIi8<0xC1, MRM4r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i8imm:$src2),
-                    "shl{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper.
-def SHL64r1  : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
-                 "shl{q}\t$dst", []>;
-} // Constraints = "$src1 = $dst"
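On the shift-by-one note above: x << 1 and x + x compute the same value, and
the add form is at least as cheap on x86, so no one-bit shift pattern is
needed. In scalar form:

    #include <cstdint>

    uint64_t shl_by_one(uint64_t x) { return x + x; }  // same as x << 1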
-
-let Uses = [CL] in
-def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
-                  "shl{q}\t{%cl, $dst|$dst, %CL}",
-                  [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
-                  "shl{q}\t{$src, $dst|$dst, $src}",
-                 [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
-                  "shl{q}\t$dst",
-                 [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
-                  "shr{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (srl GR64:$src1, CL))]>;
-def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
-                  "shr{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
-def SHR64r1  : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
-                 "shr{q}\t$dst",
-                 [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
-                  "shr{q}\t{%cl, $dst|$dst, %CL}",
-                  [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
-                  "shr{q}\t{$src, $dst|$dst, $src}",
-                 [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
-                  "shr{q}\t$dst",
-                 [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
-                 "sar{q}\t{%cl, $dst|$dst, %CL}",
-                 [(set GR64:$dst, (sra GR64:$src1, CL))]>;
-def SAR64ri  : RIi8<0xC1, MRM7r, (outs GR64:$dst),
-                    (ins GR64:$src1, i8imm:$src2),
-                    "sar{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
-def SAR64r1  : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
-                 "sar{q}\t$dst",
-                 [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), 
-                 "sar{q}\t{%cl, $dst|$dst, %CL}",
-                 [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SAR64mi  : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
-                    "sar{q}\t{$src, $dst|$dst, $src}",
-                 [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
-                  "sar{q}\t$dst",
-                 [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-// Rotate instructions
-
-let Constraints = "$src = $dst" in {
-def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
-                 "rcl{q}\t{1, $dst|$dst, 1}", []>;
-def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
-                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src),
-                 "rcr{q}\t{1, $dst|$dst, 1}", []>;
-def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
-                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
-                  "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
-                  "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = "$src = $dst"
-
-def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
-                 "rcl{q}\t{1, $dst|$dst, 1}", []>;
-def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
-                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
-                 "rcr{q}\t{1, $dst|$dst, 1}", []>;
-def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
-                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
-                  "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
-                  "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
-}
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
-                  "rol{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
-def ROL64ri  : RIi8<0xC1, MRM0r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i8imm:$src2),
-                    "rol{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
-def ROL64r1  : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
-                  "rol{q}\t$dst",
-                  [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def ROL64mCL :  RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
-                   "rol{q}\t{%cl, $dst|$dst, %CL}",
-                   [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def ROL64mi  : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src),
-                    "rol{q}\t{$src, $dst|$dst, $src}",
-                [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def ROL64m1  : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
-                 "rol{q}\t$dst",
-               [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
-                  "ror{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
-def ROR64ri  : RIi8<0xC1, MRM1r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i8imm:$src2),
-                    "ror{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
-def ROR64r1  : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
-                  "ror{q}\t$dst",
-                  [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), 
-                  "ror{q}\t{%cl, $dst|$dst, %CL}",
-                  [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
-def ROR64mi  : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
-                    "ror{q}\t{$src, $dst|$dst, $src}",
-                [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def ROR64m1  : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
-                 "ror{q}\t$dst",
-               [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-// Double shift instructions (generalizations of rotate)
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in {
-def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
-                    [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, 
-                    TB;
-def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
-                    [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, 
-                    TB;
-}
-
-let isCommutable = 1 in {  // FIXME: Update X86InstrInfo::commuteInstruction
-def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
-                      (outs GR64:$dst), 
-                      (ins GR64:$src1, GR64:$src2, i8imm:$src3),
-                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
-                                       (i8 imm:$src3)))]>,
-                 TB;
-def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
-                      (outs GR64:$dst), 
-                      (ins GR64:$src1, GR64:$src2, i8imm:$src3),
-                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
-                                       (i8 imm:$src3)))]>,
-                 TB;
-} // isCommutable
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in {
-def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
-                    [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
-                      addr:$dst)]>, TB;
-def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
-                    [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
-                      addr:$dst)]>, TB;
-}
-def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
-                      (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
-                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
-                                       (i8 imm:$src3)), addr:$dst)]>,
-                 TB;
-def SHRD64mri8 : RIi8<0xAC, MRMDestMem, 
-                      (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
-                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
-                                       (i8 imm:$src3)), addr:$dst)]>,
-                 TB;
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-//  Logical Instructions...
-//
-
-let Constraints = "$src = $dst" , AddedComplexity = 15 in
-def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
-                [(set GR64:$dst, (not GR64:$src))]>;
-def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
-                [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
-
-let Defs = [EFLAGS] in {
-def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
-                     "and{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def AND64rr  : RI<0x21, MRMDestReg, 
-                  (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                  "and{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86and_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                     "and{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def AND64rm  : RI<0x23, MRMSrcMem,
-                  (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                  "and{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86and_flag GR64:$src1, (load addr:$src2)))]>;
-def AND64ri8 : RIi8<0x83, MRM4r, 
-                    (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                    "and{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, EFLAGS,
-                          (X86and_flag GR64:$src1, i64immSExt8:$src2))]>;
-def AND64ri32  : RIi32<0x81, MRM4r, 
-                       (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                       "and{q}\t{$src2, $dst|$dst, $src2}",
-                       [(set GR64:$dst, EFLAGS,
-                             (X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def AND64mr  : RI<0x21, MRMDestMem,
-                  (outs), (ins i64mem:$dst, GR64:$src),
-                  "and{q}\t{$src, $dst|$dst, $src}",
-                  [(store (and (load addr:$dst), GR64:$src), addr:$dst),
-                   (implicit EFLAGS)]>;
-def AND64mi8 : RIi8<0x83, MRM4m,
-                    (outs), (ins i64mem:$dst, i64i8imm :$src),
-                    "and{q}\t{$src, $dst|$dst, $src}",
-                 [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-def AND64mi32  : RIi32<0x81, MRM4m,
-                       (outs), (ins i64mem:$dst, i64i32imm:$src),
-                       "and{q}\t{$src, $dst|$dst, $src}",
-             [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
-              (implicit EFLAGS)]>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def OR64rr   : RI<0x09, MRMDestReg, (outs GR64:$dst), 
-                  (ins GR64:$src1, GR64:$src2),
-                  "or{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86or_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "or{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def OR64rm   : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
-                  (ins GR64:$src1, i64mem:$src2),
-                  "or{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86or_flag GR64:$src1, (load addr:$src2)))]>;
-def OR64ri8  : RIi8<0x83, MRM1r, (outs GR64:$dst),
-                    (ins GR64:$src1, i64i8imm:$src2),
-                    "or{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, EFLAGS,
-                         (X86or_flag GR64:$src1, i64immSExt8:$src2))]>;
-def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
-                     (ins GR64:$src1, i64i32imm:$src2),
-                     "or{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                "or{q}\t{$src, $dst|$dst, $src}",
-                [(store (or (load addr:$dst), GR64:$src), addr:$dst),
-                 (implicit EFLAGS)]>;
-def OR64mi8  : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src),
-                    "or{q}\t{$src, $dst|$dst, $src}",
-                  [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst),
-                   (implicit EFLAGS)]>;
-def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                     "or{q}\t{$src, $dst|$dst, $src}",
-              [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
-               (implicit EFLAGS)]>;
-
-def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
-                    "or{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def XOR64rr  : RI<0x31, MRMDestReg,  (outs GR64:$dst), 
-                  (ins GR64:$src1, GR64:$src2), 
-                  "xor{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86xor_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                    "xor{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def XOR64rm  : RI<0x33, MRMSrcMem, (outs GR64:$dst), 
-                  (ins GR64:$src1, i64mem:$src2), 
-                  "xor{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86xor_flag GR64:$src1, (load addr:$src2)))]>;
-def XOR64ri8 : RIi8<0x83, MRM6r,  (outs GR64:$dst), 
-                    (ins GR64:$src1, i64i8imm:$src2),
-                    "xor{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, EFLAGS,
-                          (X86xor_flag GR64:$src1, i64immSExt8:$src2))]>;
-def XOR64ri32 : RIi32<0x81, MRM6r, 
-                      (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), 
-                      "xor{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def XOR64mr  : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                  "xor{q}\t{$src, $dst|$dst, $src}",
-                  [(store (xor (load addr:$dst), GR64:$src), addr:$dst),
-                   (implicit EFLAGS)]>;
-def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src),
-                    "xor{q}\t{$src, $dst|$dst, $src}",
-                 [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                      "xor{q}\t{$src, $dst|$dst, $src}",
-             [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
-              (implicit EFLAGS)]>;
-              
-def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src),
-                     "xor{q}\t{$src, %rax|%rax, $src}", []>;
-
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-//  Comparison Instructions...
-//
-
-// Integer comparison
-let Defs = [EFLAGS] in {
-def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src),
-                      "test{q}\t{$src, %rax|%rax, $src}", []>;
-let isCommutable = 1 in
-def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
-                  "test{q}\t{$src2, $src1|$src1, $src2}",
-                  [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>;
-def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
-                  "test{q}\t{$src2, $src1|$src1, $src2}",
-                  [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)),
-                    0))]>;
-def TEST64ri32 : RIi32<0xF7, MRM0r, (outs),
-                                        (ins GR64:$src1, i64i32imm:$src2),
-                       "test{q}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2),
-                      0))]>;
-def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
-                                        (ins i64mem:$src1, i64i32imm:$src2),
-                       "test{q}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1),
-                                           i64immSExt32:$src2), 0))]>;
-
-
-def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src),
-                     "cmp{q}\t{$src, %rax|%rax, $src}", []>;
-def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                 "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                 [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>;
-
-// These are alternate spellings for use by the disassembler; we mark them as
-// codegen-only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
-                      "cmp{q}\t{$src2, $src1|$src1, $src2}", []>;
-}
-
-def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                 "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                 [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>;
-def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
-                 "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                 [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>;
-def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                    "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>;
-def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2),
-                      "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                      [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>;
-def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                    "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
-                                          i64immSExt8:$src2))]>;
-def CMP64mi32 : RIi32<0x81, MRM7m, (outs),
-                                       (ins i64mem:$src1, i64i32imm:$src2),
-                      "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                      [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
-                                            i64immSExt32:$src2))]>;
-} // Defs = [EFLAGS]
-
-// Bit tests.
-// TODO: BTC, BTR, and BTS
-let Defs = [EFLAGS] in {
-def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-               "bt{q}\t{$src2, $src1|$src1, $src2}",
-               [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
-
-// Unlike with the register+register form, the memory+register form of the
-// bt instruction does not ignore the high bits of the index. From ISel's
-// perspective, this is pretty bizarre. Disable these instructions for now.
-def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-               "bt{q}\t{$src2, $src1|$src1, $src2}",
-//               [(X86bt (loadi64 addr:$src1), GR64:$src2),
-//                (implicit EFLAGS)]
-                []
-                >, TB;
-
-def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                "bt{q}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB,
-                REX_W;
-// Note that these instructions don't need FastBTMem because that
-// only applies when the other operand is in a register. When it's
-// an immediate, bt is still fast.
-def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                "bt{q}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86bt (loadi64 addr:$src1),
-                                     i64immSExt8:$src2))]>, TB;
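
For context on the disabled memory-form pattern above: a minimal C sketch of
the two semantics, assuming typical x86 behavior (function names here are
illustrative, not from the tree):

    /* Register form: the hardware masks the bit index to the operand width. */
    int bt_reg(unsigned long long val, unsigned idx) {
        return (int)((val >> (idx & 63)) & 1);
    }

    /* Memory form: the index selects a bit anywhere relative to the base
     * address, so it may read a different quadword entirely. */
    int bt_mem(const unsigned long long *base, unsigned long long idx) {
        return (int)((base[idx / 64] >> (idx % 64)) & 1);
    }
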
-
-def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-
-def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-
-def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-} // Defs = [EFLAGS]
-
-// Conditional moves
-let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in {
-def CMOVB64rr : RI<0x42, MRMSrcReg,       // if <u, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovb{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_B, EFLAGS))]>, TB;
-def CMOVAE64rr: RI<0x43, MRMSrcReg,       // if >=u, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovae{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_AE, EFLAGS))]>, TB;
-def CMOVE64rr : RI<0x44, MRMSrcReg,       // if ==, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmove{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_E, EFLAGS))]>, TB;
-def CMOVNE64rr: RI<0x45, MRMSrcReg,       // if !=, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovne{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_NE, EFLAGS))]>, TB;
-def CMOVBE64rr: RI<0x46, MRMSrcReg,       // if <=u, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovbe{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_BE, EFLAGS))]>, TB;
-def CMOVA64rr : RI<0x47, MRMSrcReg,       // if >u, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmova{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_A, EFLAGS))]>, TB;
-def CMOVL64rr : RI<0x4C, MRMSrcReg,       // if <s, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovl{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_L, EFLAGS))]>, TB;
-def CMOVGE64rr: RI<0x4D, MRMSrcReg,       // if >=s, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovge{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_GE, EFLAGS))]>, TB;
-def CMOVLE64rr: RI<0x4E, MRMSrcReg,       // if <=s, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovle{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_LE, EFLAGS))]>, TB;
-def CMOVG64rr : RI<0x4F, MRMSrcReg,       // if >s, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovg{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_G, EFLAGS))]>, TB;
-def CMOVS64rr : RI<0x48, MRMSrcReg,       // if signed, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovs{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_S, EFLAGS))]>, TB;
-def CMOVNS64rr: RI<0x49, MRMSrcReg,       // if !signed, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovns{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_NS, EFLAGS))]>, TB;
-def CMOVP64rr : RI<0x4A, MRMSrcReg,       // if parity, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovp{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_P, EFLAGS))]>, TB;
-def CMOVNP64rr : RI<0x4B, MRMSrcReg,       // if !parity, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovnp{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_NP, EFLAGS))]>, TB;
-def CMOVO64rr : RI<0x40, MRMSrcReg,       // if overflow, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovo{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_O, EFLAGS))]>, TB;
-def CMOVNO64rr : RI<0x41, MRMSrcReg,       // if !overflow, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovno{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_NO, EFLAGS))]>, TB;
-} // isCommutable = 1
-
-def CMOVB64rm : RI<0x42, MRMSrcMem,       // if <u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovb{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_B, EFLAGS))]>, TB;
-def CMOVAE64rm: RI<0x43, MRMSrcMem,       // if >=u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovae{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_AE, EFLAGS))]>, TB;
-def CMOVE64rm : RI<0x44, MRMSrcMem,       // if ==, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmove{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_E, EFLAGS))]>, TB;
-def CMOVNE64rm: RI<0x45, MRMSrcMem,       // if !=, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovne{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_NE, EFLAGS))]>, TB;
-def CMOVBE64rm: RI<0x46, MRMSrcMem,       // if <=u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovbe{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_BE, EFLAGS))]>, TB;
-def CMOVA64rm : RI<0x47, MRMSrcMem,       // if >u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmova{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_A, EFLAGS))]>, TB;
-def CMOVL64rm : RI<0x4C, MRMSrcMem,       // if <s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovl{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_L, EFLAGS))]>, TB;
-def CMOVGE64rm: RI<0x4D, MRMSrcMem,       // if >=s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovge{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_GE, EFLAGS))]>, TB;
-def CMOVLE64rm: RI<0x4E, MRMSrcMem,       // if <=s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovle{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_LE, EFLAGS))]>, TB;
-def CMOVG64rm : RI<0x4F, MRMSrcMem,       // if >s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovg{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_G, EFLAGS))]>, TB;
-def CMOVS64rm : RI<0x48, MRMSrcMem,       // if signed, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovs{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_S, EFLAGS))]>, TB;
-def CMOVNS64rm: RI<0x49, MRMSrcMem,       // if !signed, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovns{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_NS, EFLAGS))]>, TB;
-def CMOVP64rm : RI<0x4A, MRMSrcMem,       // if parity, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovp{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_P, EFLAGS))]>, TB;
-def CMOVNP64rm : RI<0x4B, MRMSrcMem,       // if !parity, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovnp{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_NP, EFLAGS))]>, TB;
-def CMOVO64rm : RI<0x40, MRMSrcMem,       // if overflow, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovo{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_O, EFLAGS))]>, TB;
-def CMOVNO64rm : RI<0x41, MRMSrcMem,       // if !overflow, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovno{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_NO, EFLAGS))]>, TB;
-} // Constraints = "$src1 = $dst"
-
-// Use sbb to materialize the carry flag into a GPR.
-// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change this to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
-let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in
-def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
-                 [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-
-def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C64r)>;
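
As a sketch of what the pseudo op computes (the value semantics only, not the
actual lowering code): after a compare leaves the carry flag set, "sbb %rax,
%rax" yields rax - rax - CF, i.e. 0 or all-ones, with no branch.

    #include <stdint.h>

    /* Value produced by the sbb trick after an unsigned compare a < b. */
    uint64_t materialize_carry(uint64_t a, uint64_t b) {
        return (a < b) ? ~(uint64_t)0 : 0;  /* all-ones iff CF was set */
    }
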
-
-//===----------------------------------------------------------------------===//
-// Descriptor-table support instructions
-
-// LLDT is not interpreted specially in 64-bit mode because there is no sign
-// extension.
-def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
-                 "sldt{q}\t$dst", []>, TB;
-def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
-                 "sldt{q}\t$dst", []>, TB;
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
-// smaller encoding, but doing so at isel time interferes with rematerialization
-// in the current register allocator. For now, this is rewritten when the
-// instruction is lowered to an MCInst.
-// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
-// when we have a better way to specify isel priority.
-let Defs = [EFLAGS],
-    AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
-                 [(set GR64:$dst, 0)]>;
-
-// Materialize an i64 constant whose top 32 bits are zero.  This could
-// theoretically use MOV32ri with a SUBREG_TO_REG to represent the
-// zero-extension; however, that would make it more difficult to rematerialize.
-let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
-                        "", [(set GR64:$dst, i64immZExt32:$src)]>;
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//===----------------------------------------------------------------------===//
-
-// ELF TLS Support
-// All calls clobber the non-callee saved registers. RSP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
-let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-            FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
-            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-    Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
-                   ".byte\t0x66; "
-                   "leaq\t$sym(%rip), %rdi; "
-                   ".word\t0x6666; "
-                   "rex64; "
-                   "call\t__tls_get_addr at PLT",
-                  [(X86tlsaddr tls64addr:$sym)]>,
-                  Requires<[In64BitMode]>;
-
-// Darwin TLS Support
-// For x86_64, the address of the thunk is passed in %rdi; on return,
-// the address of the variable is in %rax.  All other registers are preserved.
-let Defs = [RAX],
-    Uses = [RDI],
-    usesCustomInserter = 1 in
-def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
-                  "# TLSCall_64",
-                  [(X86TLSCall addr:$sym)]>,
-                  Requires<[In64BitMode]>;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                 "movq\t%gs:$src, $dst",
-                 [(set GR64:$dst, (gsload addr:$src))]>, SegGS;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                 "movq\t%fs:$src, $dst",
-                 [(set GR64:$dst, (fsload addr:$src))]>, SegFS;
-
-//===----------------------------------------------------------------------===//
-// Atomic Instructions
-//===----------------------------------------------------------------------===//
-
-let Defs = [RAX, EFLAGS], Uses = [RAX] in {
-def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
-               "lock\n\t"
-               "cmpxchgq\t$swap,$ptr",
-               [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
-}
-
-let Constraints = "$val = $dst" in {
-let Defs = [EFLAGS] in
-def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
-               "lock\n\t"
-               "xadd\t$val, $ptr",
-               [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
-                TB, LOCK;
-
-def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), 
-                  (ins GR64:$val,i64mem:$ptr),
-                  "xchg{q}\t{$val, $ptr|$ptr, $val}", 
-                  [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
-
-def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
-                  "xchg{q}\t{$val, $src|$src, $val}", []>;
-}
-
-def XADD64rr  : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                   "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
-let mayLoad = 1, mayStore = 1 in
-def XADD64rm  : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                   "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
-                   
-def CMPXCHG64rr  : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
-let mayLoad = 1, mayStore = 1 in
-def CMPXCHG64rm  : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
-                      
-let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
-def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
-                    "cmpxchg16b\t$dst", []>, TB;
-
-def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
-                  "xchg{q}\t{$src, %rax|%rax, $src}", []>;
-
-// Optimized codegen when the non-memory output is not used.
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
-// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                      "lock\n\t"
-                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
-                                      (ins i64mem:$dst, i64i8imm :$src2),
-                    "lock\n\t"
-                    "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
-                                        (ins i64mem:$dst, i64i32imm :$src2),
-                      "lock\n\t"
-                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
-                      "lock\n\t"
-                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
-                                      (ins i64mem:$dst, i64i8imm :$src2), 
-                      "lock\n\t"
-                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
-                                        (ins i64mem:$dst, i64i32imm:$src2),
-                      "lock\n\t"
-                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
-                     "lock\n\t"
-                     "inc{q}\t$dst", []>, LOCK;
-def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
-                      "lock\n\t"
-                      "dec{q}\t$dst", []>, LOCK;
-}
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
-                  usesCustomInserter = 1 in {
-def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMAND64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
-def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMOR64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
-def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMXOR64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
-def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMNAND64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
-def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
-               "#ATOMMIN64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
-def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMMAX64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMUMIN64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMUMAX64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
-}
-
-// Segmentation support instructions
-
-// The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
-def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), 
-                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
-                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
-                 
-def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; 
-def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
-
-def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
-                 "push{q}\t%fs", []>, TB;
-def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
-                 "push{q}\t%gs", []>, TB;
-
-def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
-                "pop{q}\t%fs", []>, TB;
-def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
-                "pop{q}\t%gs", []>, TB;
-                 
-def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
-                 "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
-                 "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
-                 "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Specialized register support
-
-// no m form encodable; use SMSW16m
-def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), 
-                 "smsw{q}\t$dst", []>, TB;
-
-// String manipulation instructions
-
-def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable references, when
-// not in small code model mode, should use 'movabs'.  FIXME: This is really a
-// hack; the 'movabs' predicate should handle this sort of thing.
-def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
-          (MOV64ri tconstpool  :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
-          (MOV64ri tjumptable  :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
-          (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
-          (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
-          (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
-
-// In static codegen with small code model, we can get the address of a label
-// into a register with 'movl'.  FIXME: This is a hack; the 'imm' predicate of
-// MOV64ri64i32 should accept these.
-def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
-          (MOV64ri64i32 tconstpool  :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
-          (MOV64ri64i32 tjumptable  :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
-          (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
-          (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
-          (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
-
-// In kernel code model, we can get the address of a label
-// into a register with 'movq'.  FIXME: This is a hack; the 'imm' predicate of
-// MOV64ri32 should accept these.
-def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
-          (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
-          (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
-          (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
-          (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
-          (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
-
-// If we are using the small code model and -static mode, it is safe to store
-// global addresses directly as immediates.  FIXME: This is really a hack; the
-// 'imm' predicate for MOV64mi32 should handle this sort of thing.
-def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tconstpool:$src)>,
-          Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tjumptable:$src)>,
-          Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
-          Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, texternalsym:$src)>,
-          Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tblockaddress:$src)>,
-          Requires<[NearData, IsStatic]>;
-
-// Calls
-// Direct PC relative function call for small code model. 32-bit displacement
-// sign extended to 64-bit.
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
-          (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
-          (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
-
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
-          (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
-          (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
-
-// Tail call stuff.
-def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
-          (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
-          Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
-          (TCRETURNmi64 addr:$dst, imm:$off)>,
-          Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
-          Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
-          (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
-          Requires<[In64BitMode]>;
-
-// TLS has some funny stuff here...
-// This corresponds to movabs $foo@tpoff, %rax
-def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
-          (MOV64ri tglobaltlsaddr :$dst)>;
-// This corresponds to add $foo@tpoff, %rax
-def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
-          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
-          (MOV64rm tglobaltlsaddr :$dst)>;
-
-// Comparisons.
-
-// TEST R,R is smaller than CMP R,0
-def : Pat<(X86cmp GR64:$src1, 0),
-          (TEST64rr GR64:$src1, GR64:$src1)>;
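
The size claim is easy to check against the usual encodings: "test %rax, %rax"
is REX.W plus opcode and ModRM (3 bytes), while "cmp $0, %rax" additionally
carries an immediate byte (4 bytes); both set ZF/SF identically for a zero
test. A trivial C function that exercises the pattern:

    int is_zero(unsigned long long x) {
        return x == 0;   /* expected to lower to testq %rdi, %rdi; sete %al */
    }
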
-
-// Conditional moves with folded loads with operands swapped and conditions
-// inverted.
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS),
-          (CMOVAE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS),
-          (CMOVB64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS),
-          (CMOVNE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS),
-          (CMOVE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS),
-          (CMOVA64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS),
-          (CMOVBE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS),
-          (CMOVGE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS),
-          (CMOVL64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS),
-          (CMOVG64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS),
-          (CMOVLE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS),
-          (CMOVNP64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS),
-          (CMOVP64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS),
-          (CMOVNS64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS),
-          (CMOVS64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS),
-          (CMOVNO64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS),
-          (CMOVO64rm GR64:$src2, addr:$src1)>;
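
These folds rest on a simple identity: cond ? load : reg equals !cond ? reg :
load, so the loaded value can always be placed in the cmov's memory operand.
A hedged C sketch:

    /* a < b ? *mem : reg  ==  a >= b ? reg : *mem, which is why the
     * inverted-condition cmov with swapped operands can fold the load. */
    long select_lt(long a, long b, const long *mem, long reg) {
        return (a < b) ? *mem : reg;
    }
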
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
-
-// extload
-// When extloading from 16-bit and smaller memory locations into 64-bit 
-// registers, use zero-extending loads so that the entire 64-bit register is 
-// defined, avoiding partial-register updates.
-def : Pat<(extloadi64i1 addr:$src),  (MOVZX64rm8  addr:$src)>;
-def : Pat<(extloadi64i8 addr:$src),  (MOVZX64rm8  addr:$src)>;
-def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
-// For other extloads, use subregs, since the high contents of the register are
-// defined after an extload.
-def : Pat<(extloadi64i32 addr:$src),
-          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
-                         sub_32bit)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8  :$src)>;
-def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
-def : Pat<(i64 (anyext GR32:$src)),
-          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
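
A small C illustration of the partial-register point; the lowerings in the
comments are the expected ones, not guaranteed:

    #include <stdint.h>

    uint64_t widen_byte(const uint8_t *p) {
        return *p;    /* movzbl (%rdi), %eax -- writes all 64 bits */
    }
    uint64_t widen_dword(const uint32_t *p) {
        return *p;    /* movl (%rdi), %eax -- bits 63:32 zeroed implicitly */
    }
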
-
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
-
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
-def : Pat<(add GR64:$src1, 128),
-          (SUB64ri8 GR64:$src1, -128)>;
-def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
-          (SUB64mi8 addr:$dst, -128)>;
-
-// The same trick applies for 32-bit immediate fields in 64-bit
-// instructions.
-def : Pat<(add GR64:$src1, 0x0000000080000000),
-          (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
-def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
-          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
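
The arithmetic behind the trick, as a quick self-check (pure two's-complement
identities, nothing LLVM-specific):

    #include <assert.h>

    int main(void) {
        unsigned long long x = 0x123456789abcdef0ULL;
        assert(x + 128 == x - (unsigned long long)-128LL);
        assert(x + 0x80000000ULL == x - 0xffffffff80000000ULL); /* mod 2^64 */
        return 0;
    }
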
-
-// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
-// has an immediate with at least 32 bits of leading zeros, to avoid needing to
-// materialize that immediate in a register first.
-def : Pat<(and GR64:$src, i64immZExt32:$imm),
-          (SUBREG_TO_REG
-            (i64 0),
-            (AND32ri
-              (EXTRACT_SUBREG GR64:$src, sub_32bit),
-              (i32 (GetLo32XForm imm:$imm))),
-            sub_32bit)>;
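
A sketch of why the narrowing is safe, assuming the mask has at least 32
leading zero bits: the 32-bit and clears the requested low bits, and the
implicit zero-extension clears bits 63:32, which the mask would have cleared
anyway.

    #include <stdint.h>

    uint64_t mask_low_bits(uint64_t x) {
        return x & 0xfffu;  /* may lower to andl $0xfff, %edi + implicit zext */
    }
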
-
-// r & (2^32-1) ==> movz
-def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
-          (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-// r & (2^16-1) ==> movz
-def : Pat<(and GR64:$src, 0xffff),
-          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR64:$src, 0xff),
-          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
-           (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
-      Requires<[In64BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
-           (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
-      Requires<[In64BitMode]>;
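
Masking to 2^N - 1 is just zero-extension of the low N bits, which movz
expresses with no immediate operand. Expected (not guaranteed) lowerings in
the comments:

    #include <stdint.h>

    uint64_t low32(uint64_t x) { return x & 0xffffffffu; }  /* movl   %edi, %eax */
    uint64_t low16(uint64_t x) { return x & 0xffffu; }      /* movzwl %di,  %eax */
    uint64_t low8 (uint64_t x) { return x & 0xffu; }        /* movzbl %dil, %eax */
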
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR64:$src, i32),
-          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-def : Pat<(sext_inreg GR64:$src, i16),
-          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR64:$src, i8),
-          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
-          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
-      Requires<[In64BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
-          (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
-      Requires<[In64BitMode]>;
-
-// trunc patterns
-def : Pat<(i32 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
-def : Pat<(i16 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
-          (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
-      Requires<[In64BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
-          (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
-      Requires<[In64BitMode]>;
-
-// h-register tricks.
-// For now, be conservative on x86-64 and use an h-register extract only if the
-// value is immediately zero-extended or stored, which are somewhat common
-// cases. This uses a bunch of code to prevent a register requiring a REX prefix
-// from being allocated in the same instruction as the h register, as there's
-// currently no way to describe this requirement to the register allocator.
-
-// h-register extract and zero-extend.
-def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
-          (SUBREG_TO_REG
-            (i64 0),
-            (MOVZX32_NOREXrr8
-              (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
-                              sub_8bit_hi)),
-            sub_32bit)>;
-def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
-          (MOVZX32_NOREXrr8
-            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
-          (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
-                                                                   GR32_ABCD)),
-                                             sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(srl GR16:$src, (i8 8)),
-          (EXTRACT_SUBREG
-            (MOVZX32_NOREXrr8
-              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              sub_8bit_hi)),
-            sub_16bit)>,
-      Requires<[In64BitMode]>;
-def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
-          (MOVZX32_NOREXrr8
-            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
-          (MOVZX32_NOREXrr8
-            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
-          (SUBREG_TO_REG
-            (i64 0),
-            (MOVZX32_NOREXrr8
-              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              sub_8bit_hi)),
-            sub_32bit)>;
-def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
-          (SUBREG_TO_REG
-            (i64 0),
-            (MOVZX32_NOREXrr8
-              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              sub_8bit_hi)),
-            sub_32bit)>;
-
-// h-register extract and store.
-def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
-          (MOV8mr_NOREX
-            addr:$dst,
-            (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
-                            sub_8bit_hi))>;
-def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
-          (MOV8mr_NOREX
-            addr:$dst,
-            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
-          (MOV8mr_NOREX
-            addr:$dst,
-            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-
-// (shl x, 1) ==> (add x, x)
-def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
-
-// (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL, 63)),
-          (SHL64rCL GR64:$src1)>;
-def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
-          (SHL64mCL addr:$dst)>;
-
-def : Pat<(srl GR64:$src1, (and CL, 63)),
-          (SHR64rCL GR64:$src1)>;
-def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
-          (SHR64mCL addr:$dst)>;
-
-def : Pat<(sra GR64:$src1, (and CL, 63)),
-          (SAR64rCL GR64:$src1)>;
-def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
-          (SAR64mCL addr:$dst)>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in {  // Try this before the selecting to OR
-def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or_is_add GR64:$src1, GR64:$src2),
-          (ADD64rr GR64:$src1, GR64:$src2)>;
-} // AddedComplexity
-
-// X86 specific add which produces a flag.
-def : Pat<(addc GR64:$src1, GR64:$src2),
-          (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(addc GR64:$src1, (load addr:$src2)),
-          (ADD64rm GR64:$src1, addr:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, imm:$src2)>;
-
-def : Pat<(subc GR64:$src1, GR64:$src2),
-          (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(subc GR64:$src1, (load addr:$src2)),
-          (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
-          (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(subc GR64:$src1, imm:$src2),
-          (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
-//===----------------------------------------------------------------------===//
-
-// addition
-def : Pat<(add GR64:$src1, GR64:$src2),
-          (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
-          (ADD64rm GR64:$src1, addr:$src2)>;
-
-// subtraction
-def : Pat<(sub GR64:$src1, GR64:$src2),
-          (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
-          (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
-          (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
-          (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Multiply
-def : Pat<(mul GR64:$src1, GR64:$src2),
-          (IMUL64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
-          (IMUL64rm GR64:$src1, addr:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
-          (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
-          (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
-          (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
-          (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-
-// inc/dec
-def : Pat<(add GR16:$src, 1),  (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, 1),  (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, 1),  (INC64r GR64:$src)>;
-def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
-
-// or
-def : Pat<(or GR64:$src1, GR64:$src2),
-          (OR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt8:$src2),
-          (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt32:$src2),
-          (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
-          (OR64rm GR64:$src1, addr:$src2)>;
-
-// xor
-def : Pat<(xor GR64:$src1, GR64:$src2),
-          (XOR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
-          (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
-          (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
-          (XOR64rm GR64:$src1, addr:$src2)>;
-
-// and
-def : Pat<(and GR64:$src1, GR64:$src2),
-          (AND64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt8:$src2),
-          (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt32:$src2),
-          (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
-          (AND64rm GR64:$src1, addr:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// X86-64 SSE Instructions
-//===----------------------------------------------------------------------===//
-
-// Move instructions...
-
-def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
-                        "mov{d|q}\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst,
-                          (v2i64 (scalar_to_vector GR64:$src)))]>;
-def MOVPQIto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
-                         "mov{d|q}\t{$src, $dst|$dst, $src}",
-                         [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
-                                           (iPTR 0)))]>;
-
-def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
-                       "mov{d|q}\t{$src, $dst|$dst, $src}",
-                       [(set FR64:$dst, (bitconvert GR64:$src))]>;
-def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
-                       "movq\t{$src, $dst|$dst, $src}",
-                       [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
-
-def MOVSDto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
-                        "mov{d|q}\t{$src, $dst|$dst, $src}",
-                        [(set GR64:$dst, (bitconvert FR64:$src))]>;
-def MOVSDto64mr  : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
-                        "movq\t{$src, $dst|$dst, $src}",
-                        [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-
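
The or_is_add patterns deleted above depend on a simple bit-level identity: when the selector can prove the two operands have no set bits in common, OR and ADD compute the same value, so either instruction may be emitted. A minimal standalone check of that identity (illustrative C++ only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Disjoint masks: no bit position is set in both values.
  uint64_t Hi = 0xFF00, Lo = 0x00FF;
  assert((Hi & Lo) == 0);         // the precondition or_is_add verifies
  assert((Hi | Lo) == (Hi + Lo)); // with no carries, OR and ADD agree
  return 0;
}

The same reasoning justifies the (shl x, 1) ==> (add x, x) rewrite above: shifting left by one is adding a value to itself.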

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrBuilder.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrBuilder.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrBuilder.h Tue Oct 26 19:48:03 2010
@@ -56,6 +56,31 @@
     : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
     Base.Reg = 0;
   }
+  
+  
+  void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+    assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+    
+    if (BaseType == X86AddressMode::RegBase)
+      MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+                                             false, false, false, 0, false));
+    else {
+      assert(BaseType == X86AddressMode::FrameIndexBase);
+      MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+    }
+    
+    MO.push_back(MachineOperand::CreateImm(Scale));
+    MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+                                           false, false, false, 0, false));
+    
+    if (GV)
+      MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+    else
+      MO.push_back(MachineOperand::CreateImm(Disp));
+    
+    MO.push_back(MachineOperand::CreateReg(0, false, false,
+                                           false, false, false, 0, false));
+  }
 };
 
 /// addDirectMem - This function is used to add a direct memory reference to the
@@ -101,10 +126,11 @@
   
   if (AM.BaseType == X86AddressMode::RegBase)
     MIB.addReg(AM.Base.Reg);
-  else if (AM.BaseType == X86AddressMode::FrameIndexBase)
+  else {
+    assert(AM.BaseType == X86AddressMode::FrameIndexBase);
     MIB.addFrameIndex(AM.Base.FrameIndex);
-  else
-    assert (0);
+  }
+
   MIB.addImm(AM.Scale).addReg(AM.IndexReg);
   if (AM.GV)
     MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
@@ -131,9 +157,8 @@
   if (TID.mayStore())
     Flags |= MachineMemOperand::MOStore;
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                            Flags, Offset,
-                            MFI.getObjectSize(FI),
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
+                            Flags, MFI.getObjectSize(FI),
                             MFI.getObjectAlignment(FI));
   return addOffset(MIB.addFrameIndex(FI), Offset)
             .addMemOperand(MMO);
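
The getFullAddress helper added above pushes the canonical five machine operands of an x86 memory reference: base register (or frame index), scale immediate, index register, displacement (plain immediate or global address), and segment register. A small sketch of the effective-address arithmetic those operands encode (hypothetical helper, written only for illustration):

#include <cstdint>

// EA = Base + Scale * Index + Disp, with Scale restricted to
// 1, 2, 4, or 8, matching the assert in getFullAddress above.
// The segment register is left out for simplicity.
uint64_t effectiveAddress(uint64_t Base, unsigned Scale,
                          uint64_t Index, int64_t Disp) {
  return Base + uint64_t(Scale) * Index + uint64_t(Disp);
}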

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrFPStack.td Tue Oct 26 19:48:03 2010
@@ -32,21 +32,24 @@
 def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
 def X86fld          : SDNode<"X86ISD::FLD", SDTX86Fld,
-                             [SDNPHasChain, SDNPMayLoad]>;
+                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86fst          : SDNode<"X86ISD::FST", SDTX86Fst,
-                             [SDNPHasChain, SDNPInFlag, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPInFlag, SDNPMayStore,
+                              SDNPMemOperand]>;
 def X86fild         : SDNode<"X86ISD::FILD", SDTX86Fild,
-                             [SDNPHasChain, SDNPMayLoad]>;
+                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86fildflag     : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
-                             [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+                             [SDNPHasChain, SDNPOutFlag, SDNPMayLoad,
+                              SDNPMemOperand]>;
 def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m",          SDTX86CwdStore,
-                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+                              SDNPMemOperand]>;
 
 //===----------------------------------------------------------------------===//
 // FPStack pattern fragments
@@ -153,7 +156,7 @@
 def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR
 }
 
-// FpIf32, FpIf64 - Floating Point Psuedo Instruction template.
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
 // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
 // f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
 // f80 instructions cannot use SSE and use neither of these.
@@ -212,11 +215,11 @@
                   [(set RFP80:$dst, 
                     (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
 def _F32m  : FPI<0xD8, fp, (outs), (ins f32mem:$src), 
-                 !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> { 
+                 !strconcat("f", asmstring, "{s}\t$src")> { 
   let mayLoad = 1; 
 }
 def _F64m  : FPI<0xDC, fp, (outs), (ins f64mem:$src), 
-                 !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> { 
+                 !strconcat("f", asmstring, "{l}\t$src")> { 
   let mayLoad = 1; 
 }
 // ST(0) = ST(0) + [memint]
@@ -245,11 +248,11 @@
                     [(set RFP80:$dst, (OpNode RFP80:$src1,
                                        (X86fild addr:$src2, i32)))]>;
 def _FI16m  : FPI<0xDE, fp, (outs), (ins i16mem:$src), 
-                  !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> { 
+                  !strconcat("fi", asmstring, "{s}\t$src")> { 
   let mayLoad = 1; 
 }
 def _FI32m  : FPI<0xDA, fp, (outs), (ins i32mem:$src), 
-                  !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> { 
+                  !strconcat("fi", asmstring, "{l}\t$src")> { 
   let mayLoad = 1; 
 }
 }
@@ -604,8 +607,8 @@
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control word = [mem16]
                   (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
 
-// Register free
-
+// FPU control instructions
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
 def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
                 "ffree\t$reg">, DD;
 
@@ -613,7 +616,8 @@
 
 def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
 
-// Operandless floating-point instructions for the disassembler
+// Operandless floating-point instructions for the disassembler.
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
 
 def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9;
 def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9;
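
Tagging the X87 nodes above with SDNPMemOperand lets the selected instructions carry a MachineMemOperand describing exactly which memory they touch, which later passes and alias analysis can exploit. A hedged sketch of how such an operand is built and attached, mirroring the addFrameReference change in X86InstrBuilder.h earlier in this patch (headers and signatures assume the same LLVM revision as the patch; they may differ elsewhere):

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// Attach a fixed-stack store memoperand to an instruction under
// construction, the same dance addFrameReference performs above.
static void attachStackStoreMMO(MachineFunction &MF,
                                const MachineInstrBuilder &MIB,
                                int FI, int64_t Offset) {
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            MFI.getObjectAlignment(FI));
  MIB.addMemOperand(MMO);
}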

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrFormats.td Tue Oct 26 19:48:03 2010
@@ -39,6 +39,8 @@
 def MRM_F0 : Format<40>;
 def MRM_F8 : Format<41>;
 def MRM_F9 : Format<42>;
+def RawFrmImm8 : Format<43>;
+def RawFrmImm16 : Format<44>;
 
 // ImmType - This specifies the immediate type used by an instruction. This is
 // part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -107,6 +109,7 @@
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
 class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
 class VEX_L  { bit hasVEX_L = 1; }
+class Has3DNow0F0FOpcode  { bit has3DNow0F0FOpcode = 1; }
 
 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
               string AsmStr, Domain d = GenericDomain>
@@ -140,6 +143,7 @@
   bit hasVEX_i8ImmReg = 0;  // Does this inst require the last source register
                             // to be encoded in an immediate field?
   bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
+  bit has3DNow0F0FOpcode = 0; // Wacky 3DNow! encoding?
 
   // TSFlags layout should be kept in sync with X86InstrInfo.h.
   let TSFlags{5-0}   = FormBits;
@@ -158,6 +162,7 @@
   let TSFlags{34}    = hasVEX_4VPrefix;
   let TSFlags{35}    = hasVEX_i8ImmReg;
   let TSFlags{36}    = hasVEX_L;
+  let TSFlags{37}    = has3DNow0F0FOpcode;
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
@@ -210,7 +215,7 @@
 class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
   : I<o, F, outs, ins, asm, []> {}
 
-// FpI_ - Floating Point Psuedo Instruction template. Not Predicated.
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
 class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
   : X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
   let FPForm = fp;
@@ -224,13 +229,13 @@
 //   Iseg32 - 16-bit segment selector, 32-bit offset
 
 class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm, 
-              list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+              list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
 
 class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, 
-              list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+              list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> {
   let Pattern = pattern;
   let CodeSize = 3;
 }
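
The new RawFrmImm formats and the has3DNow0F0FOpcode bit both live in the per-instruction TSFlags word whose layout the let-statements above spell out: the Format in bits 5-0, the 3DNow! marker in bit 37. A small sketch of how a consumer such as the encoder can unpack them (hypothetical helpers; the real accessors live alongside the TSFlags definitions in X86InstrInfo.h):

#include <cstdint>

// Bit positions mirror the "let TSFlags{...}" assignments above.
inline unsigned getFormBits(uint64_t TSFlags) {
  return unsigned(TSFlags & 0x3F);  // TSFlags{5-0}: instruction Format
}
inline bool has3DNow0F0F(uint64_t TSFlags) {
  return (TSFlags >> 37) & 1;       // TSFlags{37}: 0F 0F-encoded 3DNow!
}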

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Oct 26 19:48:03 2010
@@ -15,51 +15,8 @@
 // MMX Pattern Fragments
 //===----------------------------------------------------------------------===//
 
-def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
-
-def bc_v8i8  : PatFrag<(ops node:$in), (v8i8  (bitconvert node:$in))>;
-def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
-def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
-def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
-
-//===----------------------------------------------------------------------===//
-// MMX Masks
-//===----------------------------------------------------------------------===//
-
-// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
-// PSHUFW imm.
-def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
-  return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
-def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
-                         (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...>
-def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
-                         (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
-def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
-                               (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
-def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
-                               (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
-                         (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], MMX_SHUFFLE_get_shuf_imm>;
+def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def bc_mmx  : PatFrag<(ops node:$in), (x86mmx  (bitconvert node:$in))>;
 
 //===----------------------------------------------------------------------===//
 // SSE specific DAG Nodes.
@@ -102,7 +59,7 @@
 def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
-                        [SDNPHasChain, SDNPMayLoad]>;
+                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86vshl    : SDNode<"X86ISD::VSHL",      SDTIntShiftOp>;
 def X86vshr    : SDNode<"X86ISD::VSRL",      SDTIntShiftOp>;
 def X86cmpps   : SDNode<"X86ISD::CMPPS",     SDTX86VFCMP>;
@@ -117,9 +74,61 @@
 def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
 
 def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
-                                          SDTCisVT<1, v4f32>,
-                                          SDTCisVT<2, v4f32>]>;
+                                          SDTCisVec<1>,
+                                          SDTCisSameAs<2, 1>]>;
 def X86ptest   : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+def X86testp   : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+
+// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
+// translated into one of the target nodes below during lowering.
+// Note: this is a work in progress...
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+                                SDTCisSameAs<0,2>]>;
+
+def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
+                                 SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+                                 SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+
+def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+
+def X86PShufd  : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
+def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
+def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
+
+def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
+def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+
+def X86Movddup  : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
+def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
+def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
+
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
+def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
+def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
+
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+
+def X86Punpcklbw  : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
+def X86Punpcklwd  : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
+def X86Punpckldq  : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
+def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+
+def X86Punpckhbw  : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
+def X86Punpckhwd  : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
+def X86Punpckhdq  : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
+def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
 
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
@@ -129,9 +138,11 @@
 // the top elements.  These are used for the SSE 'ss' and 'sd' instruction
 // forms.
 def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
-                                  [SDNPHasChain, SDNPMayLoad]>;
+                                  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+                                   SDNPWantRoot]>;
 def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
-                                  [SDNPHasChain, SDNPMayLoad]>;
+                                  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+                                   SDNPWantRoot]>;
 
 def ssmem : Operand<v4f32> {
   let PrintMethod = "printf32mem";
@@ -148,12 +159,13 @@
 // SSE pattern fragments
 //===----------------------------------------------------------------------===//
 
+// 128-bit load pattern fragments
 def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
 def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
 def loadv4i32    : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
 def loadv2i64    : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
 
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit load pattern fragments
 def loadv8f32    : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
 def loadv4f64    : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
 def loadv8i32    : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
@@ -174,6 +186,8 @@
                                (f32 (alignedload node:$ptr))>;
 def alignedloadfsf64 : PatFrag<(ops node:$ptr),
                                (f64 (alignedload node:$ptr))>;
+
+// 128-bit aligned load pattern fragments
 def alignedloadv4f32 : PatFrag<(ops node:$ptr),
                                (v4f32 (alignedload node:$ptr))>;
 def alignedloadv2f64 : PatFrag<(ops node:$ptr),
@@ -183,7 +197,7 @@
 def alignedloadv2i64 : PatFrag<(ops node:$ptr),
                                (v2i64 (alignedload node:$ptr))>;
 
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit aligned load pattern fragments
 def alignedloadv8f32 : PatFrag<(ops node:$ptr),
                                (v8f32 (alignedload node:$ptr))>;
 def alignedloadv4f64 : PatFrag<(ops node:$ptr),
@@ -206,16 +220,21 @@
 
 def memopfsf32 : PatFrag<(ops node:$ptr), (f32   (memop node:$ptr))>;
 def memopfsf64 : PatFrag<(ops node:$ptr), (f64   (memop node:$ptr))>;
+
+// 128-bit memop pattern fragments
 def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
 def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
 def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
 def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
 def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
 
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit memop pattern fragments
 def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
 def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
 def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
 
 // SSSE3 uses MMX registers for some instructions. They aren't aligned on a
 // 16-byte boundary.
@@ -224,10 +243,7 @@
   return cast<LoadSDNode>(N)->getAlignment() >= 8;
 }]>;
 
-def memopv8i8  : PatFrag<(ops node:$ptr), (v8i8  (memop64 node:$ptr))>;
-def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
-def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+def memopmmx  : PatFrag<(ops node:$ptr), (x86mmx  (memop64 node:$ptr))>;
 
 // MOVNT Support
 // Like 'store', but requires the non-temporal bit to be set
@@ -255,6 +271,7 @@
   return false;
 }]>;
 
+// 128-bit bitconvert pattern fragments
 def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
 def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
 def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
@@ -262,7 +279,7 @@
 def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
 def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
 
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit bitconvert pattern fragments
 def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
 
 def vzmovl_v2i64 : PatFrag<(ops node:$src),
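
The X86InstrInfo.cpp changes below drop the old AmbEntries workaround in favor of an explicit flag: folding-table entries whose register form cannot be recovered from the memory form are tagged TB_NOT_REVERSABLE in the opcode field and skipped when the unfold map is built. A compact sketch of that encoding and of the AuxInfo bit-packing the tables use (semantics inferred from the diff that follows):

#include <cstdint>

// The high bit of an entry's memory opcode marks a many->one mapping
// that must not be inserted into MemOp2RegOpTable.
enum { TB_NOT_REVERSABLE = 1u << 31 };

inline unsigned memOpcode(uint32_t Entry) {
  return Entry & ~uint32_t(TB_NOT_REVERSABLE); // strip the flag before use
}
inline bool isReversable(uint32_t Entry) {
  return (Entry & TB_NOT_REVERSABLE) == 0;
}

// AuxInfo packs the folded operand index in the low bits plus
// folded-load (bit 4) and folded-store (bit 5) markers.
inline unsigned makeAuxInfo(unsigned OpIdx, bool Load, bool Store) {
  return OpIdx | (unsigned(Load) << 4) | (unsigned(Store) << 5);
}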

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.cpp Tue Oct 26 19:48:03 2010
@@ -34,7 +34,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/MC/MCAsmInfo.h"
-
 #include <limits>
 
 using namespace llvm;
@@ -55,7 +54,11 @@
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
     TM(tm), RI(tm, *this) {
-  SmallVector<unsigned,16> AmbEntries;
+  enum {
+    TB_NOT_REVERSABLE = 1U << 31,
+    TB_FLAGS = TB_NOT_REVERSABLE
+  };
+      
   static const unsigned OpTbl2Addr[][2] = {
     { X86::ADC32ri,     X86::ADC32mi },
     { X86::ADC32ri8,    X86::ADC32mi8 },
@@ -65,13 +68,22 @@
     { X86::ADC64rr,     X86::ADC64mr },
     { X86::ADD16ri,     X86::ADD16mi },
     { X86::ADD16ri8,    X86::ADD16mi8 },
+    { X86::ADD16ri_DB,  X86::ADD16mi  | TB_NOT_REVERSABLE },
+    { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
     { X86::ADD16rr,     X86::ADD16mr },
+    { X86::ADD16rr_DB,  X86::ADD16mr | TB_NOT_REVERSABLE },
     { X86::ADD32ri,     X86::ADD32mi },
     { X86::ADD32ri8,    X86::ADD32mi8 },
+    { X86::ADD32ri_DB,  X86::ADD32mi | TB_NOT_REVERSABLE },
+    { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
     { X86::ADD32rr,     X86::ADD32mr },
+    { X86::ADD32rr_DB,  X86::ADD32mr | TB_NOT_REVERSABLE },
     { X86::ADD64ri32,   X86::ADD64mi32 },
     { X86::ADD64ri8,    X86::ADD64mi8 },
+    { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+    { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
     { X86::ADD64rr,     X86::ADD64mr },
+    { X86::ADD64rr_DB,  X86::ADD64mr | TB_NOT_REVERSABLE },
     { X86::ADD8ri,      X86::ADD8mi },
     { X86::ADD8rr,      X86::ADD8mr },
     { X86::AND16ri,     X86::AND16mi },
@@ -216,16 +228,21 @@
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
     unsigned RegOp = OpTbl2Addr[i][0];
-    unsigned MemOp = OpTbl2Addr[i][1];
-    if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
-                                               std::make_pair(MemOp,0))).second)
-      assert(false && "Duplicated entries?");
+    unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
+    assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
+    RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
+    
+    // If this is not a reversible operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
+      continue;
+                          
     // Index 0, folded load and store, no alignment requirement.
     unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
-    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                                std::make_pair(RegOp,
-                                                              AuxInfo))).second)
-      AmbEntries.push_back(MemOp);
+    
+    assert(!MemOp2RegOpTable.count(MemOp) && 
+            "Duplicated entries in unfolding maps?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   // If the third value is 1, then it's folding either a load or a store.
@@ -235,6 +252,7 @@
     { X86::BT64ri8,     X86::BT64mi8, 1, 0 },
     { X86::CALL32r,     X86::CALL32m, 1, 0 },
     { X86::CALL64r,     X86::CALL64m, 1, 0 },
+    { X86::WINCALL64r,  X86::WINCALL64m, 1, 0 },
     { X86::CMP16ri,     X86::CMP16mi, 1, 0 },
     { X86::CMP16ri8,    X86::CMP16mi8, 1, 0 },
     { X86::CMP16rr,     X86::CMP16mr, 1, 0 },
@@ -251,8 +269,8 @@
     { X86::DIV64r,      X86::DIV64m, 1, 0 },
     { X86::DIV8r,       X86::DIV8m, 1, 0 },
     { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
-    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0, 0 },
-    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0, 0 },
+    { X86::FsMOVAPDrr,  X86::MOVSDmr | TB_NOT_REVERSABLE, 0, 0 },
+    { X86::FsMOVAPSrr,  X86::MOVSSmr | TB_NOT_REVERSABLE, 0, 0 },
     { X86::IDIV16r,     X86::IDIV16m, 1, 0 },
     { X86::IDIV32r,     X86::IDIV32m, 1, 0 },
     { X86::IDIV64r,     X86::IDIV64m, 1, 0 },
@@ -267,7 +285,6 @@
     { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
     { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
     { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
-    { X86::MOV32rr_TC,  X86::MOV32mr_TC, 0, 0 },
     { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
     { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
     { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
@@ -311,19 +328,22 @@
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
-    unsigned RegOp = OpTbl0[i][0];
-    unsigned MemOp = OpTbl0[i][1];
-    unsigned Align = OpTbl0[i][3];
-    if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+    unsigned RegOp      = OpTbl0[i][0];
+    unsigned MemOp      = OpTbl0[i][1] & ~TB_FLAGS;
     unsigned FoldedLoad = OpTbl0[i][2];
+    unsigned Align      = OpTbl0[i][3];
+    assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+    RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+    
+    // If this is not a reversible operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+      continue;
+    
     // Index 0, folded load or store.
     unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
-      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                     std::make_pair(RegOp, AuxInfo))).second)
-        AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   static const unsigned OpTbl1[][3] = {
@@ -341,8 +361,8 @@
     { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm, 0 },
     { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm, 0 },
     { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm, 0 },
-    { X86::FsMOVAPDrr,      X86::MOVSDrm, 0 },
-    { X86::FsMOVAPSrr,      X86::MOVSSrm, 0 },
+    { X86::FsMOVAPDrr,      X86::MOVSDrm | TB_NOT_REVERSABLE, 0 },
+    { X86::FsMOVAPSrr,      X86::MOVSSrm | TB_NOT_REVERSABLE, 0 },
     { X86::IMUL16rri,       X86::IMUL16rmi, 0 },
     { X86::IMUL16rri8,      X86::IMUL16rmi8, 0 },
     { X86::IMUL32rri,       X86::IMUL32rmi, 0 },
@@ -359,8 +379,8 @@
     { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm, 16 },
     { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm, 16 },
     { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm, 0 },
-    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
-    { X86::Int_CVTSD2SIrr,  X86::Int_CVTSD2SIrm, 0 },
+    { X86::CVTSD2SI64rr,    X86::CVTSD2SI64rm, 0 },
+    { X86::CVTSD2SIrr,      X86::CVTSD2SIrm, 0 },
     { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm, 0 },
     { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
     { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm, 0 },
@@ -369,8 +389,8 @@
     { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm, 0 },
     { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
     { X86::Int_CVTSS2SIrr,  X86::Int_CVTSS2SIrm, 0 },
-    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
-    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+    { X86::CVTTPD2DQrr,     X86::CVTTPD2DQrm, 16 },
+    { X86::CVTTPS2DQrr,     X86::CVTTPS2DQrm, 16 },
     { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
     { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
     { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
@@ -379,7 +399,6 @@
     { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
     { X86::MOV16rr,         X86::MOV16rm, 0 },
     { X86::MOV32rr,         X86::MOV32rm, 0 },
-    { X86::MOV32rr_TC,      X86::MOV32rm_TC, 0 },
     { X86::MOV64rr,         X86::MOV64rm, 0 },
     { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
     { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
@@ -438,25 +457,31 @@
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
     unsigned RegOp = OpTbl1[i][0];
-    unsigned MemOp = OpTbl1[i][1];
+    unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
     unsigned Align = OpTbl1[i][2];
-    if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+    assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
+    RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+    
+    // If this is not a reversible operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+      continue;
+    
     // Index 1, folded load
     unsigned AuxInfo = 1 | (1 << 4);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
-      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                     std::make_pair(RegOp, AuxInfo))).second)
-        AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   static const unsigned OpTbl2[][3] = {
     { X86::ADC32rr,         X86::ADC32rm, 0 },
     { X86::ADC64rr,         X86::ADC64rm, 0 },
     { X86::ADD16rr,         X86::ADD16rm, 0 },
+    { X86::ADD16rr_DB,      X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD32rr,         X86::ADD32rm, 0 },
+    { X86::ADD32rr_DB,      X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD64rr,         X86::ADD64rm, 0 },
+    { X86::ADD64rr_DB,      X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD8rr,          X86::ADD8rm, 0 },
     { X86::ADDPDrr,         X86::ADDPDrm, 16 },
     { X86::ADDPSrr,         X86::ADDPSrm, 16 },
@@ -651,20 +676,23 @@
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
     unsigned RegOp = OpTbl2[i][0];
-    unsigned MemOp = OpTbl2[i][1];
+    unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
     unsigned Align = OpTbl2[i][2];
-    if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+    
+    assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
+    RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
+    
+    // If this is not a reversible operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
+      continue;
+    
     // Index 2, folded load
     unsigned AuxInfo = 2 | (1 << 4);
-    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                   std::make_pair(RegOp, AuxInfo))).second)
-      AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) &&
+           "Duplicated entries in unfolding maps?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
-
-  // Remove ambiguous entries.
-  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
 }
 
 bool
@@ -744,9 +772,7 @@
   case X86::MOV8rm:
   case X86::MOV16rm:
   case X86::MOV32rm:
-  case X86::MOV32rm_TC:
   case X86::MOV64rm:
-  case X86::MOV64rm_TC:
   case X86::LD_Fp64m:
   case X86::MOVSSrm:
   case X86::MOVSDrm:
@@ -767,9 +793,7 @@
   case X86::MOV8mr:
   case X86::MOV16mr:
   case X86::MOV32mr:
-  case X86::MOV32mr_TC:
   case X86::MOV64mr:
-  case X86::MOV64mr_TC:
   case X86::ST_FpP64m:
   case X86::MOVSSmr:
   case X86::MOVSDmr:
@@ -1098,7 +1122,7 @@
   unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
     ? X86::LEA64_32r : X86::LEA32r;
   MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
   unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
             
   // Build and insert into an implicit UNDEF value. This is OK because
@@ -1136,9 +1160,12 @@
     break;
   case X86::ADD16ri:
   case X86::ADD16ri8:
+  case X86::ADD16ri_DB:
+  case X86::ADD16ri8_DB:
     addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());    
     break;
-  case X86::ADD16rr: {
+  case X86::ADD16rr:
+  case X86::ADD16rr_DB: {
     unsigned Src2 = MI->getOperand(2).getReg();
     bool isKill2 = MI->getOperand(2).isKill();
     unsigned leaInReg2 = 0;
@@ -1148,7 +1175,7 @@
       // just a single insert_subreg.
       addRegReg(MIB, leaInReg, true, leaInReg, false);
     } else {
-      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
       // Build and insert into an implicit UNDEF value. This is OK because
+      // we'll be shifting and then extracting the lower 16 bits.
       BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
@@ -1235,6 +1262,11 @@
     unsigned ShAmt = MI->getOperand(2).getImm();
     if (ShAmt == 0 || ShAmt >= 4) return 0;
 
+    // LEA can't handle RSP.
+    if (TargetRegisterInfo::isVirtualRegister(Src) &&
+        !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+      return 0;
+
     NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
       .addReg(Dest, RegState::Define | getDeadRegState(isDead))
       .addReg(0).addImm(1 << ShAmt)
@@ -1249,6 +1281,11 @@
     unsigned ShAmt = MI->getOperand(2).getImm();
     if (ShAmt == 0 || ShAmt >= 4) return 0;
 
+    // LEA can't handle ESP.
+    if (TargetRegisterInfo::isVirtualRegister(Src) &&
+        !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+      return 0;
+
     unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
     NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
       .addReg(Dest, RegState::Define | getDeadRegState(isDead))
@@ -1287,6 +1324,14 @@
       assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
       unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
         : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+
+      // LEA can't handle RSP.
+      if (TargetRegisterInfo::isVirtualRegister(Src) &&
+          !MF.getRegInfo().constrainRegClass(Src,
+                            MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
+                                                   X86::GR32_NOSPRegisterClass))
+        return 0;
+
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                               .addReg(Dest, RegState::Define |
                                       getDeadRegState(isDead)),
@@ -1309,6 +1354,13 @@
       assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
       unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
         : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+      // LEA can't handle RSP.
+      if (TargetRegisterInfo::isVirtualRegister(Src) &&
+          !MF.getRegInfo().constrainRegClass(Src,
+                            MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
+                                                   X86::GR32_NOSPRegisterClass))
+        return 0;
+
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                               .addReg(Dest, RegState::Define |
                                       getDeadRegState(isDead)),
@@ -1326,12 +1378,29 @@
                            Src, isKill, -1);
       break;
     case X86::ADD64rr:
-    case X86::ADD32rr: {
+    case X86::ADD64rr_DB:
+    case X86::ADD32rr:
+    case X86::ADD32rr_DB: {
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
-        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+      unsigned Opc;
+      TargetRegisterClass *RC;
+      if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+        Opc = X86::LEA64r;
+        RC = X86::GR64_NOSPRegisterClass;
+      } else {
+        Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+        RC = X86::GR32_NOSPRegisterClass;
+      }
+
+      
       unsigned Src2 = MI->getOperand(2).getReg();
       bool isKill2 = MI->getOperand(2).isKill();
+
+      // LEA can't handle RSP.
+      if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+          !MF.getRegInfo().constrainRegClass(Src2, RC))
+        return 0;
+
       NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                         .addReg(Dest, RegState::Define |
                                 getDeadRegState(isDead)),
@@ -1340,7 +1409,8 @@
         LV->replaceKillInstruction(Src2, MI, NewMI);
       break;
     }
-    case X86::ADD16rr: {
+    case X86::ADD16rr:
+    case X86::ADD16rr_DB: {
       if (DisableLEA16)
         return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1356,6 +1426,8 @@
     }
     case X86::ADD64ri32:
     case X86::ADD64ri8:
+    case X86::ADD64ri32_DB:
+    case X86::ADD64ri8_DB:
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
                               .addReg(Dest, RegState::Define |
@@ -1363,7 +1435,9 @@
                               Src, isKill, MI->getOperand(2).getImm());
       break;
     case X86::ADD32ri:
-    case X86::ADD32ri8: {
+    case X86::ADD32ri8:
+    case X86::ADD32ri_DB:
+    case X86::ADD32ri8_DB: {
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
       unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1374,6 +1448,8 @@
     }
     case X86::ADD16ri:
     case X86::ADD16ri8:
+    case X86::ADD16ri_DB:
+    case X86::ADD16ri8_DB:
       if (DisableLEA16)
         return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1844,6 +1920,33 @@
   return X86::GR8_ABCD_HRegClass.contains(Reg);
 }
 
+// Try to copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+  // SrcReg(VR128) -> DestReg(GR64)
+  // SrcReg(VR64)  -> DestReg(GR64)
+  // SrcReg(GR64)  -> DestReg(VR128)
+  // SrcReg(GR64)  -> DestReg(VR64)
+
+  if (X86::GR64RegClass.contains(DestReg)) {
+    if (X86::VR128RegClass.contains(SrcReg)) {
+      // Copy from a VR128 register to a GR64 register.
+      return X86::MOVPQIto64rr;
+    } else if (X86::VR64RegClass.contains(SrcReg)) {
+      // Copy from a VR64 register to a GR64 register.
+      return X86::MOVSDto64rr;
+    }
+  } else if (X86::GR64RegClass.contains(SrcReg)) {
+    // Copy from a GR64 register to a VR128 register.
+    if (X86::VR128RegClass.contains(DestReg))
+      return X86::MOV64toPQIrr;
+    // Copy from a GR64 register to a VR64 register.
+    else if (X86::VR64RegClass.contains(DestReg))
+      return X86::MOV64toSDrr;
+  }
+
+  return 0;
+}
+
 void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI, DebugLoc DL,
                                unsigned DestReg, unsigned SrcReg,
@@ -1868,6 +1971,8 @@
     Opc = X86::MOVAPSrr;
   else if (X86::VR64RegClass.contains(DestReg, SrcReg))
     Opc = X86::MMX_MOVQ64rr;
+  else
+    Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
 
   if (Opc) {
     BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -1915,13 +2020,22 @@
   default:
     llvm_unreachable("Unknown regclass");
   case X86::GR64RegClassID:
+  case X86::GR64_ABCDRegClassID:
+  case X86::GR64_NOREXRegClassID:
+  case X86::GR64_NOREX_NOSPRegClassID:
   case X86::GR64_NOSPRegClassID:
+  case X86::GR64_TCRegClassID:
     return load ? X86::MOV64rm : X86::MOV64mr;
   case X86::GR32RegClassID:
-  case X86::GR32_NOSPRegClassID:
+  case X86::GR32_ABCDRegClassID:
   case X86::GR32_ADRegClassID:
+  case X86::GR32_NOREXRegClassID:
+  case X86::GR32_NOSPRegClassID:
+  case X86::GR32_TCRegClassID:
     return load ? X86::MOV32rm : X86::MOV32mr;
   case X86::GR16RegClassID:
+  case X86::GR16_ABCDRegClassID:
+  case X86::GR16_NOREXRegClassID:
     return load ? X86::MOV16rm : X86::MOV16mr;
   case X86::GR8RegClassID:
     // Copying to or from a physical H register on x86-64 requires a NOREX
@@ -1931,32 +2045,14 @@
       return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
     else
       return load ? X86::MOV8rm : X86::MOV8mr;
-  case X86::GR64_ABCDRegClassID:
-    return load ? X86::MOV64rm : X86::MOV64mr;
-  case X86::GR32_ABCDRegClassID:
-    return load ? X86::MOV32rm : X86::MOV32mr;
-  case X86::GR16_ABCDRegClassID:
-    return load ? X86::MOV16rm : X86::MOV16mr;
   case X86::GR8_ABCD_LRegClassID:
+  case X86::GR8_NOREXRegClassID:
     return load ? X86::MOV8rm :X86::MOV8mr;
   case X86::GR8_ABCD_HRegClassID:
     if (TM.getSubtarget<X86Subtarget>().is64Bit())
       return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
     else
       return load ? X86::MOV8rm : X86::MOV8mr;
-  case X86::GR64_NOREXRegClassID:
-  case X86::GR64_NOREX_NOSPRegClassID:
-    return load ? X86::MOV64rm : X86::MOV64mr;
-  case X86::GR32_NOREXRegClassID:
-    return load ? X86::MOV32rm : X86::MOV32mr;
-  case X86::GR16_NOREXRegClassID:
-    return load ? X86::MOV16rm : X86::MOV16mr;
-  case X86::GR8_NOREXRegClassID:
-    return load ? X86::MOV8rm : X86::MOV8mr;
-  case X86::GR64_TCRegClassID:
-    return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
-  case X86::GR32_TCRegClassID:
-    return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
   case X86::RFP80RegClassID:
     return load ? X86::LD_Fp80m : X86::ST_FpP80m;
   case X86::RFP64RegClassID:
@@ -2208,7 +2304,7 @@
                                     MachineInstr *MI, unsigned i,
                                     const SmallVectorImpl<MachineOperand> &MOs,
                                     unsigned Size, unsigned Align) const {
-  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
+  const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
   bool isTwoAddrFold = false;
   unsigned NumOps = MI->getDesc().getNumOperands();
   bool isTwoAddr = NumOps > 1 &&
@@ -2246,8 +2342,8 @@
   // If table selected...
   if (OpcodeTablePtr) {
     // Find the Opcode to fuse
-    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-      OpcodeTablePtr->find((unsigned*)MI->getOpcode());
+    DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+      OpcodeTablePtr->find(MI->getOpcode());
     if (I != OpcodeTablePtr->end()) {
       unsigned Opcode = I->second.first;
       unsigned MinAlign = I->second.second;
@@ -2313,8 +2409,8 @@
     case X86::Int_CVTSS2SDrr:
     case X86::RCPSSr:
     case X86::RCPSSr_Int:
-    case X86::ROUNDSDr_Int:
-    case X86::ROUNDSSr_Int:
+    case X86::ROUNDSDr:
+    case X86::ROUNDSSr:
     case X86::RSQRTSSr:
     case X86::RSQRTSSr_Int:
     case X86::SQRTSSr:
@@ -2365,8 +2461,8 @@
     case X86::Int_CVTSS2SDrr:
     case X86::RCPSSr:
     case X86::RCPSSr_Int:
-    case X86::ROUNDSDr_Int:
-    case X86::ROUNDSSr_Int:
+    case X86::ROUNDSDr:
+    case X86::ROUNDSSr:
     case X86::RSQRTSSr:
     case X86::RSQRTSSr_Int:
     case X86::SQRTSSr:
@@ -2380,10 +2476,17 @@
     Alignment = (*LoadMI->memoperands_begin())->getAlignment();
   else
     switch (LoadMI->getOpcode()) {
+    case X86::AVX_SET0PSY:
+    case X86::AVX_SET0PDY:
+      Alignment = 32;
+      break;
     case X86::V_SET0PS:
     case X86::V_SET0PD:
     case X86::V_SET0PI:
     case X86::V_SETALLONES:
+    case X86::AVX_SET0PS:
+    case X86::AVX_SET0PD:
+    case X86::AVX_SET0PI:
       Alignment = 16;
       break;
     case X86::FsFLD0SD:
@@ -2410,12 +2513,22 @@
   } else if (Ops.size() != 1)
     return NULL;
 
+  // Make sure the subregisters match.
+  // Otherwise we risk changing the size of the load.
+  if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+    return NULL;
+
   SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
   switch (LoadMI->getOpcode()) {
   case X86::V_SET0PS:
   case X86::V_SET0PD:
   case X86::V_SET0PI:
   case X86::V_SETALLONES:
+  case X86::AVX_SET0PS:
+  case X86::AVX_SET0PD:
+  case X86::AVX_SET0PI:
+  case X86::AVX_SET0PSY:
+  case X86::AVX_SET0PDY:
   case X86::FsFLD0SD:
   case X86::FsFLD0SS: {
     // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
@@ -2442,10 +2555,13 @@
     // Create a constant-pool entry.
     MachineConstantPool &MCP = *MF.getConstantPool();
     const Type *Ty;
-    if (LoadMI->getOpcode() == X86::FsFLD0SS)
+    unsigned Opc = LoadMI->getOpcode();
+    if (Opc == X86::FsFLD0SS)
       Ty = Type::getFloatTy(MF.getFunction()->getContext());
-    else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+    else if (Opc == X86::FsFLD0SD)
       Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+    else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
+      Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
     else
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
     const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
@@ -2501,7 +2617,7 @@
   // Folding a memory location into the two-address part of a two-address
   // instruction is different than folding it other places.  It requires
   // replacing the *two* registers with the memory location.
-  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
+  const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
   if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 
     OpcodeTablePtr = &RegOp2MemOpTable2Addr;
   } else if (OpNum == 0) { // If operand 0
@@ -2509,8 +2625,7 @@
     case X86::MOV8r0:
     case X86::MOV16r0:
     case X86::MOV32r0:
-    case X86::MOV64r0:
-      return true;
+    case X86::MOV64r0: return true;
     default: break;
     }
     OpcodeTablePtr = &RegOp2MemOpTable0;
@@ -2520,21 +2635,16 @@
     OpcodeTablePtr = &RegOp2MemOpTable2;
   }
   
-  if (OpcodeTablePtr) {
-    // Find the Opcode to fuse
-    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-      OpcodeTablePtr->find((unsigned*)Opc);
-    if (I != OpcodeTablePtr->end())
-      return true;
-  }
+  if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+    return true;
   return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
 }
 
 bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                 unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-    MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
+  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+    MemOp2RegOpTable.find(MI->getOpcode());
   if (I == MemOp2RegOpTable.end())
     return false;
   unsigned Opc = I->second.first;
@@ -2662,8 +2772,8 @@
   if (!N->isMachineOpcode())
     return false;
 
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-    MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
+  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+    MemOp2RegOpTable.find(N->getMachineOpcode());
   if (I == MemOp2RegOpTable.end())
     return false;
   unsigned Opc = I->second.first;
@@ -2763,8 +2873,8 @@
 unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                       bool UnfoldLoad, bool UnfoldStore,
                                       unsigned *LoadRegIndex) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-    MemOp2RegOpTable.find((unsigned*)Opc);
+  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+    MemOp2RegOpTable.find(Opc);
   if (I == MemOp2RegOpTable.end())
     return 0;
   bool FoldedLoad = I->second.second & (1 << 4);
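
The unfold table packs properties of the mapped opcode into the pair's second member; the bit tested above, together with its assumed companion bit for the store case, reads as:

    bool FoldedLoad  = Flags & (1 << 4); // unfolded form performs the load
    bool FoldedStore = Flags & (1 << 5); // store counterpart (assumed layout)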
@@ -2943,6 +3053,8 @@
   case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
   case X86::YMM8:  case X86::YMM9:  case X86::YMM10: case X86::YMM11:
   case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+  case X86::CR8:   case X86::CR9:   case X86::CR10:  case X86::CR11:
+  case X86::CR12:  case X86::CR13:  case X86::CR14:  case X86::CR15:
     return true;
   }
   return false;
@@ -2975,7 +3087,7 @@
 // that we don't include here. We don't want to replace instructions selected
 // by intrinsics.
 static const unsigned ReplaceableInstrs[][3] = {
-  //PackedInt       PackedSingle     PackedDouble
+  //PackedSingle     PackedDouble    PackedInt
   { X86::MOVAPSmr,   X86::MOVAPDmr,  X86::MOVDQAmr  },
   { X86::MOVAPSrm,   X86::MOVAPDrm,  X86::MOVDQArm  },
   { X86::MOVAPSrr,   X86::MOVAPDrr,  X86::MOVDQArr  },
@@ -2991,6 +3103,22 @@
   { X86::V_SET0PS,   X86::V_SET0PD,  X86::V_SET0PI  },
   { X86::XORPSrm,    X86::XORPDrm,   X86::PXORrm    },
   { X86::XORPSrr,    X86::XORPDrr,   X86::PXORrr    },
+  // AVX 128-bit support
+  { X86::VMOVAPSmr,  X86::VMOVAPDmr,  X86::VMOVDQAmr  },
+  { X86::VMOVAPSrm,  X86::VMOVAPDrm,  X86::VMOVDQArm  },
+  { X86::VMOVAPSrr,  X86::VMOVAPDrr,  X86::VMOVDQArr  },
+  { X86::VMOVUPSmr,  X86::VMOVUPDmr,  X86::VMOVDQUmr  },
+  { X86::VMOVUPSrm,  X86::VMOVUPDrm,  X86::VMOVDQUrm  },
+  { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+  { X86::VANDNPSrm,  X86::VANDNPDrm,  X86::VPANDNrm   },
+  { X86::VANDNPSrr,  X86::VANDNPDrr,  X86::VPANDNrr   },
+  { X86::VANDPSrm,   X86::VANDPDrm,   X86::VPANDrm    },
+  { X86::VANDPSrr,   X86::VANDPDrr,   X86::VPANDrr    },
+  { X86::VORPSrm,    X86::VORPDrm,    X86::VPORrm     },
+  { X86::VORPSrr,    X86::VORPDrr,    X86::VPORrr     },
+  { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
+  { X86::VXORPSrm,   X86::VXORPDrm,   X86::VPXORrm    },
+  { X86::VXORPSrr,   X86::VXORPDrr,   X86::VPXORrr    },
 };
 
 // FIXME: Some shuffle and unpack instructions have equivalents in different
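
Each row of ReplaceableInstrs names the same operation in its three execution domains, so domain fixup reduces to a row scan. A sketch of the lookup this table supports (the helper name is illustrative, not this patch's code):

    // Domain: 0 = PackedSingle, 1 = PackedDouble, 2 = PackedInt.
    static unsigned lookupInDomain(unsigned Opc, unsigned Domain) {
      for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
        for (unsigned c = 0; c != 3; ++c)
          if (ReplaceableInstrs[i][c] == Opc)
            return ReplaceableInstrs[i][Domain]; // same op, requested domain
      return 0; // not replaceable; leave the instruction alone
    }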
@@ -3024,12 +3152,47 @@
   NopInst.setOpcode(X86::NOOP);
 }
 
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+                      const MachineRegisterInfo *MRI,
+                      const MachineInstr *DefMI, unsigned DefIdx,
+                      const MachineInstr *UseMI, unsigned UseIdx) const {
+  switch (DefMI->getOpcode()) {
+  default: return false;
+  case X86::DIVSDrm:
+  case X86::DIVSDrm_Int:
+  case X86::DIVSDrr:
+  case X86::DIVSDrr_Int:
+  case X86::DIVSSrm:
+  case X86::DIVSSrm_Int:
+  case X86::DIVSSrr:
+  case X86::DIVSSrr_Int:
+  case X86::SQRTPDm:
+  case X86::SQRTPDm_Int:
+  case X86::SQRTPDr:
+  case X86::SQRTPDr_Int:
+  case X86::SQRTPSm:
+  case X86::SQRTPSm_Int:
+  case X86::SQRTPSr:
+  case X86::SQRTPSr_Int:
+  case X86::SQRTSDm:
+  case X86::SQRTSDm_Int:
+  case X86::SQRTSDr:
+  case X86::SQRTSDr_Int:
+  case X86::SQRTSSm:
+  case X86::SQRTSSm_Int:
+  case X86::SQRTSSr:
+  case X86::SQRTSSr_Int:
+    return true;
+  }
+}
+
 namespace {
   /// CGBR - Create Global Base Reg pass. This initializes the PIC
   /// global base register for x86-32.
   struct CGBR : public MachineFunctionPass {
     static char ID;
-    CGBR() : MachineFunctionPass(&ID) {}
+    CGBR() : MachineFunctionPass(ID) {}
 
     virtual bool runOnMachineFunction(MachineFunction &MF) {
       const X86TargetMachine *TM =
@@ -3042,6 +3205,13 @@
       if (TM->getRelocationModel() != Reloc::PIC_)
         return false;
 
+      X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+      unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+      // If we didn't need a GlobalBaseReg, don't insert code.
+      if (GlobalBaseReg == 0)
+        return false;
+
       // Insert the set of GlobalBaseReg into the first MBB of the function
       MachineBasicBlock &FirstMBB = MF.front();
       MachineBasicBlock::iterator MBBI = FirstMBB.begin();
@@ -3053,7 +3223,7 @@
       if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
         PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
       else
-        PC = TII->getGlobalBaseReg(&MF);
+        PC = GlobalBaseReg;
   
       // Operand of MovePCtoStack is completely ignored by asm printer. It's
       // only used in JIT code emission as displacement to pc.
@@ -3062,7 +3232,6 @@
       // If we're using vanilla 'GOT' PIC style, we should use relative addressing
       // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
       if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
-        unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
         // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
         BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
           .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
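
The hasHighOperandLatency hook added above flags scalar divides and square roots, whose results stall dependent instructions for many cycles. A hypothetical caller in a scheduling heuristic (the names and penalty value are illustrative, not the committed interface) might use it like this:

    if (TII->hasHighOperandLatency(ItinData, MRI, DefMI, DefIdx, UseMI, UseIdx))
      Penalty += 10; // bias against scheduling the use right behind a div/sqrt def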

Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.h Tue Oct 26 19:48:03 2010
@@ -312,6 +312,17 @@
     MRM_F8 = 41,
     MRM_F9 = 42,
 
+    /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+    /// immediates, the first of which is a 16-bit immediate (specified by
+    /// the imm encoding) and the second is an 8-bit fixed value.
+    RawFrmImm8 = 43,
+    
+    /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+    /// immediates, the first of which is a 16- or 32-bit immediate (specified by
+    /// the imm encoding) and the second is a 16-bit fixed value.  In the AMD
+    /// manual, this operand is described as pntr16:32 and pntr16:16.
+    RawFrmImm16 = 44,
+
     FormMask       = 63,
 
     //===------------------------------------------------------------------===//
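
As a concrete illustration of RawFrmImm8: ENTER encodes as C8 iw ib, i.e. the opcode byte followed by a 16-bit frame size and a fixed 8-bit nesting level. A hypothetical emitter (emitByte/emitWord are illustrative helpers, not this patch's API) would lay it out as:

    emitByte(0xC8);         // ENTER opcode
    emitWord(FrameSize);    // iw: the 16-bit immediate from the imm encoding
    emitByte(NestingLevel); // ib: the fixed 8-bit immediate RawFrmImm8 describes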
@@ -438,28 +449,36 @@
     OpcodeMask    = 0xFF << OpcodeShift,
 
     //===------------------------------------------------------------------===//
-    // VEX - The opcode prefix used by AVX instructions
-    VEX         = 1ULL << 32,
+    /// VEX - The opcode prefix used by AVX instructions
+    VEX         = 1U << 0,
 
-    // VEX_W - Has a opcode specific functionality, but is used in the same
-    // way as REX_W is for regular SSE instructions.
-    VEX_W       = 1ULL << 33,
-
-    // VEX_4V - Used to specify an additional AVX/SSE register. Several 2
-    // address instructions in SSE are represented as 3 address ones in AVX
-    // and the additional register is encoded in VEX_VVVV prefix.
-    VEX_4V      = 1ULL << 34,
-
-    // VEX_I8IMM - Specifies that the last register used in a AVX instruction,
-    // must be encoded in the i8 immediate field. This usually happens in
-    // instructions with 4 operands.
-    VEX_I8IMM   = 1ULL << 35,
-
-    // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
-    // instruction uses 256-bit wide registers. This is usually auto detected if
-    // a VR256 register is used, but some AVX instructions also have this field
-    // marked when using a f256 memory references.
-    VEX_L       = 1ULL << 36
+    /// VEX_W - Has opcode-specific functionality, but is used in the same
+    /// way as REX_W is for regular SSE instructions.
+    VEX_W       = 1U << 1,
+
+    /// VEX_4V - Used to specify an additional AVX/SSE register. Several
+    /// 2-address instructions in SSE are represented as 3-address ones in AVX,
+    /// and the additional register is encoded in the VEX_VVVV prefix.
+    VEX_4V      = 1U << 2,
+
+    /// VEX_I8IMM - Specifies that the last register used in an AVX instruction
+    /// must be encoded in the i8 immediate field. This usually happens in
+    /// instructions with 4 operands.
+    VEX_I8IMM   = 1U << 3,
+
+    /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+    /// instruction uses 256-bit wide registers. This is usually auto-detected
+    /// if a VR256 register is used, but some AVX instructions also have this
+    /// field marked when using f256 memory references.
+    VEX_L       = 1U << 4,
+    
+    /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+    /// wacky 0x0F 0x0F prefix for 3DNow! instructions.  The manual documents
+    /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+    /// storing a classifier in the imm8 field.  To simplify our implementation,
+    /// we handle this by storing the classifier in the opcode field and using
+    /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+    Has3DNow0F0FOpcode = 1U << 5
   };
   
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -522,11 +541,13 @@
     case X86II::AddRegFrm:
     case X86II::MRMDestReg:
     case X86II::MRMSrcReg:
+    case X86II::RawFrmImm8:
+    case X86II::RawFrmImm16:
        return -1;
     case X86II::MRMDestMem:
       return 0;
     case X86II::MRMSrcMem: {
-      bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+      bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V;
       unsigned FirstMemOp = 1;
       if (HasVEX_4V)
         ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
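
Because the VEX attributes now occupy their own small enum stored above bit 32 of TSFlags, every test shifts the flags down first, as the line above does; the VEX_L test below is shown by analogy, not taken from this patch:

    bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V; // as in getMemoryOperandNo
    bool HasVEX_L  = (TSFlags >> 32) & X86II::VEX_L;  // analogous 256-bit check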
@@ -592,14 +613,14 @@
   /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
   /// RegOp2MemOpTable2 - Load / store folding opcode maps.
   ///
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
+  DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+  DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+  DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+  DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
   
   /// MemOp2RegOpTable - Load / store unfolding opcode map.
   ///
-  DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+  DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
 
 public:
   explicit X86InstrInfo(X86TargetMachine &tm);
@@ -838,18 +859,23 @@
   /// SetSSEDomain - Set the SSEDomain of MI.
   void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
 
+  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr* MI,
+                                      unsigned OpNum,
+                                      const SmallVectorImpl<MachineOperand> &MOs,
+                                      unsigned Size, unsigned Alignment) const;
+
+  bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+                             const MachineRegisterInfo *MRI,
+                             const MachineInstr *DefMI, unsigned DefIdx,
+                             const MachineInstr *UseMI, unsigned UseIdx) const;
+  
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
                                               MachineBasicBlock::iterator &MBBI,
                                               LiveVariables *LV) const;
 
-  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                     MachineInstr* MI,
-                                     unsigned OpNum,
-                                     const SmallVectorImpl<MachineOperand> &MOs,
-                                     unsigned Size, unsigned Alignment) const;
-
   /// isFrameOperand - Return true and the FrameIndex if the specified
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,




