[llvm-commits] Using tabs to make assembly output slightly prettier

Dan Gohman djg at cray.com
Mon Jul 30 12:14:41 PDT 2007


Attached is a patch that changes the x86 assembly output to use tab
characters, instead of single spaces, to separate mnemonics from their
operands. This makes the assembly output a little more consistent with
other compilers (e.g. GCC), and slightly easier to read.
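
For illustration, here's roughly what the change does to the AT&T-syntax
output (the instructions below are made-up examples, not taken from any
particular test, and the column the operands land in depends on the
consumer's tab stops):

  Before (single space after the mnemonic):

	movss 4(%esp), %xmm0
	cvttss2si %xmm0, %eax

  After (hard tab after the mnemonic):

	movss	4(%esp), %xmm0
	cvttss2si	%xmm0, %eax

Since the \t sits before the {AT&T|Intel} operand braces in each asm
string, the tab applies to both dialects.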

Any objections?

Dan

-- 
Dan Gohman, Cray Inc.
-------------- next part --------------
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td	(revision 40587)
+++ lib/Target/X86/X86InstrSSE.td	(working copy)
@@ -293,56 +293,56 @@
 
 // Move Instructions
 def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
-                  "movss {$src, $dst|$dst, $src}", []>;
+                  "movss\t{$src, $dst|$dst, $src}", []>;
 def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
-                  "movss {$src, $dst|$dst, $src}",
+                  "movss\t{$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (loadf32 addr:$src))]>;
 def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
-                  "movss {$src, $dst|$dst, $src}",
+                  "movss\t{$src, $dst|$dst, $src}",
                   [(store FR32:$src, addr:$dst)]>;
 
 // Conversion instructions
 def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
-                      "cvttss2si {$src, $dst|$dst, $src}",
+                      "cvttss2si\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
 def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
-                      "cvttss2si {$src, $dst|$dst, $src}",
+                      "cvttss2si\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
 def CVTSI2SSrr  : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
-                      "cvtsi2ss {$src, $dst|$dst, $src}",
+                      "cvtsi2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
 def CVTSI2SSrm  : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
-                      "cvtsi2ss {$src, $dst|$dst, $src}",
+                      "cvtsi2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
 
 // Match intrinsics which expect XMM operand(s).
 def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                         "cvtss2si {$src, $dst|$dst, $src}",
+                         "cvtss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>;
 def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
-                         "cvtss2si {$src, $dst|$dst, $src}",
+                         "cvtss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst, (int_x86_sse_cvtss2si
                                            (load addr:$src)))]>;
 
 // Aliases for intrinsics
 def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                          "cvttss2si {$src, $dst|$dst, $src}",
+                          "cvttss2si\t{$src, $dst|$dst, $src}",
                           [(set GR32:$dst,
                             (int_x86_sse_cvttss2si VR128:$src))]>;
 def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
-                          "cvttss2si {$src, $dst|$dst, $src}",
+                          "cvttss2si\t{$src, $dst|$dst, $src}",
                           [(set GR32:$dst,
                             (int_x86_sse_cvttss2si(load addr:$src)))]>;
 
 let isTwoAddress = 1 in {
   def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
                            (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
-                           "cvtsi2ss {$src2, $dst|$dst, $src2}",
+                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
                            [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
                                               GR32:$src2))]>;
   def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
                            (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
-                           "cvtsi2ss {$src2, $dst|$dst, $src2}",
+                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
                            [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
                                               (loadi32 addr:$src2)))]>;
 }
@@ -351,45 +351,45 @@
 let isTwoAddress = 1 in {
   def CMPSSrr : SSI<0xC2, MRMSrcReg, 
                     (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
-                    "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
+                    "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
   def CMPSSrm : SSI<0xC2, MRMSrcMem, 
                     (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
-                    "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
+                    "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
 }
 
 def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
-                   "ucomiss {$src2, $src1|$src1, $src2}",
+                   "ucomiss\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR32:$src1, FR32:$src2)]>;
 def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
-                   "ucomiss {$src2, $src1|$src1, $src2}",
+                   "ucomiss\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
 
 // Aliases to match intrinsics which expect XMM operand(s).
 let isTwoAddress = 1 in {
   def Int_CMPSSrr : SSI<0xC2, MRMSrcReg, 
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
-                        "cmp${cc}ss {$src, $dst|$dst, $src}",
+                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
                                            VR128:$src, imm:$cc))]>;
   def Int_CMPSSrm : SSI<0xC2, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc),
-                        "cmp${cc}ss {$src, $dst|$dst, $src}",
+                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
                                            (load addr:$src), imm:$cc))]>;
 }
 
 def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                       "ucomiss {$src2, $src1|$src1, $src2}",
+                       "ucomiss\t{$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v4f32 VR128:$src1), VR128:$src2)]>;
 def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
-                       "ucomiss {$src2, $src1|$src1, $src2}",
+                       "ucomiss\t{$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2))]>;
 
 def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                      "comiss {$src2, $src1|$src1, $src2}",
+                      "comiss\t{$src2, $src1|$src1, $src2}",
                       [(X86comi (v4f32 VR128:$src1), VR128:$src2)]>;
 def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
-                      "comiss {$src2, $src1|$src1, $src2}",
+                      "comiss\t{$src2, $src1|$src1, $src2}",
                       [(X86comi (v4f32 VR128:$src1), (load addr:$src2))]>;
 
 // Aliases of packed SSE1 instructions for scalar use. These all have names that
@@ -397,53 +397,53 @@
 
 // Alias instructions that map fld0 to pxor for sse.
 def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
-                 "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
+                 "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>,
                Requires<[HasSSE1]>, TB, OpSize;
 
 // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
 // disregarded.
 def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
-                     "movaps {$src, $dst|$dst, $src}", []>;
+                     "movaps\t{$src, $dst|$dst, $src}", []>;
 
 // Alias instruction to load FR32 from f128mem using movaps. Upper bits are
 // disregarded.
 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
-                     "movaps {$src, $dst|$dst, $src}",
+                     "movaps\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
 
 // Alias bitwise logical operations using SSE logical ops on packed FP values.
 let isTwoAddress = 1 in {
 let isCommutable = 1 in {
   def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
-                      "andps {$src2, $dst|$dst, $src2}",
+                      "andps\t{$src2, $dst|$dst, $src2}",
                       [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
   def FsORPSrr  : PSI<0x56, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
-                      "orps {$src2, $dst|$dst, $src2}",
+                      "orps\t{$src2, $dst|$dst, $src2}",
                       [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
   def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
-                      "xorps {$src2, $dst|$dst, $src2}",
+                      "xorps\t{$src2, $dst|$dst, $src2}",
                       [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
 }
 
 def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
-                    "andps {$src2, $dst|$dst, $src2}",
+                    "andps\t{$src2, $dst|$dst, $src2}",
                     [(set FR32:$dst, (X86fand FR32:$src1,
                                       (memopfsf32 addr:$src2)))]>;
 def FsORPSrm  : PSI<0x56, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
-                    "orps {$src2, $dst|$dst, $src2}",
+                    "orps\t{$src2, $dst|$dst, $src2}",
                     [(set FR32:$dst, (X86for FR32:$src1,
                                       (memopfsf32 addr:$src2)))]>;
 def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
-                    "xorps {$src2, $dst|$dst, $src2}",
+                    "xorps\t{$src2, $dst|$dst, $src2}",
                     [(set FR32:$dst, (X86fxor FR32:$src1,
                                       (memopfsf32 addr:$src2)))]>;
 
 def FsANDNPSrr : PSI<0x55, MRMSrcReg,
                      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
-                     "andnps {$src2, $dst|$dst, $src2}", []>;
+                     "andnps\t{$src2, $dst|$dst, $src2}", []>;
 def FsANDNPSrm : PSI<0x55, MRMSrcMem,
                      (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
-                     "andnps {$src2, $dst|$dst, $src2}", []>;
+                     "andnps\t{$src2, $dst|$dst, $src2}", []>;
 }
 
 /// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
@@ -462,38 +462,38 @@
                                   bit Commutable = 0> {
   // Scalar operation, reg+reg.
   def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
-                 !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                  [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Scalar operation, reg+mem.
   def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
-                 !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                  [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
                  
   // Vector operation, reg+reg.
   def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
     let isCommutable = Commutable;
   }
 
   // Vector operation, reg+mem.
   def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                 !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                  [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
 
   // Intrinsic operation, reg+reg.
   def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Intrinsic operation, reg+mem.
   def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
-                     !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (F32Int VR128:$src1,
                                                sse_load_f32:$src2))]>;
 }
@@ -523,51 +523,51 @@
 
   // Scalar operation, reg+reg.
   def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
-                 !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                  [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Scalar operation, reg+mem.
   def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
-                 !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                  [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
                  
   // Vector operation, reg+reg.
   def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
     let isCommutable = Commutable;
   }
 
   // Vector operation, reg+mem.
   def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                 !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                  [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
 
   // Intrinsic operation, reg+reg.
   def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Intrinsic operation, reg+mem.
   def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
-                     !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (F32Int VR128:$src1,
                                                sse_load_f32:$src2))]>;
 
   // Vector intrinsic operation, reg+reg.
   def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Vector intrinsic operation, reg+mem.
   def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
-                     !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (V4F32Int VR128:$src1, (load addr:$src2)))]>;
 }
 }
@@ -582,44 +582,44 @@
 
 // Move Instructions
 def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movaps {$src, $dst|$dst, $src}", []>;
+                   "movaps\t{$src, $dst|$dst, $src}", []>;
 def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                   "movaps {$src, $dst|$dst, $src}",
+                   "movaps\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
 
 def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                   "movaps {$src, $dst|$dst, $src}",
+                   "movaps\t{$src, $dst|$dst, $src}",
                    [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
 
 def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movups {$src, $dst|$dst, $src}", []>;
+                   "movups\t{$src, $dst|$dst, $src}", []>;
 def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                   "movups {$src, $dst|$dst, $src}",
+                   "movups\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (loadv4f32 addr:$src))]>;
 def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                   "movups {$src, $dst|$dst, $src}",
+                   "movups\t{$src, $dst|$dst, $src}",
                    [(store (v4f32 VR128:$src), addr:$dst)]>;
 
 // Intrinsic forms of MOVUPS load and store
 def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "movups {$src, $dst|$dst, $src}",
+                       "movups\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
 def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                       "movups {$src, $dst|$dst, $src}",
+                       "movups\t{$src, $dst|$dst, $src}",
                        [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
 
 let isTwoAddress = 1 in {
   let AddedComplexity = 20 in {
     def MOVLPSrm : PSI<0x12, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movlps {$src2, $dst|$dst, $src2}",
+                       "movlps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, 
                          (v4f32 (vector_shuffle VR128:$src1,
                          (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
                                  MOVLP_shuffle_mask)))]>;
     def MOVHPSrm : PSI<0x16, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movhps {$src2, $dst|$dst, $src2}",
+                       "movhps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, 
                          (v4f32 (vector_shuffle VR128:$src1,
                          (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
@@ -628,14 +628,14 @@
 } // isTwoAddress
 
 def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movlps {$src, $dst|$dst, $src}",
+                   "movlps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                  (iPTR 0))), addr:$dst)]>;
 
 // v2f64 extract element 1 is always custom lowered to unpack high to low
 // and extract element 0 so the non-store version isn't too horrible.
 def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movhps {$src, $dst|$dst, $src}",
+                   "movhps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract
                                  (v2f64 (vector_shuffle
                                          (bc_v2f64 (v4f32 VR128:$src)), (undef),
@@ -645,13 +645,13 @@
 let isTwoAddress = 1 in {
 let AddedComplexity = 15 in {
 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                    "movlhps {$src2, $dst|$dst, $src2}",
+                    "movlhps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                               MOVHP_shuffle_mask)))]>;
 
 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                    "movhlps {$src2, $dst|$dst, $src2}",
+                    "movhlps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                               MOVHLPS_shuffle_mask)))]>;
@@ -681,50 +681,50 @@
                            bit Commutable = 0> {
   // Scalar operation, reg.
   def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
-                !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                 [(set FR32:$dst, (OpNode FR32:$src))]> {
     let isCommutable = Commutable;
   }
 
   // Scalar operation, mem.
   def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
-                !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                 [(set FR32:$dst, (OpNode (load addr:$src)))]>;
                  
   // Vector operation, reg.
   def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-              !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
+              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
     let isCommutable = Commutable;
   }
 
   // Vector operation, mem.
   def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
+                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
 
   // Intrinsic operation, reg.
   def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                    !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
+                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (F32Int VR128:$src))]> {
     let isCommutable = Commutable;
   }
 
   // Intrinsic operation, mem.
   def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
-                    !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
+                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
 
   // Vector intrinsic operation, reg
   def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                    !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
+                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (V4F32Int VR128:$src))]> {
     let isCommutable = Commutable;
   }
 
   // Vector intrinsic operation, mem
   def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
-                    !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
+                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (V4F32Int (load addr:$src)))]>;
 }
 
@@ -744,46 +744,46 @@
   let isCommutable = 1 in {
     def ANDPSrr : PSI<0x54, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "andps {$src2, $dst|$dst, $src2}",
+                      "andps\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst, (v2i64
                                          (and VR128:$src1, VR128:$src2)))]>;
     def ORPSrr  : PSI<0x56, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "orps {$src2, $dst|$dst, $src2}",
+                      "orps\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst, (v2i64
                                          (or VR128:$src1, VR128:$src2)))]>;
     def XORPSrr : PSI<0x57, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "xorps {$src2, $dst|$dst, $src2}",
+                      "xorps\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst, (v2i64
                                          (xor VR128:$src1, VR128:$src2)))]>;
   }
 
   def ANDPSrm : PSI<0x54, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "andps {$src2, $dst|$dst, $src2}",
+                    "andps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (and (bc_v2i64 (v4f32 VR128:$src1)),
                                        (memopv2i64 addr:$src2)))]>;
   def ORPSrm  : PSI<0x56, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "orps {$src2, $dst|$dst, $src2}",
+                    "orps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (or (bc_v2i64 (v4f32 VR128:$src1)),
                                        (memopv2i64 addr:$src2)))]>;
   def XORPSrm : PSI<0x57, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "xorps {$src2, $dst|$dst, $src2}",
+                    "xorps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (xor (bc_v2i64 (v4f32 VR128:$src1)),
                                        (memopv2i64 addr:$src2)))]>;
   def ANDNPSrr : PSI<0x55, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     "andnps {$src2, $dst|$dst, $src2}",
+                     "andnps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (and (xor VR128:$src1,
                                     (bc_v2i64 (v4i32 immAllOnesV))),
                                VR128:$src2)))]>;
   def ANDNPSrm : PSI<0x55, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
-                     "andnps {$src2, $dst|$dst, $src2}",
+                     "andnps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
                                     (bc_v2i64 (v4i32 immAllOnesV))),
@@ -793,12 +793,12 @@
 let isTwoAddress = 1 in {
   def CMPPSrri : PSIi8<0xC2, MRMSrcReg, 
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
-                      "cmp${cc}ps {$src, $dst|$dst, $src}",
+                      "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
                                          VR128:$src, imm:$cc))]>;
   def CMPPSrmi : PSIi8<0xC2, MRMSrcMem, 
                       (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
-                      "cmp${cc}ps {$src, $dst|$dst, $src}",
+                      "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
                                          (load addr:$src), imm:$cc))]>;
 }
@@ -809,7 +809,7 @@
     def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, 
                           (outs VR128:$dst), (ins VR128:$src1,
                            VR128:$src2, i32i8imm:$src3),
-                          "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
+                          "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR128:$dst,
                             (v4f32 (vector_shuffle
                                     VR128:$src1, VR128:$src2,
@@ -817,7 +817,7 @@
   def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1,
                          f128mem:$src2, i32i8imm:$src3),
-                        "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
+                        "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, (load addr:$src2),
@@ -826,14 +826,14 @@
   let AddedComplexity = 10 in {
     def UNPCKHPSrr : PSI<0x15, MRMSrcReg, 
                          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpckhps {$src2, $dst|$dst, $src2}",
+                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst,
                            (v4f32 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    UNPCKH_shuffle_mask)))]>;
     def UNPCKHPSrm : PSI<0x15, MRMSrcMem, 
                          (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpckhps {$src2, $dst|$dst, $src2}",
+                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst,
                            (v4f32 (vector_shuffle
                                    VR128:$src1, (load addr:$src2),
@@ -841,14 +841,14 @@
 
     def UNPCKLPSrr : PSI<0x14, MRMSrcReg, 
                          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpcklps {$src2, $dst|$dst, $src2}",
+                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst,
                            (v4f32 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    UNPCKL_shuffle_mask)))]>;
     def UNPCKLPSrm : PSI<0x14, MRMSrcMem, 
                          (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpcklps {$src2, $dst|$dst, $src2}",
+                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst,
                            (v4f32 (vector_shuffle
                                    VR128:$src1, (load addr:$src2),
@@ -858,22 +858,22 @@
 
 // Mask creation
 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "movmskps {$src, $dst|$dst, $src}",
+                     "movmskps\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
 def MOVMSKPDrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "movmskpd {$src, $dst|$dst, $src}",
+                     "movmskpd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
 
 // Prefetching loads.
 // TODO: no intrinsics for these?
-def PREFETCHT0   : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0 $src", []>;
-def PREFETCHT1   : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), "prefetcht1 $src", []>;
-def PREFETCHT2   : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), "prefetcht2 $src", []>;
-def PREFETCHNTA  : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta $src", []>;
+def PREFETCHT0   : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", []>;
+def PREFETCHT1   : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), "prefetcht1\t$src", []>;
+def PREFETCHT2   : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), "prefetcht2\t$src", []>;
+def PREFETCHNTA  : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", []>;
 
 // Non-temporal stores
 def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                    "movntps {$src, $dst|$dst, $src}",
+                    "movntps\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
 
 // Load, store, and memory fence
@@ -881,24 +881,24 @@
 
 // MXCSR register
 def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
-                  "ldmxcsr $src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
 def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
-                  "stmxcsr $dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
 
 // Alias instructions that map zero vector to pxor / xorp* for sse.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let isReMaterializable = 1 in
 def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
-                 "xorps $dst, $dst",
+                 "xorps\t$dst, $dst",
                  [(set VR128:$dst, (v4f32 immAllZerosV))]>;
 
 // FR32 to 128-bit vector conversion.
 def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
-                      "movss {$src, $dst|$dst, $src}",
+                      "movss\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4f32 (scalar_to_vector FR32:$src)))]>;
 def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
-                     "movss {$src, $dst|$dst, $src}",
+                     "movss\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
 
@@ -908,11 +908,11 @@
 // def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
 //           (f32 FR32:$src)>;
 def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src),
-                     "movss {$src, $dst|$dst, $src}",
+                     "movss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
                                        (iPTR 0)))]>;
 def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
-                     "movss {$src, $dst|$dst, $src}",
+                     "movss\t{$src, $dst|$dst, $src}",
                      [(store (f32 (vector_extract (v4f32 VR128:$src),
                                    (iPTR 0))), addr:$dst)]>;
 
@@ -922,12 +922,12 @@
 let isTwoAddress = 1 in {
   def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
-                        "movss {$src2, $dst|$dst, $src2}", []>;
+                        "movss\t{$src2, $dst|$dst, $src2}", []>;
 
   let AddedComplexity = 15 in
     def MOVLPSrr : SSI<0x10, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                       "movss {$src2, $dst|$dst, $src2}",
+                       "movss\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                  MOVL_shuffle_mask)))]>;
@@ -937,7 +937,7 @@
 // Loading from memory automatically zeroing upper bits.
 let AddedComplexity = 20 in
 def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
-                      "movss {$src, $dst|$dst, $src}",
+                      "movss\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV,
                                  (v4f32 (scalar_to_vector (loadf32 addr:$src))),
                                                 MOVL_shuffle_mask)))]>;
@@ -963,60 +963,60 @@
 
 // Move Instructions
 def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
-                  "movsd {$src, $dst|$dst, $src}", []>;
+                  "movsd\t{$src, $dst|$dst, $src}", []>;
 def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
-                  "movsd {$src, $dst|$dst, $src}",
+                  "movsd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (loadf64 addr:$src))]>;
 def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
-                  "movsd {$src, $dst|$dst, $src}",
+                  "movsd\t{$src, $dst|$dst, $src}",
                   [(store FR64:$src, addr:$dst)]>;
 
 // Conversion instructions
 def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
-                      "cvttsd2si {$src, $dst|$dst, $src}",
+                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
 def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
-                      "cvttsd2si {$src, $dst|$dst, $src}",
+                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
 def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
-                      "cvtsd2ss {$src, $dst|$dst, $src}",
+                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fround FR64:$src))]>;
 def CVTSD2SSrm  : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), 
-                      "cvtsd2ss {$src, $dst|$dst, $src}",
+                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
 def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
-                      "cvtsi2sd {$src, $dst|$dst, $src}",
+                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
 def CVTSI2SDrm  : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
-                      "cvtsi2sd {$src, $dst|$dst, $src}",
+                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
 
 // SSE2 instructions with XS prefix
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
-                   "cvtss2sd {$src, $dst|$dst, $src}",
+                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (fextend FR32:$src))]>, XS,
                  Requires<[HasSSE2]>;
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
-                   "cvtss2sd {$src, $dst|$dst, $src}",
+                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
                  Requires<[HasSSE2]>;
 
 // Match intrinsics which expect XMM operand(s).
 def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                         "cvtsd2si {$src, $dst|$dst, $src}",
+                         "cvtsd2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
 def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
-                         "cvtsd2si {$src, $dst|$dst, $src}",
+                         "cvtsd2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst, (int_x86_sse2_cvtsd2si
                                            (load addr:$src)))]>;
 
 // Aliases for intrinsics
 def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                          "cvttsd2si {$src, $dst|$dst, $src}",
+                          "cvttsd2si\t{$src, $dst|$dst, $src}",
                           [(set GR32:$dst,
                             (int_x86_sse2_cvttsd2si VR128:$src))]>;
 def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
-                          "cvttsd2si {$src, $dst|$dst, $src}",
+                          "cvttsd2si\t{$src, $dst|$dst, $src}",
                           [(set GR32:$dst, (int_x86_sse2_cvttsd2si
                                             (load addr:$src)))]>;
 
@@ -1024,45 +1024,45 @@
 let isTwoAddress = 1 in {
   def CMPSDrr : SDI<0xC2, MRMSrcReg, 
                     (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
-                    "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
+                    "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
   def CMPSDrm : SDI<0xC2, MRMSrcMem, 
                     (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
-                    "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
+                    "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
 }
 
 def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2),
-                   "ucomisd {$src2, $src1|$src1, $src2}",
+                   "ucomisd\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR64:$src1, FR64:$src2)]>;
 def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
-                   "ucomisd {$src2, $src1|$src1, $src2}",
+                   "ucomisd\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
 
 // Aliases to match intrinsics which expect XMM operand(s).
 let isTwoAddress = 1 in {
   def Int_CMPSDrr : SDI<0xC2, MRMSrcReg, 
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
-                        "cmp${cc}sd {$src, $dst|$dst, $src}",
+                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
                                            VR128:$src, imm:$cc))]>;
   def Int_CMPSDrm : SDI<0xC2, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc),
-                        "cmp${cc}sd {$src, $dst|$dst, $src}",
+                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
                                            (load addr:$src), imm:$cc))]>;
 }
 
 def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                       "ucomisd {$src2, $src1|$src1, $src2}",
+                       "ucomisd\t{$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>;
 def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
-                       "ucomisd {$src2, $src1|$src1, $src2}",
+                       "ucomisd\t{$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2))]>;
 
 def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                      "comisd {$src2, $src1|$src1, $src2}",
+                      "comisd\t{$src2, $src1|$src1, $src2}",
                       [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>;
 def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
-                      "comisd {$src2, $src1|$src1, $src2}",
+                      "comisd\t{$src2, $src1|$src1, $src2}",
                       [(X86comi (v2f64 VR128:$src1), (load addr:$src2))]>;
 
 // Aliases of packed SSE2 instructions for scalar use. These all have names that
@@ -1070,53 +1070,53 @@
 
 // Alias instructions that map fld0 to pxor for sse.
 def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
-                 "pxor $dst, $dst", [(set FR64:$dst, fpimm0)]>,
+                 "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
                Requires<[HasSSE2]>, TB, OpSize;
 
 // Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
 // disregarded.
 def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
-                     "movapd {$src, $dst|$dst, $src}", []>;
+                     "movapd\t{$src, $dst|$dst, $src}", []>;
 
 // Alias instruction to load FR64 from f128mem using movapd. Upper bits are
 // disregarded.
 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
-                     "movapd {$src, $dst|$dst, $src}",
+                     "movapd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
 
 // Alias bitwise logical operations using SSE logical ops on packed FP values.
 let isTwoAddress = 1 in {
 let isCommutable = 1 in {
   def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
-                      "andpd {$src2, $dst|$dst, $src2}",
+                      "andpd\t{$src2, $dst|$dst, $src2}",
                       [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
   def FsORPDrr  : PDI<0x56, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
-                      "orpd {$src2, $dst|$dst, $src2}",
+                      "orpd\t{$src2, $dst|$dst, $src2}",
                       [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
   def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
-                      "xorpd {$src2, $dst|$dst, $src2}",
+                      "xorpd\t{$src2, $dst|$dst, $src2}",
                       [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
 }
 
 def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
-                    "andpd {$src2, $dst|$dst, $src2}",
+                    "andpd\t{$src2, $dst|$dst, $src2}",
                     [(set FR64:$dst, (X86fand FR64:$src1,
                                       (memopfsf64 addr:$src2)))]>;
 def FsORPDrm  : PDI<0x56, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
-                    "orpd {$src2, $dst|$dst, $src2}",
+                    "orpd\t{$src2, $dst|$dst, $src2}",
                     [(set FR64:$dst, (X86for FR64:$src1,
                                       (memopfsf64 addr:$src2)))]>;
 def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
-                    "xorpd {$src2, $dst|$dst, $src2}",
+                    "xorpd\t{$src2, $dst|$dst, $src2}",
                     [(set FR64:$dst, (X86fxor FR64:$src1,
                                       (memopfsf64 addr:$src2)))]>;
 
 def FsANDNPDrr : PDI<0x55, MRMSrcReg,
                      (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
-                     "andnpd {$src2, $dst|$dst, $src2}", []>;
+                     "andnpd\t{$src2, $dst|$dst, $src2}", []>;
 def FsANDNPDrm : PDI<0x55, MRMSrcMem,
                      (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
-                     "andnpd {$src2, $dst|$dst, $src2}", []>;
+                     "andnpd\t{$src2, $dst|$dst, $src2}", []>;
 }
 
 /// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
@@ -1135,38 +1135,38 @@
                                   bit Commutable = 0> {
   // Scalar operation, reg+reg.
   def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
-                 !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                  [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Scalar operation, reg+mem.
   def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
-                 !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                  [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
                  
   // Vector operation, reg+reg.
   def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
     let isCommutable = Commutable;
   }
 
   // Vector operation, reg+mem.
   def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                 !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                  [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
 
   // Intrinsic operation, reg+reg.
   def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Intrinsic operation, reg+mem.
   def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
-                     !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (F64Int VR128:$src1,
                                                sse_load_f64:$src2))]>;
 }
@@ -1196,51 +1196,51 @@
 
   // Scalar operation, reg+reg.
   def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
-                 !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                  [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Scalar operation, reg+mem.
   def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
-                 !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                  [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
                  
   // Vector operation, reg+reg.
   def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
     let isCommutable = Commutable;
   }
 
   // Vector operation, reg+mem.
   def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                 !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                  [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
 
   // Intrinsic operation, reg+reg.
   def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Intrinsic operation, reg+mem.
   def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
-                     !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (F64Int VR128:$src1,
                                                sse_load_f64:$src2))]>;
 
   // Vector intrinsic operation, reg+reg.
   def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
     let isCommutable = Commutable;
   }
 
   // Vector intrinsic operation, reg+mem.
   def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                     !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+                     !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst, (V2F64Int VR128:$src1, (load addr:$src2)))]>;
 }
 }
@@ -1255,44 +1255,44 @@
 
 // Move Instructions
 def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movapd {$src, $dst|$dst, $src}", []>;
+                   "movapd\t{$src, $dst|$dst, $src}", []>;
 def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                   "movapd {$src, $dst|$dst, $src}",
+                   "movapd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
 
 def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                   "movapd {$src, $dst|$dst, $src}",
+                   "movapd\t{$src, $dst|$dst, $src}",
                    [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
 
 def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movupd {$src, $dst|$dst, $src}", []>;
+                   "movupd\t{$src, $dst|$dst, $src}", []>;
 def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                   "movupd {$src, $dst|$dst, $src}",
+                   "movupd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (loadv2f64 addr:$src))]>;
 def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                   "movupd {$src, $dst|$dst, $src}",
+                   "movupd\t{$src, $dst|$dst, $src}",
                    [(store (v2f64 VR128:$src), addr:$dst)]>;
 
 // Intrinsic forms of MOVUPD load and store
 def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "movupd {$src, $dst|$dst, $src}",
+                       "movupd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
 def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                       "movupd {$src, $dst|$dst, $src}",
+                       "movupd\t{$src, $dst|$dst, $src}",
                        [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
 
 let isTwoAddress = 1 in {
   let AddedComplexity = 20 in {
     def MOVLPDrm : PDI<0x12, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movlpd {$src2, $dst|$dst, $src2}",
+                       "movlpd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, 
                          (v2f64 (vector_shuffle VR128:$src1,
                                  (scalar_to_vector (loadf64 addr:$src2)),
                                  MOVLP_shuffle_mask)))]>;
     def MOVHPDrm : PDI<0x16, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movhpd {$src2, $dst|$dst, $src2}",
+                       "movhpd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, 
                          (v2f64 (vector_shuffle VR128:$src1,
                                  (scalar_to_vector (loadf64 addr:$src2)),
@@ -1301,14 +1301,14 @@
 } // isTwoAddress
 
 def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movlpd {$src, $dst|$dst, $src}",
+                   "movlpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (v2f64 VR128:$src),
                                  (iPTR 0))), addr:$dst)]>;
 
 // v2f64 extract element 1 is always custom lowered to unpack high to low
 // and extract element 0 so the non-store version isn't too horrible.
 def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movhpd {$src, $dst|$dst, $src}",
+                   "movhpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract
                                  (v2f64 (vector_shuffle VR128:$src, (undef),
                                          UNPCKH_shuffle_mask)), (iPTR 0))),
@@ -1316,79 +1316,79 @@
 
 // SSE2 instructions without OpSize prefix
 def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtdq2ps {$src, $dst|$dst, $src}",
+                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
                      TB, Requires<[HasSSE2]>;
 def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                       "cvtdq2ps {$src, $dst|$dst, $src}",
+                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
                                          (bitconvert (memopv2i64 addr:$src))))]>,
                      TB, Requires<[HasSSE2]>;
 
 // SSE2 instructions with XS prefix
 def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtdq2pd {$src, $dst|$dst, $src}",
+                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
                      XS, Requires<[HasSSE2]>;
 def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
-                       "cvtdq2pd {$src, $dst|$dst, $src}",
+                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
                                           (bitconvert (memopv2i64 addr:$src))))]>,
                      XS, Requires<[HasSSE2]>;
 
 def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                         "cvtps2dq {$src, $dst|$dst, $src}",
+                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
 def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                         "cvtps2dq {$src, $dst|$dst, $src}",
+                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvtps2dq
                                             (load addr:$src)))]>;
 // SSE2 packed instructions with XS prefix
 def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                        "cvttps2dq {$src, $dst|$dst, $src}",
+                        "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>,
                       XS, Requires<[HasSSE2]>;
 def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                        "cvttps2dq {$src, $dst|$dst, $src}",
+                        "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvttps2dq
                                            (load addr:$src)))]>,
                       XS, Requires<[HasSSE2]>;
 
 // SSE2 packed instructions with XD prefix
 def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtpd2dq {$src, $dst|$dst, $src}",
+                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
                      XD, Requires<[HasSSE2]>;
 def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "cvtpd2dq {$src, $dst|$dst, $src}",
+                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
                                           (load addr:$src)))]>,
                      XD, Requires<[HasSSE2]>;
 
 def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                          "cvttpd2dq {$src, $dst|$dst, $src}",
+                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
 def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                          "cvttpd2dq {$src, $dst|$dst, $src}",
+                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
                           [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                              (load addr:$src)))]>;
 
 // SSE2 instructions without OpSize prefix
 def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtps2pd {$src, $dst|$dst, $src}",
+                       "cvtps2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
                      TB, Requires<[HasSSE2]>;
 def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                       "cvtps2pd {$src, $dst|$dst, $src}",
+                       "cvtps2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2pd
                                           (load addr:$src)))]>,
                      TB, Requires<[HasSSE2]>;
 
 def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                         "cvtpd2ps {$src, $dst|$dst, $src}",
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
 def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                         "cvtpd2ps {$src, $dst|$dst, $src}",
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                             (load addr:$src)))]>;
 
@@ -1397,33 +1397,33 @@
 let isTwoAddress = 1 in {
 def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
-                        "cvtsi2sd {$src2, $dst|$dst, $src2}",
+                        "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
                                            GR32:$src2))]>;
 def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
-                        "cvtsi2sd {$src2, $dst|$dst, $src2}",
+                        "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
                                            (loadi32 addr:$src2)))]>;
 def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                   "cvtsd2ss {$src2, $dst|$dst, $src2}",
+                   "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
                                       VR128:$src2))]>;
 def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), 
-                   "cvtsd2ss {$src2, $dst|$dst, $src2}",
+                   "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
                                       (load addr:$src2)))]>;
 def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                    "cvtss2sd {$src2, $dst|$dst, $src2}",
+                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
                                        VR128:$src2))]>, XS,
                     Requires<[HasSSE2]>;
 def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
-                    "cvtss2sd {$src2, $dst|$dst, $src2}",
+                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
                                        (load addr:$src2)))]>, XS,
                     Requires<[HasSSE2]>;
@@ -1450,50 +1450,50 @@
                            bit Commutable = 0> {
   // Scalar operation, reg.
   def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
-                !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
+                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                 [(set FR64:$dst, (OpNode FR64:$src))]> {
     let isCommutable = Commutable;
   }
 
   // Scalar operation, mem.
   def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
-                !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
+                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                 [(set FR64:$dst, (OpNode (load addr:$src)))]>;
                  
   // Vector operation, reg.
   def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-              !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
+              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
     let isCommutable = Commutable;
   }
 
   // Vector operation, mem.
   def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
+                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
 
   // Intrinsic operation, reg.
   def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                    !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
+                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (F64Int VR128:$src))]> {
     let isCommutable = Commutable;
   }
 
   // Intrinsic operation, mem.
   def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
-                    !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
+                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
 
   // Vector intrinsic operation, reg
   def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                    !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
+                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (V2F64Int VR128:$src))]> {
     let isCommutable = Commutable;
   }
 
   // Vector intrinsic operation, mem
   def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                    !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
+                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (V2F64Int (load addr:$src)))]>;
 }
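 // For illustration (hypothetical instance; the real uses sit outside this
 // hunk): instantiating this multiclass with OpcodeStr = "sqrt" yields
 // SQRTSDr/SQRTSDm/SQRTPDr/SQRTPDm plus the _Int forms, all sharing asm
 // strings like "sqrtsd\t{$src, $dst|$dst, $src}" built by !strconcat.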
 
@@ -1508,19 +1508,19 @@
   let isCommutable = 1 in {
     def ANDPDrr : PDI<0x54, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "andpd {$src2, $dst|$dst, $src2}",
+                      "andpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (and (bc_v2i64 (v2f64 VR128:$src1)),
                          (bc_v2i64 (v2f64 VR128:$src2))))]>;
     def ORPDrr  : PDI<0x56, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "orpd {$src2, $dst|$dst, $src2}",
+                      "orpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (or (bc_v2i64 (v2f64 VR128:$src1)),
                          (bc_v2i64 (v2f64 VR128:$src2))))]>;
     def XORPDrr : PDI<0x57, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "xorpd {$src2, $dst|$dst, $src2}",
+                      "xorpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (xor (bc_v2i64 (v2f64 VR128:$src1)),
                          (bc_v2i64 (v2f64 VR128:$src2))))]>;
@@ -1528,31 +1528,31 @@
 
   def ANDPDrm : PDI<0x54, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "andpd {$src2, $dst|$dst, $src2}",
+                    "andpd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (and (bc_v2i64 (v2f64 VR128:$src1)),
                        (memopv2i64 addr:$src2)))]>;
   def ORPDrm  : PDI<0x56, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "orpd {$src2, $dst|$dst, $src2}",
+                    "orpd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (or (bc_v2i64 (v2f64 VR128:$src1)),
                        (memopv2i64 addr:$src2)))]>;
   def XORPDrm : PDI<0x57, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "xorpd {$src2, $dst|$dst, $src2}",
+                    "xorpd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (xor (bc_v2i64 (v2f64 VR128:$src1)),
                        (memopv2i64 addr:$src2)))]>;
   def ANDNPDrr : PDI<0x55, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     "andnpd {$src2, $dst|$dst, $src2}",
+                     "andnpd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                         (bc_v2i64 (v2f64 VR128:$src2))))]>;
   def ANDNPDrm : PDI<0x55, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                     "andnpd {$src2, $dst|$dst, $src2}",
+                     "andnpd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                         (memopv2i64 addr:$src2)))]>;
@@ -1561,12 +1561,12 @@
 let isTwoAddress = 1 in {
   def CMPPDrri : PDIi8<0xC2, MRMSrcReg, 
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
-                      "cmp${cc}pd {$src, $dst|$dst, $src}",
+                      "cmp${cc}pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
                                          VR128:$src, imm:$cc))]>;
   def CMPPDrmi : PDIi8<0xC2, MRMSrcMem, 
                       (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
-                      "cmp${cc}pd {$src, $dst|$dst, $src}",
+                      "cmp${cc}pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
                                          (load addr:$src), imm:$cc))]>;
 }
@@ -1575,14 +1575,14 @@
 let isTwoAddress = 1 in {
   def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, 
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
-                        "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
+                        "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst, (v2f64 (vector_shuffle
                                                   VR128:$src1, VR128:$src2,
                                                   SHUFP_shuffle_mask:$src3)))]>;
   def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1,
                          f128mem:$src2, i8imm:$src3),
-                        "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
+                        "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (v2f64 (vector_shuffle
                                   VR128:$src1, (load addr:$src2),
@@ -1591,14 +1591,14 @@
   let AddedComplexity = 10 in {
     def UNPCKHPDrr : PDI<0x15, MRMSrcReg, 
                          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpckhpd {$src2, $dst|$dst, $src2}",
+                         "unpckhpd\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst,
                            (v2f64 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    UNPCKH_shuffle_mask)))]>;
     def UNPCKHPDrm : PDI<0x15, MRMSrcMem, 
                          (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpckhpd {$src2, $dst|$dst, $src2}",
+                         "unpckhpd\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst,
                            (v2f64 (vector_shuffle
                                    VR128:$src1, (load addr:$src2),
@@ -1606,14 +1606,14 @@
 
     def UNPCKLPDrr : PDI<0x14, MRMSrcReg, 
                          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpcklpd {$src2, $dst|$dst, $src2}",
+                         "unpcklpd\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst,
                            (v2f64 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    UNPCKL_shuffle_mask)))]>;
     def UNPCKLPDrm : PDI<0x14, MRMSrcMem, 
                          (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpcklpd {$src2, $dst|$dst, $src2}",
+                         "unpcklpd\t{$src2, $dst|$dst, $src2}",
                          [(set VR128:$dst,
                            (v2f64 (vector_shuffle
                                    VR128:$src1, (load addr:$src2),
@@ -1627,29 +1627,29 @@
 
 // Move Instructions
 def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movdqa {$src, $dst|$dst, $src}", []>;
+                   "movdqa\t{$src, $dst|$dst, $src}", []>;
 def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                   "movdqa {$src, $dst|$dst, $src}",
+                   "movdqa\t{$src, $dst|$dst, $src}",
                    [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
 def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                   "movdqa {$src, $dst|$dst, $src}",
+                   "movdqa\t{$src, $dst|$dst, $src}",
                    [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
 def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                   "movdqu {$src, $dst|$dst, $src}",
+                   "movdqu\t{$src, $dst|$dst, $src}",
                    [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
                  XS, Requires<[HasSSE2]>;
 def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                   "movdqu {$src, $dst|$dst, $src}",
+                   "movdqu\t{$src, $dst|$dst, $src}",
                    [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
                  XS, Requires<[HasSSE2]>;
 
 // Intrinsic forms of MOVDQU load and store
 def MOVDQUrm_Int :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                       "movdqu {$src, $dst|$dst, $src}",
+                       "movdqu\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
                  XS, Requires<[HasSSE2]>;
 def MOVDQUmr_Int :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                       "movdqu {$src, $dst|$dst, $src}",
+                       "movdqu\t{$src, $dst|$dst, $src}",
                        [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
                      XS, Requires<[HasSSE2]>;
 
@@ -1658,12 +1658,12 @@
 multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                             bit Commutable = 0> {
   def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
     let isCommutable = Commutable;
   }
   def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId VR128:$src1,
                                         (bitconvert (memopv2i64 addr:$src2))))]>;
 }
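 // For illustration (hypothetical use; the real defms sit outside this
 // hunk): "defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
 // 1>;" would define PAVGBrr and PAVGBrm, both printing "pavgb<TAB>..." once
 // the \t from !strconcat is in place.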
@@ -1671,14 +1671,14 @@
 multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                              string OpcodeStr, Intrinsic IntId> {
   def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
   def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId VR128:$src1,
                                         (bitconvert (memopv2i64 addr:$src2))))]>;
   def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (IntId VR128:$src1,
                                         (scalar_to_vector (i32 imm:$src2))))]>;
 }
@@ -1688,12 +1688,12 @@
 multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType OpVT, bit Commutable = 0> {
   def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
     let isCommutable = Commutable;
   }
   def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
                                        (bitconvert (memopv2i64 addr:$src2)))))]>;
 }
@@ -1706,12 +1706,12 @@
 multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               bit Commutable = 0> {
   def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
     let isCommutable = Commutable;
   }
   def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64 addr:$src2)))]>;
 }
 
@@ -1774,10 +1774,10 @@
 let isTwoAddress = 1 in {
   def PSLLDQri : PDIi8<0x73, MRM7r,
                        (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                       "pslldq {$src2, $dst|$dst, $src2}", []>;
+                       "pslldq\t{$src2, $dst|$dst, $src2}", []>;
   def PSRLDQri : PDIi8<0x73, MRM3r,
                        (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                       "psrldq {$src2, $dst|$dst, $src2}", []>;
+                       "psrldq\t{$src2, $dst|$dst, $src2}", []>;
   // PSRADQri doesn't exist in SSE[1-3].
 }
 
@@ -1798,13 +1798,13 @@
 let isTwoAddress = 1 in {
   def PANDNrr : PDI<0xDF, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                    "pandn {$src2, $dst|$dst, $src2}",
+                    "pandn\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
                                               VR128:$src2)))]>;
 
   def PANDNrm : PDI<0xDF, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                    "pandn {$src2, $dst|$dst, $src2}",
+                    "pandn\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
                                               (load addr:$src2))))]>;
 }
@@ -1825,13 +1825,13 @@
 // Shuffle and unpack instructions
 def PSHUFDri : PDIi8<0x70, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
-                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst, (v4i32 (vector_shuffle
                                                VR128:$src1, (undef),
                                                PSHUFD_shuffle_mask:$src2)))]>;
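 // For illustration: pshufd's immediate packs four 2-bit source indices, so
 // "pshufd $0x1b, %xmm0, %xmm1" selects source dwords 3,2,1,0 for destination
 // dwords 0..3, i.e. it reverses the four dwords.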
 def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
                      (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
-                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst, (v4i32 (vector_shuffle
                                                (bc_v4i32 (memopv2i64 addr:$src1)),
                                                (undef),
@@ -1840,14 +1840,14 @@
 // SSE2 with ImmT == Imm8 and XS prefix.
 def PSHUFHWri : Ii8<0x70, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
-                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v8i16 (vector_shuffle
                                               VR128:$src1, (undef),
                                               PSHUFHW_shuffle_mask:$src2)))]>,
                 XS, Requires<[HasSSE2]>;
 def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
                     (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
-                    "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v8i16 (vector_shuffle
                                               (bc_v8i16 (memopv2i64 addr:$src1)),
                                               (undef),
@@ -1857,14 +1857,14 @@
 // SSE2 with ImmT == Imm8 and XD prefix.
 def PSHUFLWri : Ii8<0x70, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v8i16 (vector_shuffle
                                               VR128:$src1, (undef),
                                               PSHUFLW_shuffle_mask:$src2)))]>,
                 XD, Requires<[HasSSE2]>;
 def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
                     (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2),
-                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v8i16 (vector_shuffle
                                               (bc_v8i16 (memopv2i64 addr:$src1)),
                                               (undef),
@@ -1875,52 +1875,52 @@
 let isTwoAddress = 1 in {
   def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, 
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpcklbw {$src2, $dst|$dst, $src2}",
+                        "punpcklbw\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
   def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpcklbw {$src2, $dst|$dst, $src2}",
+                        "punpcklbw\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v16i8 (vector_shuffle VR128:$src1,
                                   (bc_v16i8 (memopv2i64 addr:$src2)),
                                   UNPCKL_shuffle_mask)))]>;
   def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, 
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpcklwd {$src2, $dst|$dst, $src2}",
+                        "punpcklwd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
   def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpcklwd {$src2, $dst|$dst, $src2}",
+                        "punpcklwd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v8i16 (vector_shuffle VR128:$src1,
                                   (bc_v8i16 (memopv2i64 addr:$src2)),
                                   UNPCKL_shuffle_mask)))]>;
   def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, 
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpckldq {$src2, $dst|$dst, $src2}",
+                        "punpckldq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
   def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckldq {$src2, $dst|$dst, $src2}",
+                        "punpckldq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4i32 (vector_shuffle VR128:$src1,
                                   (bc_v4i32 (memopv2i64 addr:$src2)),
                                   UNPCKL_shuffle_mask)))]>;
   def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, 
                          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "punpcklqdq {$src2, $dst|$dst, $src2}",
+                         "punpcklqdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
   def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, 
                          (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                         "punpcklqdq {$src2, $dst|$dst, $src2}",
+                         "punpcklqdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v2i64 (vector_shuffle VR128:$src1,
                                   (memopv2i64 addr:$src2),
@@ -1928,52 +1928,52 @@
   
   def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, 
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpckhbw {$src2, $dst|$dst, $src2}",
+                        "punpckhbw\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
   def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckhbw {$src2, $dst|$dst, $src2}",
+                        "punpckhbw\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v16i8 (vector_shuffle VR128:$src1,
                                   (bc_v16i8 (memopv2i64 addr:$src2)),
                                   UNPCKH_shuffle_mask)))]>;
   def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, 
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpckhwd {$src2, $dst|$dst, $src2}",
+                        "punpckhwd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
   def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckhwd {$src2, $dst|$dst, $src2}",
+                        "punpckhwd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v8i16 (vector_shuffle VR128:$src1,
                                   (bc_v8i16 (memopv2i64 addr:$src2)),
                                   UNPCKH_shuffle_mask)))]>;
   def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, 
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpckhdq {$src2, $dst|$dst, $src2}",
+                        "punpckhdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
   def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckhdq {$src2, $dst|$dst, $src2}",
+                        "punpckhdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4i32 (vector_shuffle VR128:$src1,
                                   (bc_v4i32 (memopv2i64 addr:$src2)),
                                   UNPCKH_shuffle_mask)))]>;
   def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, 
                          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "punpckhqdq {$src2, $dst|$dst, $src2}",
+                         "punpckhqdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
   def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, 
                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckhqdq {$src2, $dst|$dst, $src2}",
+                        "punpckhqdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v2i64 (vector_shuffle VR128:$src1,
                                   (memopv2i64 addr:$src2),
@@ -1983,21 +1983,21 @@
 // Extract / Insert
 def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                    "pextrw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
                                      (iPTR imm:$src2)))]>;
 let isTwoAddress = 1 in {
   def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1,
                         GR32:$src2, i32i8imm:$src3),
-                       "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
+                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v8i16 (X86pinsrw (v8i16 VR128:$src1),
                                  GR32:$src2, (iPTR imm:$src3))))]>;
   def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1,
                         i16mem:$src2, i32i8imm:$src3),
-                       "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
+                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v8i16 (X86pinsrw (v8i16 VR128:$src1),
                                  (i32 (anyext (loadi16 addr:$src2))),
@@ -2006,30 +2006,30 @@
 
 // Mask creation
 def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "pmovmskb {$src, $dst|$dst, $src}",
+                     "pmovmskb\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
 
 // Conditional store
 def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
-                     "maskmovdqu {$mask, $src|$src, $mask}",
+                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                      [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
                  Imp<[EDI],[]>;
 
 // Non-temporal stores
 def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                    "movntpd {$src, $dst|$dst, $src}",
+                    "movntpd\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
 def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntdq {$src, $dst|$dst, $src}",
+                    "movntdq\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
 def MOVNTImr  :   I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                    "movnti {$src, $dst|$dst, $src}",
+                    "movnti\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, 
                   TB, Requires<[HasSSE2]>;
 
 // Flush cache
 def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
-               "clflush $src", [(int_x86_sse2_clflush addr:$src)]>,
+               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
               TB, Requires<[HasSSE2]>;
 
 // Load, store, and memory fence
@@ -2043,44 +2043,44 @@
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let isReMaterializable = 1 in
   def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
-                         "pcmpeqd $dst, $dst",
+                         "pcmpeqd\t$dst, $dst",
                          [(set VR128:$dst, (v2f64 immAllOnesV))]>;
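 // Note: pcmpeqd with both operands the same register compares equal in
 // every lane, materializing all-ones without a load; that is what makes
 // this def safe to mark isReMaterializable.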
 
 // FR64 to 128-bit vector conversion.
 def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
-                      "movsd {$src, $dst|$dst, $src}",
+                      "movsd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v2f64 (scalar_to_vector FR64:$src)))]>;
 def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                     "movsd {$src, $dst|$dst, $src}",
+                     "movsd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, 
                        (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
 
 def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
-                      "movd {$src, $dst|$dst, $src}",
+                      "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector GR32:$src)))]>;
 def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
-                      "movd {$src, $dst|$dst, $src}",
+                      "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
 
 def MOVDI2SSrr  : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
-                      "movd {$src, $dst|$dst, $src}",
+                      "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>;
 
 def MOVDI2SSrm  : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
-                      "movd {$src, $dst|$dst, $src}",
+                      "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
 
 // SSE2 instructions with XS prefix
 def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
-                    "movq {$src, $dst|$dst, $src}",
+                    "movq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
                   Requires<[HasSSE2]>;
 def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
-                      "movq {$src, $dst|$dst, $src}",
+                      "movq\t{$src, $dst|$dst, $src}",
                       [(store (i64 (vector_extract (v2i64 VR128:$src),
                                     (iPTR 0))), addr:$dst)]>;
 
@@ -2090,27 +2090,27 @@
 // def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
 //           (f32 FR32:$src)>;
 def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src),
-                     "movsd {$src, $dst|$dst, $src}",
+                     "movsd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
                                        (iPTR 0)))]>;
 def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                     "movsd {$src, $dst|$dst, $src}",
+                     "movsd\t{$src, $dst|$dst, $src}",
                      [(store (f64 (vector_extract (v2f64 VR128:$src),
                                    (iPTR 0))), addr:$dst)]>;
 def MOVPDI2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
-                       "movd {$src, $dst|$dst, $src}",
+                       "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
                                         (iPTR 0)))]>;
 def MOVPDI2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
-                       "movd {$src, $dst|$dst, $src}",
+                       "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (vector_extract (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)]>;
 
 def MOVSS2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
-                      "movd {$src, $dst|$dst, $src}",
+                      "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))]>;
 def MOVSS2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
-                      "movd {$src, $dst|$dst, $src}",
+                      "movd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
 
 
@@ -2119,12 +2119,12 @@
 let isTwoAddress = 1 in {
   def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
-                        "movsd {$src2, $dst|$dst, $src2}", []>;
+                        "movsd\t{$src2, $dst|$dst, $src2}", []>;
 
   let AddedComplexity = 15 in
     def MOVLPDrr : SDI<0x10, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                       "movsd {$src2, $dst|$dst, $src2}",
+                       "movsd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                          (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                  MOVL_shuffle_mask)))]>;
@@ -2132,14 +2132,14 @@
 
 // Store / copy the lower 64 bits of an XMM register.
 def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
-                     "movq {$src, $dst|$dst, $src}",
+                     "movq\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
 
 // Move to the lower bits of a VR128, zeroing the upper bits.
 // Loading from memory automatically zeroes the upper bits.
 let AddedComplexity = 20 in
   def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                        "movsd {$src, $dst|$dst, $src}",
+                        "movsd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v2f64 (vector_shuffle immAllZerosV,
                                   (v2f64 (scalar_to_vector
@@ -2149,14 +2149,14 @@
 let AddedComplexity = 15 in
 // movd / movq to an XMM register zero-extends
 def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
-                       "movd {$src, $dst|$dst, $src}",
+                       "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (vector_shuffle immAllZerosV,
                                  (v4i32 (scalar_to_vector GR32:$src)),
                                  MOVL_shuffle_mask)))]>;
 let AddedComplexity = 20 in
 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
-                       "movd {$src, $dst|$dst, $src}",
+                       "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (vector_shuffle immAllZerosV,
                                  (v4i32 (scalar_to_vector (loadi32 addr:$src))),
@@ -2165,12 +2165,12 @@
 // Moving from XMM to XMM, still clearing the upper 64 bits.
 let AddedComplexity = 15 in
 def MOVZQI2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "movq {$src, $dst|$dst, $src}",
+                     "movq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_movl_dq VR128:$src))]>,
                    XS, Requires<[HasSSE2]>;
 let AddedComplexity = 20 in
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
-                     "movq {$src, $dst|$dst, $src}",
+                     "movq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_movl_dq
                                         (bitconvert (memopv2i64 addr:$src))))]>,
                    XS, Requires<[HasSSE2]>;
@@ -2195,34 +2195,34 @@
 
 // Move Instructions
 def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movshdup {$src, $dst|$dst, $src}",
+                      "movshdup\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v4f32 (vector_shuffle
                                                 VR128:$src, (undef),
                                                 MOVSHDUP_shuffle_mask)))]>;
 def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "movshdup {$src, $dst|$dst, $src}",
+                      "movshdup\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v4f32 (vector_shuffle
                                                 (memopv4f32 addr:$src), (undef),
                                                 MOVSHDUP_shuffle_mask)))]>;
 
 def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movsldup {$src, $dst|$dst, $src}",
+                      "movsldup\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v4f32 (vector_shuffle
                                                 VR128:$src, (undef),
                                                 MOVSLDUP_shuffle_mask)))]>;
 def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "movsldup {$src, $dst|$dst, $src}",
+                      "movsldup\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v4f32 (vector_shuffle
                                                 (memopv4f32 addr:$src), (undef),
                                                 MOVSLDUP_shuffle_mask)))]>;
 
 def MOVDDUPrr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movddup {$src, $dst|$dst, $src}",
+                      "movddup\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v2f64 (vector_shuffle
                                                 VR128:$src, (undef),
                                                 SSE_splat_lo_mask)))]>;
 def MOVDDUPrm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                      "movddup {$src, $dst|$dst, $src}",
+                      "movddup\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v2f64 (vector_shuffle
                                 (scalar_to_vector (loadf64 addr:$src)),
@@ -2233,46 +2233,46 @@
 let isTwoAddress = 1 in {
   def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "addsubps {$src2, $dst|$dst, $src2}",
+                        "addsubps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
                                            VR128:$src2))]>;
   def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                        "addsubps {$src2, $dst|$dst, $src2}",
+                        "addsubps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
                                            (load addr:$src2)))]>;
   def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                       "addsubpd {$src2, $dst|$dst, $src2}",
+                       "addsubpd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
                                           VR128:$src2))]>;
   def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                       "addsubpd {$src2, $dst|$dst, $src2}",
+                       "addsubpd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
                                           (load addr:$src2)))]>;
 }
 
 def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                   "lddqu {$src, $dst|$dst, $src}",
+                   "lddqu\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
 
 // Horizontal ops
 class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
   : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-         !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
 class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
   : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-         !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>;
 class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
   : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
 class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
   : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>;
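 // For illustration (hypothetical instance; the real defs sit outside this
 // hunk): "def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;"
 // would print as "haddps<TAB>%xmm1, %xmm0" in AT&T mode.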
 
 let isTwoAddress = 1 in {
@@ -2333,12 +2333,12 @@
   multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                                bit Commutable = 0> {
     def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                   !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
       let isCommutable = Commutable;
     }
     def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                   !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst,
                      (IntId VR128:$src1,
                       (bitconvert (memopv2i64 addr:$src2))))]>;
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td	(revision 40587)
+++ lib/Target/X86/X86InstrInfo.td	(working copy)
@@ -388,7 +388,6 @@
 // Nop
 def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
 
-
 //===----------------------------------------------------------------------===//
 //  Control Flow Instructions...
 //
@@ -397,7 +396,7 @@
 let isTerminator = 1, isReturn = 1, isBarrier = 1,
     hasCtrlDep = 1 in {
   def RET    : I<0xC3, RawFrm, (outs), (ins), "ret", [(X86retflag 0)]>;
-  def RETI   : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), "ret $amt",
+  def RETI   : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), "ret\t$amt",
                     [(X86retflag imm:$amt)]>;
 }
 
@@ -408,49 +407,49 @@
 
 // Unconditional and indirect branches
 let isBranch = 1, isBarrier = 1 in
-  def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp $dst", [(br bb:$dst)]>;
+  def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>;
 
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-  def JMP32r     : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l} {*}$dst",
+  def JMP32r     : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
                      [(brind GR32:$dst)]>;
-  def JMP32m     : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l} {*}$dst",
+  def JMP32m     : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
                      [(brind (loadi32 addr:$dst))]>;
 }
 
 // Conditional branches
-def JE  : IBr<0x84, (ins brtarget:$dst), "je $dst",
+def JE  : IBr<0x84, (ins brtarget:$dst), "je\t$dst",
               [(X86brcond bb:$dst, X86_COND_E)]>, TB;
-def JNE : IBr<0x85, (ins brtarget:$dst), "jne $dst",
+def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst",
               [(X86brcond bb:$dst, X86_COND_NE)]>, TB;
-def JL  : IBr<0x8C, (ins brtarget:$dst), "jl $dst",
+def JL  : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst",
               [(X86brcond bb:$dst, X86_COND_L)]>, TB;
-def JLE : IBr<0x8E, (ins brtarget:$dst), "jle $dst",
+def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst",
               [(X86brcond bb:$dst, X86_COND_LE)]>, TB;
-def JG  : IBr<0x8F, (ins brtarget:$dst), "jg $dst",
+def JG  : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst",
               [(X86brcond bb:$dst, X86_COND_G)]>, TB;
-def JGE : IBr<0x8D, (ins brtarget:$dst), "jge $dst",
+def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst",
               [(X86brcond bb:$dst, X86_COND_GE)]>, TB;
 
-def JB  : IBr<0x82, (ins brtarget:$dst), "jb $dst",
+def JB  : IBr<0x82, (ins brtarget:$dst), "jb\t$dst",
               [(X86brcond bb:$dst, X86_COND_B)]>, TB;
-def JBE : IBr<0x86, (ins brtarget:$dst), "jbe $dst",
+def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst",
               [(X86brcond bb:$dst, X86_COND_BE)]>, TB;
-def JA  : IBr<0x87, (ins brtarget:$dst), "ja $dst",
+def JA  : IBr<0x87, (ins brtarget:$dst), "ja\t$dst",
               [(X86brcond bb:$dst, X86_COND_A)]>, TB;
-def JAE : IBr<0x83, (ins brtarget:$dst), "jae $dst",
+def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst",
               [(X86brcond bb:$dst, X86_COND_AE)]>, TB;
 
-def JS  : IBr<0x88, (ins brtarget:$dst), "js $dst",
+def JS  : IBr<0x88, (ins brtarget:$dst), "js\t$dst",
               [(X86brcond bb:$dst, X86_COND_S)]>, TB;
-def JNS : IBr<0x89, (ins brtarget:$dst), "jns $dst",
+def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst",
               [(X86brcond bb:$dst, X86_COND_NS)]>, TB;
-def JP  : IBr<0x8A, (ins brtarget:$dst), "jp $dst",
+def JP  : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst",
               [(X86brcond bb:$dst, X86_COND_P)]>, TB;
-def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp $dst",
+def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst",
               [(X86brcond bb:$dst, X86_COND_NP)]>, TB;
-def JO  : IBr<0x80, (ins brtarget:$dst), "jo $dst",
+def JO  : IBr<0x80, (ins brtarget:$dst), "jo\t$dst",
               [(X86brcond bb:$dst, X86_COND_O)]>, TB;
-def JNO : IBr<0x81, (ins brtarget:$dst), "jno $dst",
+def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst",
               [(X86brcond bb:$dst, X86_COND_NO)]>, TB;
 
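A small readability consequence, shown with invented labels: with a
common tab separator the operand column starts at the same tab stop
regardless of mnemonic length,

    je      .LBB0_2
    jne     .LBB0_3

where the single-space form let the operand drift right by one column
for each extra character in the mnemonic.
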
 //===----------------------------------------------------------------------===//
@@ -462,23 +461,23 @@
               MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
               XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7] in {
     def CALLpcrel32 : I<0xE8, RawFrm, (outs), (ins i32imm:$dst, variable_ops),
-                        "call ${dst:call}", []>;
+                        "call\t${dst:call}", []>;
     def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
-                        "call {*}$dst", [(X86call GR32:$dst)]>;
+                        "call\t{*}$dst", [(X86call GR32:$dst)]>;
     def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
-                        "call {*}$dst", []>;
+                        "call\t{*}$dst", []>;
   }
 
 // Tail call stuff.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp ${dst:call}  # TAIL CALL",
+  def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call}  # TAIL CALL",
                  []>;
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp {*}$dst  # TAIL CALL",
+  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp\t{*}$dst  # TAIL CALL",
                  []>;
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
   def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst),
-                   "jmp {*}$dst  # TAIL CALL", []>;
+                   "jmp\t{*}$dst  # TAIL CALL", []>;
 
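In the TAILJMP strings just above, only the separator after "jmp"
becomes a tab; the trailing "  # TAIL CALL" marker is an assembler
comment rather than an operand, so its plain spaces are kept. The
emitted form looks like this ("foo" is a placeholder target):

    jmp     foo  # TAIL CALL
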
 //===----------------------------------------------------------------------===//
 //  Miscellaneous Instructions...
@@ -486,56 +485,56 @@
 def LEAVE    : I<0xC9, RawFrm,
                  (outs), (ins), "leave", []>, Imp<[EBP,ESP],[EBP,ESP]>;
 def POP32r   : I<0x58, AddRegFrm,
-                 (outs GR32:$reg), (ins), "pop{l} $reg", []>, Imp<[ESP],[ESP]>;
+                 (outs GR32:$reg), (ins), "pop{l}\t$reg", []>, Imp<[ESP],[ESP]>;
 
 def PUSH32r  : I<0x50, AddRegFrm,
-                 (outs), (ins GR32:$reg), "push{l} $reg", []>, Imp<[ESP],[ESP]>;
+                 (outs), (ins GR32:$reg), "push{l}\t$reg", []>, Imp<[ESP],[ESP]>;
 
 def MovePCtoStack : I<0, Pseudo, (outs), (ins piclabel:$label),
-                      "call $label", []>;
+                      "call\t$label", []>;
 
 let isTwoAddress = 1 in                               // GR32 = bswap GR32
   def BSWAP32r : I<0xC8, AddRegFrm,
                    (outs GR32:$dst), (ins GR32:$src),
-                   "bswap{l} $dst", 
+                   "bswap{l}\t$dst", 
                    [(set GR32:$dst, (bswap GR32:$src))]>, TB;
 
 // FIXME: Model xchg* as two address instructions?
 def XCHG8rr  : I<0x86, MRMDestReg,                    // xchg GR8, GR8
                  (outs), (ins GR8:$src1, GR8:$src2),
-                 "xchg{b} {$src2|$src1}, {$src1|$src2}", []>;
+                 "xchg{b}\t{$src2|$src1}, {$src1|$src2}", []>;
 def XCHG16rr : I<0x87, MRMDestReg,                    // xchg GR16, GR16
                  (outs), (ins GR16:$src1, GR16:$src2),
-                 "xchg{w} {$src2|$src1}, {$src1|$src2}", []>, OpSize;
+                 "xchg{w}\t{$src2|$src1}, {$src1|$src2}", []>, OpSize;
 def XCHG32rr : I<0x87, MRMDestReg,                    // xchg GR32, GR32
                  (outs), (ins GR32:$src1, GR32:$src2),
-                 "xchg{l} {$src2|$src1}, {$src1|$src2}", []>;
+                 "xchg{l}\t{$src2|$src1}, {$src1|$src2}", []>;
 
 def XCHG8mr  : I<0x86, MRMDestMem,
                  (outs), (ins i8mem:$src1, GR8:$src2),
-                 "xchg{b} {$src2|$src1}, {$src1|$src2}", []>;
+                 "xchg{b}\t{$src2|$src1}, {$src1|$src2}", []>;
 def XCHG16mr : I<0x87, MRMDestMem,
                  (outs), (ins i16mem:$src1, GR16:$src2),
-                 "xchg{w} {$src2|$src1}, {$src1|$src2}", []>, OpSize;
+                 "xchg{w}\t{$src2|$src1}, {$src1|$src2}", []>, OpSize;
 def XCHG32mr : I<0x87, MRMDestMem,
                  (outs), (ins i32mem:$src1, GR32:$src2),
-                 "xchg{l} {$src2|$src1}, {$src1|$src2}", []>;
+                 "xchg{l}\t{$src2|$src1}, {$src1|$src2}", []>;
 def XCHG8rm  : I<0x86, MRMSrcMem,
                  (outs), (ins GR8:$src1, i8mem:$src2),
-                 "xchg{b} {$src2|$src1}, {$src1|$src2}", []>;
+                 "xchg{b}\t{$src2|$src1}, {$src1|$src2}", []>;
 def XCHG16rm : I<0x87, MRMSrcMem,
                  (outs), (ins GR16:$src1, i16mem:$src2),
-                 "xchg{w} {$src2|$src1}, {$src1|$src2}", []>, OpSize;
+                 "xchg{w}\t{$src2|$src1}, {$src1|$src2}", []>, OpSize;
 def XCHG32rm : I<0x87, MRMSrcMem,
                  (outs), (ins GR32:$src1, i32mem:$src2),
-                 "xchg{l} {$src2|$src1}, {$src1|$src2}", []>;
+                 "xchg{l}\t{$src2|$src1}, {$src1|$src2}", []>;
 
 def LEA16r   : I<0x8D, MRMSrcMem,
                  (outs GR16:$dst), (ins i32mem:$src),
-                 "lea{w} {$src|$dst}, {$dst|$src}", []>, OpSize;
+                 "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
 def LEA32r   : I<0x8D, MRMSrcMem,
                  (outs GR32:$dst), (ins lea32mem:$src),
-                 "lea{l} {$src|$dst}, {$dst|$src}",
+                 "lea{l}\t{$src|$dst}, {$dst|$src}",
                  [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
 
 def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
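A note on the braces, since most strings here use them: {x|y} selects
between the two assembler syntaxes the printer supports, the first
(AT&T-style) alternative versus the second (Intel-style) one, and the
\t is placed outside the braces so both variants pick it up. LEA32r
above, "lea{l}\t{$src|$dst}, {$dst|$src}", thus prints roughly as

    leal    <mem>, <reg>

in the first variant and

    lea     <reg>, <mem>

in the second, with <reg>/<mem> standing in for the real operands.
REP_MOVSB selects an entire spelling per variant and has no operands,
so it needs no tab at all.
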
@@ -565,48 +564,48 @@
 //  Input/Output Instructions...
 //
 def IN8rr  : I<0xEC, RawFrm, (outs), (ins),
-               "in{b} {%dx, %al|%AL, %DX}",
+               "in{b}\t{%dx, %al|%AL, %DX}",
                []>,  Imp<[DX], [AL]>;
 def IN16rr : I<0xED, RawFrm, (outs), (ins),
-               "in{w} {%dx, %ax|%AX, %DX}",
+               "in{w}\t{%dx, %ax|%AX, %DX}",
                []>,  Imp<[DX], [AX]>, OpSize;
 def IN32rr : I<0xED, RawFrm, (outs), (ins),
-               "in{l} {%dx, %eax|%EAX, %DX}",
+               "in{l}\t{%dx, %eax|%EAX, %DX}",
                []>, Imp<[DX],[EAX]>;
 
 def IN8ri  : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port),
-                  "in{b} {$port, %al|%AL, $port}",
+                  "in{b}\t{$port, %al|%AL, $port}",
                  []>,
              Imp<[], [AL]>;
 def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
-                  "in{w} {$port, %ax|%AX, $port}",
+                  "in{w}\t{$port, %ax|%AX, $port}",
                  []>,
              Imp<[], [AX]>, OpSize;
 def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
-                  "in{l} {$port, %eax|%EAX, $port}",
+                  "in{l}\t{$port, %eax|%EAX, $port}",
                  []>,
              Imp<[],[EAX]>;
 
 def OUT8rr  : I<0xEE, RawFrm, (outs), (ins),
-                "out{b} {%al, %dx|%DX, %AL}",
+                "out{b}\t{%al, %dx|%DX, %AL}",
                 []>,  Imp<[DX,  AL], []>;
 def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
-                "out{w} {%ax, %dx|%DX, %AX}",
+                "out{w}\t{%ax, %dx|%DX, %AX}",
                 []>,  Imp<[DX,  AX], []>, OpSize;
 def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
-                "out{l} {%eax, %dx|%DX, %EAX}",
+                "out{l}\t{%eax, %dx|%DX, %EAX}",
                 []>, Imp<[DX, EAX], []>;
 
 def OUT8ir  : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port),
-                   "out{b} {%al, $port|$port, %AL}",
+                   "out{b}\t{%al, $port|$port, %AL}",
                    []>,
               Imp<[AL], []>;
 def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
-                   "out{w} {%ax, $port|$port, %AX}",
+                   "out{w}\t{%ax, $port|$port, %AX}",
                    []>,
               Imp<[AX], []>, OpSize;
 def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
-                   "out{l} {%eax, $port|$port, %EAX}",
+                   "out{l}\t{%eax, $port|$port, %EAX}",
                    []>,
               Imp<[EAX], []>;
 
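The in/out strings show the same pattern with fixed physical registers:
each alternative inside the braces carries its own register spelling,
and the tab sits before the whole group, so IN8rr for example prints as

    inb     %dx, %al

in the first variant, with the second alternative supplying the other
syntax's spelling of the same register pair.
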
@@ -614,50 +613,50 @@
 //  Move Instructions...
 //
 def MOV8rr  : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
-                "mov{b} {$src, $dst|$dst, $src}", []>;
+                "mov{b}\t{$src, $dst|$dst, $src}", []>;
 def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
-                "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
-                "mov{l} {$src, $dst|$dst, $src}", []>;
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
 let isReMaterializable = 1 in {
 def MOV8ri  : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
-                   "mov{b} {$src, $dst|$dst, $src}",
+                   "mov{b}\t{$src, $dst|$dst, $src}",
                    [(set GR8:$dst, imm:$src)]>;
 def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
-                   "mov{w} {$src, $dst|$dst, $src}",
+                   "mov{w}\t{$src, $dst|$dst, $src}",
                    [(set GR16:$dst, imm:$src)]>, OpSize;
 def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
-                   "mov{l} {$src, $dst|$dst, $src}",
+                   "mov{l}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, imm:$src)]>;
 }
 def MOV8mi  : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
-                   "mov{b} {$src, $dst|$dst, $src}",
+                   "mov{b}\t{$src, $dst|$dst, $src}",
                    [(store (i8 imm:$src), addr:$dst)]>;
 def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
-                   "mov{w} {$src, $dst|$dst, $src}",
+                   "mov{w}\t{$src, $dst|$dst, $src}",
                    [(store (i16 imm:$src), addr:$dst)]>, OpSize;
 def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
-                   "mov{l} {$src, $dst|$dst, $src}",
+                   "mov{l}\t{$src, $dst|$dst, $src}",
                    [(store (i32 imm:$src), addr:$dst)]>;
 
 def MOV8rm  : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
-                "mov{b} {$src, $dst|$dst, $src}",
+                "mov{b}\t{$src, $dst|$dst, $src}",
                 [(set GR8:$dst, (load addr:$src))]>;
 def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
-                "mov{w} {$src, $dst|$dst, $src}",
+                "mov{w}\t{$src, $dst|$dst, $src}",
                 [(set GR16:$dst, (load addr:$src))]>, OpSize;
 def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                "mov{l} {$src, $dst|$dst, $src}",
+                "mov{l}\t{$src, $dst|$dst, $src}",
                 [(set GR32:$dst, (load addr:$src))]>;
 
 def MOV8mr  : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
-                "mov{b} {$src, $dst|$dst, $src}",
+                "mov{b}\t{$src, $dst|$dst, $src}",
                 [(store GR8:$src, addr:$dst)]>;
 def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
-                "mov{w} {$src, $dst|$dst, $src}",
+                "mov{w}\t{$src, $dst|$dst, $src}",
                 [(store GR16:$src, addr:$dst)]>, OpSize;
 def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                "mov{l} {$src, $dst|$dst, $src}",
+                "mov{l}\t{$src, $dst|$dst, $src}",
                 [(store GR32:$src, addr:$dst)]>;
                 
 //===----------------------------------------------------------------------===//
@@ -665,71 +664,71 @@
 //
 
 // Extra precision multiplication
-def MUL8r  : I<0xF6, MRM4r, (outs),  (ins GR8:$src), "mul{b} $src",
+def MUL8r  : I<0xF6, MRM4r, (outs),  (ins GR8:$src), "mul{b}\t$src",
                // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
                // This probably ought to be moved to a def : Pat<> if the
                // syntax can be accepted.
                [(set AL, (mul AL, GR8:$src))]>,
              Imp<[AL],[AX]>;               // AL,AH = AL*GR8
-def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src), "mul{w} $src", []>,
+def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src), "mul{w}\t$src", []>,
              Imp<[AX],[AX,DX]>, OpSize;    // AX,DX = AX*GR16
-def MUL32r : I<0xF7, MRM4r, (outs),  (ins GR32:$src), "mul{l} $src", []>,
+def MUL32r : I<0xF7, MRM4r, (outs),  (ins GR32:$src), "mul{l}\t$src", []>,
              Imp<[EAX],[EAX,EDX]>;         // EAX,EDX = EAX*GR32
 def MUL8m  : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
-               "mul{b} $src",
+               "mul{b}\t$src",
                // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
                // This probably ought to be moved to a def : Pat<> if the
                // syntax can be accepted.
                [(set AL, (mul AL, (loadi8 addr:$src)))]>,
              Imp<[AL],[AX]>;          // AL,AH = AL*[mem8]
 def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
-               "mul{w} $src", []>, Imp<[AX],[AX,DX]>,
+               "mul{w}\t$src", []>, Imp<[AX],[AX,DX]>,
                OpSize; // AX,DX = AX*[mem16]
 def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
-               "mul{l} $src", []>, Imp<[EAX],[EAX,EDX]>;// EAX,EDX = EAX*[mem32]
+               "mul{l}\t$src", []>, Imp<[EAX],[EAX,EDX]>;// EAX,EDX = EAX*[mem32]
 
-def IMUL8r  : I<0xF6, MRM5r, (outs),  (ins GR8:$src), "imul{b} $src", []>,
+def IMUL8r  : I<0xF6, MRM5r, (outs),  (ins GR8:$src), "imul{b}\t$src", []>,
               Imp<[AL],[AX]>;               // AL,AH = AL*GR8
-def IMUL16r : I<0xF7, MRM5r, (outs),  (ins GR16:$src), "imul{w} $src", []>,
+def IMUL16r : I<0xF7, MRM5r, (outs),  (ins GR16:$src), "imul{w}\t$src", []>,
               Imp<[AX],[AX,DX]>, OpSize;    // AX,DX = AX*GR16
-def IMUL32r : I<0xF7, MRM5r, (outs),  (ins GR32:$src), "imul{l} $src", []>,
+def IMUL32r : I<0xF7, MRM5r, (outs),  (ins GR32:$src), "imul{l}\t$src", []>,
               Imp<[EAX],[EAX,EDX]>;         // EAX,EDX = EAX*GR32
 def IMUL8m  : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
-                "imul{b} $src", []>, Imp<[AL],[AX]>;        // AL,AH = AL*[mem8]
+                "imul{b}\t$src", []>, Imp<[AL],[AX]>;        // AL,AH = AL*[mem8]
 def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
-                "imul{w} $src", []>, Imp<[AX],[AX,DX]>,
+                "imul{w}\t$src", []>, Imp<[AX],[AX,DX]>,
                 OpSize; // AX,DX = AX*[mem16]
 def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
-                "imul{l} $src", []>,
+                "imul{l}\t$src", []>,
                 Imp<[EAX],[EAX,EDX]>;  // EAX,EDX = EAX*[mem32]
 
 // unsigned division/remainder
 def DIV8r  : I<0xF6, MRM6r, (outs),  (ins GR8:$src),          // AX/r8 = AL,AH
-               "div{b} $src", []>, Imp<[AX],[AX]>;
+               "div{b}\t$src", []>, Imp<[AX],[AX]>;
 def DIV16r : I<0xF7, MRM6r, (outs),  (ins GR16:$src),         // DX:AX/r16 = AX,DX
-               "div{w} $src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
+               "div{w}\t$src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
 def DIV32r : I<0xF7, MRM6r, (outs),  (ins GR32:$src),         // EDX:EAX/r32 = EAX,EDX
-               "div{l} $src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
+               "div{l}\t$src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
 def DIV8m  : I<0xF6, MRM6m, (outs), (ins i8mem:$src),       // AX/[mem8] = AL,AH
-               "div{b} $src", []>, Imp<[AX],[AX]>;
+               "div{b}\t$src", []>, Imp<[AX],[AX]>;
 def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src),      // DX:AX/[mem16] = AX,DX
-               "div{w} $src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
+               "div{w}\t$src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
 def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),      // EDX:EAX/[mem32] = EAX,EDX
-               "div{l} $src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
+               "div{l}\t$src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
 
 // Signed division/remainder.
 def IDIV8r : I<0xF6, MRM7r, (outs),  (ins GR8:$src),          // AX/r8 = AL,AH
-               "idiv{b} $src", []>, Imp<[AX],[AX]>;
+               "idiv{b}\t$src", []>, Imp<[AX],[AX]>;
 def IDIV16r: I<0xF7, MRM7r, (outs),  (ins GR16:$src),         // DX:AX/r16 = AX,DX
-               "idiv{w} $src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
+               "idiv{w}\t$src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
 def IDIV32r: I<0xF7, MRM7r, (outs),  (ins GR32:$src),         // EDX:EAX/r32 = EAX,EDX
-               "idiv{l} $src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
+               "idiv{l}\t$src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
 def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src),      // AX/[mem8] = AL,AH
-               "idiv{b} $src", []>, Imp<[AX],[AX]>;
+               "idiv{b}\t$src", []>, Imp<[AX],[AX]>;
 def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src),     // DX:AX/[mem16] = AX,DX
-               "idiv{w} $src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
+               "idiv{w}\t$src", []>, Imp<[AX,DX],[AX,DX]>, OpSize;
 def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),     // EDX:EAX/[mem32] = EAX,EDX
-               "idiv{l} $src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
+               "idiv{l}\t$src", []>, Imp<[EAX,EDX],[EAX,EDX]>;
 
 
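For the multiply/divide family the strings carry only the single
explicit operand; the implicit accumulator operands (AL/AH, AX/DX,
EAX/EDX) are modeled by the Imp<> lists and never appear in the asm
text. The printed form is just mnemonic, tab, source, e.g. with an
invented register:

    mull    %ecx            # EAX,EDX = EAX*ECX
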
 //===----------------------------------------------------------------------===//
@@ -740,350 +739,350 @@
 // Conditional moves
 def CMOVB16rr : I<0x42, MRMSrcReg,       // if <u, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovb {$src2, $dst|$dst, $src2}",
+                  "cmovb\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_B))]>,
                   TB, OpSize;
 def CMOVB16rm : I<0x42, MRMSrcMem,       // if <u, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovb {$src2, $dst|$dst, $src2}",
+                  "cmovb\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_B))]>,
                   TB, OpSize;
 def CMOVB32rr : I<0x42, MRMSrcReg,       // if <u, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovb {$src2, $dst|$dst, $src2}",
+                  "cmovb\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_B))]>,
                    TB;
 def CMOVB32rm : I<0x42, MRMSrcMem,       // if <u, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovb {$src2, $dst|$dst, $src2}",
+                  "cmovb\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_B))]>,
                    TB;
 
 def CMOVAE16rr: I<0x43, MRMSrcReg,       // if >=u, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovae {$src2, $dst|$dst, $src2}",
+                  "cmovae\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_AE))]>,
                    TB, OpSize;
 def CMOVAE16rm: I<0x43, MRMSrcMem,       // if >=u, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovae {$src2, $dst|$dst, $src2}",
+                  "cmovae\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_AE))]>,
                    TB, OpSize;
 def CMOVAE32rr: I<0x43, MRMSrcReg,       // if >=u, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovae {$src2, $dst|$dst, $src2}",
+                  "cmovae\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_AE))]>,
                    TB;
 def CMOVAE32rm: I<0x43, MRMSrcMem,       // if >=u, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovae {$src2, $dst|$dst, $src2}",
+                  "cmovae\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_AE))]>,
                    TB;
 
 def CMOVE16rr : I<0x44, MRMSrcReg,       // if ==, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmove {$src2, $dst|$dst, $src2}",
+                  "cmove\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_E))]>,
                    TB, OpSize;
 def CMOVE16rm : I<0x44, MRMSrcMem,       // if ==, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmove {$src2, $dst|$dst, $src2}",
+                  "cmove\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_E))]>,
                    TB, OpSize;
 def CMOVE32rr : I<0x44, MRMSrcReg,       // if ==, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmove {$src2, $dst|$dst, $src2}",
+                  "cmove\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_E))]>,
                    TB;
 def CMOVE32rm : I<0x44, MRMSrcMem,       // if ==, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmove {$src2, $dst|$dst, $src2}",
+                  "cmove\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_E))]>,
                    TB;
 
 def CMOVNE16rr: I<0x45, MRMSrcReg,       // if !=, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovne {$src2, $dst|$dst, $src2}",
+                  "cmovne\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_NE))]>,
                    TB, OpSize;
 def CMOVNE16rm: I<0x45, MRMSrcMem,       // if !=, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovne {$src2, $dst|$dst, $src2}",
+                  "cmovne\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_NE))]>,
                    TB, OpSize;
 def CMOVNE32rr: I<0x45, MRMSrcReg,       // if !=, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovne {$src2, $dst|$dst, $src2}",
+                  "cmovne\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_NE))]>,
                    TB;
 def CMOVNE32rm: I<0x45, MRMSrcMem,       // if !=, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovne {$src2, $dst|$dst, $src2}",
+                  "cmovne\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_NE))]>,
                    TB;
 
 def CMOVBE16rr: I<0x46, MRMSrcReg,       // if <=u, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovbe {$src2, $dst|$dst, $src2}",
+                  "cmovbe\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_BE))]>,
                    TB, OpSize;
 def CMOVBE16rm: I<0x46, MRMSrcMem,       // if <=u, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovbe {$src2, $dst|$dst, $src2}",
+                  "cmovbe\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_BE))]>,
                    TB, OpSize;
 def CMOVBE32rr: I<0x46, MRMSrcReg,       // if <=u, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovbe {$src2, $dst|$dst, $src2}",
+                  "cmovbe\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_BE))]>,
                    TB;
 def CMOVBE32rm: I<0x46, MRMSrcMem,       // if <=u, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovbe {$src2, $dst|$dst, $src2}",
+                  "cmovbe\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_BE))]>,
                    TB;
 
 def CMOVA16rr : I<0x47, MRMSrcReg,       // if >u, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmova {$src2, $dst|$dst, $src2}",
+                  "cmova\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_A))]>,
                    TB, OpSize;
 def CMOVA16rm : I<0x47, MRMSrcMem,       // if >u, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmova {$src2, $dst|$dst, $src2}",
+                  "cmova\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_A))]>,
                    TB, OpSize;
 def CMOVA32rr : I<0x47, MRMSrcReg,       // if >u, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmova {$src2, $dst|$dst, $src2}",
+                  "cmova\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_A))]>,
                    TB;
 def CMOVA32rm : I<0x47, MRMSrcMem,       // if >u, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmova {$src2, $dst|$dst, $src2}",
+                  "cmova\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_A))]>,
                    TB;
 
 def CMOVL16rr : I<0x4C, MRMSrcReg,       // if <s, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovl {$src2, $dst|$dst, $src2}",
+                  "cmovl\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_L))]>,
                    TB, OpSize;
 def CMOVL16rm : I<0x4C, MRMSrcMem,       // if <s, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovl {$src2, $dst|$dst, $src2}",
+                  "cmovl\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_L))]>,
                    TB, OpSize;
 def CMOVL32rr : I<0x4C, MRMSrcReg,       // if <s, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovl {$src2, $dst|$dst, $src2}",
+                  "cmovl\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_L))]>,
                    TB;
 def CMOVL32rm : I<0x4C, MRMSrcMem,       // if <s, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovl {$src2, $dst|$dst, $src2}",
+                  "cmovl\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_L))]>,
                    TB;
 
 def CMOVGE16rr: I<0x4D, MRMSrcReg,       // if >=s, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovge {$src2, $dst|$dst, $src2}",
+                  "cmovge\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_GE))]>,
                    TB, OpSize;
 def CMOVGE16rm: I<0x4D, MRMSrcMem,       // if >=s, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovge {$src2, $dst|$dst, $src2}",
+                  "cmovge\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_GE))]>,
                    TB, OpSize;
 def CMOVGE32rr: I<0x4D, MRMSrcReg,       // if >=s, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovge {$src2, $dst|$dst, $src2}",
+                  "cmovge\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_GE))]>,
                    TB;
 def CMOVGE32rm: I<0x4D, MRMSrcMem,       // if >=s, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovge {$src2, $dst|$dst, $src2}",
+                  "cmovge\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_GE))]>,
                    TB;
 
 def CMOVLE16rr: I<0x4E, MRMSrcReg,       // if <=s, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovle {$src2, $dst|$dst, $src2}",
+                  "cmovle\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_LE))]>,
                    TB, OpSize;
 def CMOVLE16rm: I<0x4E, MRMSrcMem,       // if <=s, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovle {$src2, $dst|$dst, $src2}",
+                  "cmovle\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_LE))]>,
                    TB, OpSize;
 def CMOVLE32rr: I<0x4E, MRMSrcReg,       // if <=s, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovle {$src2, $dst|$dst, $src2}",
+                  "cmovle\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_LE))]>,
                    TB;
 def CMOVLE32rm: I<0x4E, MRMSrcMem,       // if <=s, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovle {$src2, $dst|$dst, $src2}",
+                  "cmovle\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_LE))]>,
                    TB;
 
 def CMOVG16rr : I<0x4F, MRMSrcReg,       // if >s, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovg {$src2, $dst|$dst, $src2}",
+                  "cmovg\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_G))]>,
                    TB, OpSize;
 def CMOVG16rm : I<0x4F, MRMSrcMem,       // if >s, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovg {$src2, $dst|$dst, $src2}",
+                  "cmovg\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_G))]>,
                    TB, OpSize;
 def CMOVG32rr : I<0x4F, MRMSrcReg,       // if >s, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovg {$src2, $dst|$dst, $src2}",
+                  "cmovg\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_G))]>,
                    TB;
 def CMOVG32rm : I<0x4F, MRMSrcMem,       // if >s, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovg {$src2, $dst|$dst, $src2}",
+                  "cmovg\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_G))]>,
                    TB;
 
 def CMOVS16rr : I<0x48, MRMSrcReg,       // if signed, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovs {$src2, $dst|$dst, $src2}",
+                  "cmovs\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_S))]>,
                   TB, OpSize;
 def CMOVS16rm : I<0x48, MRMSrcMem,       // if signed, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovs {$src2, $dst|$dst, $src2}",
+                  "cmovs\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_S))]>,
                   TB, OpSize;
 def CMOVS32rr : I<0x48, MRMSrcReg,       // if signed, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovs {$src2, $dst|$dst, $src2}",
+                  "cmovs\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_S))]>,
                   TB;
 def CMOVS32rm : I<0x48, MRMSrcMem,       // if signed, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovs {$src2, $dst|$dst, $src2}",
+                  "cmovs\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_S))]>,
                   TB;
 
 def CMOVNS16rr: I<0x49, MRMSrcReg,       // if !signed, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovns {$src2, $dst|$dst, $src2}",
+                  "cmovns\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_NS))]>,
                   TB, OpSize;
 def CMOVNS16rm: I<0x49, MRMSrcMem,       // if !signed, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovns {$src2, $dst|$dst, $src2}",
+                  "cmovns\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_NS))]>,
                   TB, OpSize;
 def CMOVNS32rr: I<0x49, MRMSrcReg,       // if !signed, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovns {$src2, $dst|$dst, $src2}",
+                  "cmovns\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_NS))]>,
                   TB;
 def CMOVNS32rm: I<0x49, MRMSrcMem,       // if !signed, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovns {$src2, $dst|$dst, $src2}",
+                  "cmovns\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_NS))]>,
                   TB;
 
 def CMOVP16rr : I<0x4A, MRMSrcReg,       // if parity, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovp {$src2, $dst|$dst, $src2}",
+                  "cmovp\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                    X86_COND_P))]>,
                   TB, OpSize;
 def CMOVP16rm : I<0x4A, MRMSrcMem,       // if parity, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovp {$src2, $dst|$dst, $src2}",
+                  "cmovp\t{$src2, $dst|$dst, $src2}",
                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    X86_COND_P))]>,
                   TB, OpSize;
 def CMOVP32rr : I<0x4A, MRMSrcReg,       // if parity, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovp {$src2, $dst|$dst, $src2}",
+                  "cmovp\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                    X86_COND_P))]>,
                   TB;
 def CMOVP32rm : I<0x4A, MRMSrcMem,       // if parity, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovp {$src2, $dst|$dst, $src2}",
+                  "cmovp\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    X86_COND_P))]>,
                   TB;
 
 def CMOVNP16rr : I<0x4B, MRMSrcReg,       // if !parity, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovnp {$src2, $dst|$dst, $src2}",
+                  "cmovnp\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
                                     X86_COND_NP))]>,
                   TB, OpSize;
 def CMOVNP16rm : I<0x4B, MRMSrcMem,       // if !parity, GR16 = [mem16]
                   (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovnp {$src2, $dst|$dst, $src2}",
+                  "cmovnp\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                     X86_COND_NP))]>,
                   TB, OpSize;
 def CMOVNP32rr : I<0x4B, MRMSrcReg,       // if !parity, GR32 = GR32
                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovnp {$src2, $dst|$dst, $src2}",
+                  "cmovnp\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
                                     X86_COND_NP))]>,
                   TB;
 def CMOVNP32rm : I<0x4B, MRMSrcMem,       // if !parity, GR32 = [mem32]
                   (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovnp {$src2, $dst|$dst, $src2}",
+                  "cmovnp\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                     X86_COND_NP))]>,
                   TB;
@@ -1091,75 +1090,75 @@
 
 // unary instructions
 let CodeSize = 2 in {
-def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b} $dst",
+def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst",
                [(set GR8:$dst, (ineg GR8:$src))]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w} $dst",
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst",
                [(set GR16:$dst, (ineg GR16:$src))]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l} $dst",
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst",
                [(set GR32:$dst, (ineg GR32:$src))]>;
 let isTwoAddress = 0 in {
-  def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b} $dst",
+  def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst",
                  [(store (ineg (loadi8 addr:$dst)), addr:$dst)]>;
-  def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w} $dst",
+  def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst",
                  [(store (ineg (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
-  def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l} $dst",
+  def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst",
                  [(store (ineg (loadi32 addr:$dst)), addr:$dst)]>;
 
 }
 
-def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b} $dst",
+def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
                [(set GR8:$dst, (not GR8:$src))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w} $dst",
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
                [(set GR16:$dst, (not GR16:$src))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l} $dst",
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
                [(set GR32:$dst, (not GR32:$src))]>;
 let isTwoAddress = 0 in {
-  def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b} $dst",
+  def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
                  [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
-  def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w} $dst",
+  def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst",
                  [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
-  def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l} $dst",
+  def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst",
                  [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
 }
 } // CodeSize
 
 // TODO: inc/dec is slow for P4, but fast for Pentium-M.
 let CodeSize = 2 in
-def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b} $dst",
+def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
                [(set GR8:$dst, (add GR8:$src, 1))]>;
 let isConvertibleToThreeAddress = 1, CodeSize = 1 in {  // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "inc{w} $dst",
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst",
                [(set GR16:$dst, (add GR16:$src, 1))]>,
              OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "inc{l} $dst",
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst",
                [(set GR32:$dst, (add GR32:$src, 1))]>, Requires<[In32BitMode]>;
 }
 let isTwoAddress = 0, CodeSize = 2 in {
-  def INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b} $dst",
+  def INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
                [(store (add (loadi8 addr:$dst), 1), addr:$dst)]>;
-  def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w} $dst",
+  def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
                [(store (add (loadi16 addr:$dst), 1), addr:$dst)]>, OpSize;
-  def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l} $dst",
+  def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
                [(store (add (loadi32 addr:$dst), 1), addr:$dst)]>;
 }
 
 let CodeSize = 2 in
-def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b} $dst",
+def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
                [(set GR8:$dst, (add GR8:$src, -1))]>;
 let isConvertibleToThreeAddress = 1, CodeSize = 1 in {   // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "dec{w} $dst",
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst",
                [(set GR16:$dst, (add GR16:$src, -1))]>,
              OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "dec{l} $dst",
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst",
                [(set GR32:$dst, (add GR32:$src, -1))]>, Requires<[In32BitMode]>;
 }
 
 let isTwoAddress = 0, CodeSize = 2 in {
-  def DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b} $dst",
+  def DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
                [(store (add (loadi8 addr:$dst), -1), addr:$dst)]>;
-  def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w} $dst",
+  def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
                [(store (add (loadi16 addr:$dst), -1), addr:$dst)]>, OpSize;
-  def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l} $dst",
+  def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
                [(store (add (loadi32 addr:$dst), -1), addr:$dst)]>;
 }
 
@@ -1167,155 +1166,155 @@
 let isCommutable = 1 in {   // X = AND Y, Z   --> X = AND Z, Y
 def AND8rr   : I<0x20, MRMDestReg,
                 (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
-                "and{b} {$src2, $dst|$dst, $src2}",
+                "and{b}\t{$src2, $dst|$dst, $src2}",
                 [(set GR8:$dst, (and GR8:$src1, GR8:$src2))]>;
 def AND16rr  : I<0x21, MRMDestReg,
                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                 "and{w} {$src2, $dst|$dst, $src2}",
+                 "and{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (and GR16:$src1, GR16:$src2))]>, OpSize;
 def AND32rr  : I<0x21, MRMDestReg, 
                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                 "and{l} {$src2, $dst|$dst, $src2}",
+                 "and{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>;
 }
 
 def AND8rm   : I<0x22, MRMSrcMem, 
                  (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
-                 "and{b} {$src2, $dst|$dst, $src2}",
+                 "and{b}\t{$src2, $dst|$dst, $src2}",
                 [(set GR8:$dst, (and GR8:$src1, (load addr:$src2)))]>;
 def AND16rm  : I<0x23, MRMSrcMem, 
                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                 "and{w} {$src2, $dst|$dst, $src2}",
+                 "and{w}\t{$src2, $dst|$dst, $src2}",
                 [(set GR16:$dst, (and GR16:$src1, (load addr:$src2)))]>, OpSize;
 def AND32rm  : I<0x23, MRMSrcMem,
                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                 "and{l} {$src2, $dst|$dst, $src2}",
+                 "and{l}\t{$src2, $dst|$dst, $src2}",
                 [(set GR32:$dst, (and GR32:$src1, (load addr:$src2)))]>;
 
 def AND8ri   : Ii8<0x80, MRM4r, 
                    (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
-                   "and{b} {$src2, $dst|$dst, $src2}",
+                   "and{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (and GR8:$src1, imm:$src2))]>;
 def AND16ri  : Ii16<0x81, MRM4r, 
                     (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                    "and{w} {$src2, $dst|$dst, $src2}",
+                    "and{w}\t{$src2, $dst|$dst, $src2}",
                     [(set GR16:$dst, (and GR16:$src1, imm:$src2))]>, OpSize;
 def AND32ri  : Ii32<0x81, MRM4r, 
                     (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                    "and{l} {$src2, $dst|$dst, $src2}",
+                    "and{l}\t{$src2, $dst|$dst, $src2}",
                     [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
 def AND16ri8 : Ii8<0x83, MRM4r, 
                    (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                   "and{w} {$src2, $dst|$dst, $src2}",
+                   "and{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (and GR16:$src1, i16immSExt8:$src2))]>,
                    OpSize;
 def AND32ri8 : Ii8<0x83, MRM4r, 
                    (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "and{l} {$src2, $dst|$dst, $src2}",
+                   "and{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (and GR32:$src1, i32immSExt8:$src2))]>;
 
 let isTwoAddress = 0 in {
   def AND8mr   : I<0x20, MRMDestMem,
                    (outs), (ins i8mem :$dst, GR8 :$src),
-                   "and{b} {$src, $dst|$dst, $src}",
+                   "and{b}\t{$src, $dst|$dst, $src}",
                    [(store (and (load addr:$dst), GR8:$src), addr:$dst)]>;
   def AND16mr  : I<0x21, MRMDestMem,
                    (outs), (ins i16mem:$dst, GR16:$src),
-                   "and{w} {$src, $dst|$dst, $src}",
+                   "and{w}\t{$src, $dst|$dst, $src}",
                    [(store (and (load addr:$dst), GR16:$src), addr:$dst)]>,
                    OpSize;
   def AND32mr  : I<0x21, MRMDestMem,
                    (outs), (ins i32mem:$dst, GR32:$src),
-                   "and{l} {$src, $dst|$dst, $src}",
+                   "and{l}\t{$src, $dst|$dst, $src}",
                    [(store (and (load addr:$dst), GR32:$src), addr:$dst)]>;
   def AND8mi   : Ii8<0x80, MRM4m,
                      (outs), (ins i8mem :$dst, i8imm :$src),
-                     "and{b} {$src, $dst|$dst, $src}",
+                     "and{b}\t{$src, $dst|$dst, $src}",
                       [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst)]>;
   def AND16mi  : Ii16<0x81, MRM4m,
                       (outs), (ins i16mem:$dst, i16imm:$src),
-                      "and{w} {$src, $dst|$dst, $src}",
+                      "and{w}\t{$src, $dst|$dst, $src}",
                       [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst)]>,
                       OpSize;
   def AND32mi  : Ii32<0x81, MRM4m,
                       (outs), (ins i32mem:$dst, i32imm:$src),
-                      "and{l} {$src, $dst|$dst, $src}",
+                      "and{l}\t{$src, $dst|$dst, $src}",
                       [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst)]>;
   def AND16mi8 : Ii8<0x83, MRM4m,
                      (outs), (ins i16mem:$dst, i16i8imm :$src),
-                     "and{w} {$src, $dst|$dst, $src}",
+                     "and{w}\t{$src, $dst|$dst, $src}",
                 [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst)]>,
                      OpSize;
   def AND32mi8 : Ii8<0x83, MRM4m,
                      (outs), (ins i32mem:$dst, i32i8imm :$src),
-                     "and{l} {$src, $dst|$dst, $src}",
+                     "and{l}\t{$src, $dst|$dst, $src}",
                 [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
 }
 
 
 let isCommutable = 1 in {   // X = OR Y, Z   --> X = OR Z, Y
 def OR8rr    : I<0x08, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
-                 "or{b} {$src2, $dst|$dst, $src2}",
+                 "or{b}\t{$src2, $dst|$dst, $src2}",
                  [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
 def OR16rr   : I<0x09, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                 "or{w} {$src2, $dst|$dst, $src2}",
+                 "or{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>, OpSize;
 def OR32rr   : I<0x09, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                 "or{l} {$src2, $dst|$dst, $src2}",
+                 "or{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>;
 }
 def OR8rm    : I<0x0A, MRMSrcMem , (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
-                 "or{b} {$src2, $dst|$dst, $src2}",
+                 "or{b}\t{$src2, $dst|$dst, $src2}",
                 [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
 def OR16rm   : I<0x0B, MRMSrcMem , (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                 "or{w} {$src2, $dst|$dst, $src2}",
+                 "or{w}\t{$src2, $dst|$dst, $src2}",
                 [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>, OpSize;
 def OR32rm   : I<0x0B, MRMSrcMem , (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                 "or{l} {$src2, $dst|$dst, $src2}",
+                 "or{l}\t{$src2, $dst|$dst, $src2}",
                 [(set GR32:$dst, (or GR32:$src1, (load addr:$src2)))]>;
 
 def OR8ri    : Ii8 <0x80, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                    "or{b} {$src2, $dst|$dst, $src2}",
+                    "or{b}\t{$src2, $dst|$dst, $src2}",
                     [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
 def OR16ri   : Ii16<0x81, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                    "or{w} {$src2, $dst|$dst, $src2}", 
+                    "or{w}\t{$src2, $dst|$dst, $src2}", 
                     [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>, OpSize;
 def OR32ri   : Ii32<0x81, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                    "or{l} {$src2, $dst|$dst, $src2}",
+                    "or{l}\t{$src2, $dst|$dst, $src2}",
                     [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
 
 def OR16ri8  : Ii8<0x83, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                   "or{w} {$src2, $dst|$dst, $src2}",
+                   "or{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2))]>, OpSize;
 def OR32ri8  : Ii8<0x83, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "or{l} {$src2, $dst|$dst, $src2}",
+                   "or{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2))]>;
 let isTwoAddress = 0 in {
   def OR8mr  : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
-                 "or{b} {$src, $dst|$dst, $src}",
+                 "or{b}\t{$src, $dst|$dst, $src}",
                  [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
   def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
-                 "or{w} {$src, $dst|$dst, $src}",
+                 "or{w}\t{$src, $dst|$dst, $src}",
                  [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>, OpSize;
   def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                 "or{l} {$src, $dst|$dst, $src}",
+                 "or{l}\t{$src, $dst|$dst, $src}",
                  [(store (or (load addr:$dst), GR32:$src), addr:$dst)]>;
   def OR8mi    : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
-                 "or{b} {$src, $dst|$dst, $src}",
+                 "or{b}\t{$src, $dst|$dst, $src}",
                  [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst)]>;
   def OR16mi   : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src),
-                 "or{w} {$src, $dst|$dst, $src}",
+                 "or{w}\t{$src, $dst|$dst, $src}",
                  [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst)]>,
                  OpSize;
   def OR32mi   : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src),
-                 "or{l} {$src, $dst|$dst, $src}",
+                 "or{l}\t{$src, $dst|$dst, $src}",
                  [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst)]>;
   def OR16mi8  : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src),
-                 "or{w} {$src, $dst|$dst, $src}",
+                 "or{w}\t{$src, $dst|$dst, $src}",
                  [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst)]>,
                      OpSize;
   def OR32mi8  : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
-                 "or{l} {$src, $dst|$dst, $src}",
+                 "or{l}\t{$src, $dst|$dst, $src}",
                  [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
 }
 
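One more detail on operand counts: these ALU definitions are
two-address (only the memory forms are wrapped in isTwoAddress = 0
blocks), so $src1 is tied to $dst and the strings mention only $src2
and $dst. The printed instruction therefore has two operands even
though the pattern has three, e.g. with invented values:

    orl     $255, %eax
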
@@ -1323,429 +1322,429 @@
 let isCommutable = 1 in {   // X = XOR Y, Z   --> X = XOR Z, Y
 def XOR8rr   : I<0x30, MRMDestReg,
                  (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
-                 "xor{b} {$src2, $dst|$dst, $src2}",
+                 "xor{b}\t{$src2, $dst|$dst, $src2}",
                  [(set GR8:$dst, (xor GR8:$src1, GR8:$src2))]>;
 def XOR16rr  : I<0x31, MRMDestReg, 
                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), 
-                 "xor{w} {$src2, $dst|$dst, $src2}",
+                 "xor{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (xor GR16:$src1, GR16:$src2))]>, OpSize;
 def XOR32rr  : I<0x31, MRMDestReg, 
                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), 
-                 "xor{l} {$src2, $dst|$dst, $src2}",
+                 "xor{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
 }
 
 def XOR8rm   : I<0x32, MRMSrcMem , 
                  (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), 
-                 "xor{b} {$src2, $dst|$dst, $src2}",
+                 "xor{b}\t{$src2, $dst|$dst, $src2}",
                  [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2)))]>;
 def XOR16rm  : I<0x33, MRMSrcMem , 
                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), 
-                 "xor{w} {$src2, $dst|$dst, $src2}",
+                 "xor{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2)))]>, OpSize;
 def XOR32rm  : I<0x33, MRMSrcMem , 
                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), 
-                 "xor{l} {$src2, $dst|$dst, $src2}",
+                 "xor{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (xor GR32:$src1, (load addr:$src2)))]>;
 
 def XOR8ri   : Ii8<0x80, MRM6r, 
                    (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), 
-                   "xor{b} {$src2, $dst|$dst, $src2}",
+                   "xor{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (xor GR8:$src1, imm:$src2))]>;
 def XOR16ri  : Ii16<0x81, MRM6r, 
                     (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), 
-                    "xor{w} {$src2, $dst|$dst, $src2}",
+                    "xor{w}\t{$src2, $dst|$dst, $src2}",
                     [(set GR16:$dst, (xor GR16:$src1, imm:$src2))]>, OpSize;
 def XOR32ri  : Ii32<0x81, MRM6r, 
                     (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), 
-                    "xor{l} {$src2, $dst|$dst, $src2}",
+                    "xor{l}\t{$src2, $dst|$dst, $src2}",
                     [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>;
 def XOR16ri8 : Ii8<0x83, MRM6r, 
                    (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                   "xor{w} {$src2, $dst|$dst, $src2}",
+                   "xor{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (xor GR16:$src1, i16immSExt8:$src2))]>,
                    OpSize;
 def XOR32ri8 : Ii8<0x83, MRM6r, 
                    (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "xor{l} {$src2, $dst|$dst, $src2}",
+                   "xor{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2))]>;
 let isTwoAddress = 0 in {
   def XOR8mr   : I<0x30, MRMDestMem,
                    (outs), (ins i8mem :$dst, GR8 :$src),
-                   "xor{b} {$src, $dst|$dst, $src}",
+                   "xor{b}\t{$src, $dst|$dst, $src}",
                    [(store (xor (load addr:$dst), GR8:$src), addr:$dst)]>;
   def XOR16mr  : I<0x31, MRMDestMem,
                    (outs), (ins i16mem:$dst, GR16:$src),
-                   "xor{w} {$src, $dst|$dst, $src}",
+                   "xor{w}\t{$src, $dst|$dst, $src}",
                    [(store (xor (load addr:$dst), GR16:$src), addr:$dst)]>,
                    OpSize;
   def XOR32mr  : I<0x31, MRMDestMem,
                    (outs), (ins i32mem:$dst, GR32:$src),
-                   "xor{l} {$src, $dst|$dst, $src}",
+                   "xor{l}\t{$src, $dst|$dst, $src}",
                    [(store (xor (load addr:$dst), GR32:$src), addr:$dst)]>;
   def XOR8mi   : Ii8<0x80, MRM6m,
                      (outs), (ins i8mem :$dst, i8imm :$src),
-                     "xor{b} {$src, $dst|$dst, $src}",
+                     "xor{b}\t{$src, $dst|$dst, $src}",
                     [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst)]>;
   def XOR16mi  : Ii16<0x81, MRM6m,
                       (outs), (ins i16mem:$dst, i16imm:$src),
-                      "xor{w} {$src, $dst|$dst, $src}",
+                      "xor{w}\t{$src, $dst|$dst, $src}",
                    [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst)]>,
                       OpSize;
   def XOR32mi  : Ii32<0x81, MRM6m,
                       (outs), (ins i32mem:$dst, i32imm:$src),
-                      "xor{l} {$src, $dst|$dst, $src}",
+                      "xor{l}\t{$src, $dst|$dst, $src}",
                    [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst)]>;
   def XOR16mi8 : Ii8<0x83, MRM6m,
                      (outs), (ins i16mem:$dst, i16i8imm :$src),
-                     "xor{w} {$src, $dst|$dst, $src}",
+                     "xor{w}\t{$src, $dst|$dst, $src}",
                  [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst)]>,
                      OpSize;
   def XOR32mi8 : Ii8<0x83, MRM6m,
                      (outs), (ins i32mem:$dst, i32i8imm :$src),
-                     "xor{l} {$src, $dst|$dst, $src}",
+                     "xor{l}\t{$src, $dst|$dst, $src}",
                  [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
 }
 
 // Shift instructions
 def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "shl{b} {%cl, $dst|$dst, %CL}",
+                 "shl{b}\t{%cl, $dst|$dst, %CL}",
                  [(set GR8:$dst, (shl GR8:$src, CL))]>, Imp<[CL],[]>;
 def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
-                 "shl{w} {%cl, $dst|$dst, %CL}",
+                 "shl{w}\t{%cl, $dst|$dst, %CL}",
                  [(set GR16:$dst, (shl GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
 def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
-                 "shl{l} {%cl, $dst|$dst, %CL}",
+                 "shl{l}\t{%cl, $dst|$dst, %CL}",
                  [(set GR32:$dst, (shl GR32:$src, CL))]>, Imp<[CL],[]>;
 
 def SHL8ri   : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "shl{b} {$src2, $dst|$dst, $src2}",
+                   "shl{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
 let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
 def SHL16ri  : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "shl{w} {$src2, $dst|$dst, $src2}",
+                   "shl{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
 def SHL32ri  : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "shl{l} {$src2, $dst|$dst, $src2}",
+                   "shl{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
 }
 
 // Shift left by one. Not used because (add x, x) is slightly cheaper.
 def SHL8r1   : I<0xD0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "shl{b} $dst", []>;
+                 "shl{b}\t$dst", []>;
 def SHL16r1  : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shl{w} $dst", []>, OpSize;
+                 "shl{w}\t$dst", []>, OpSize;
 def SHL32r1  : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shl{l} $dst", []>;
+                 "shl{l}\t$dst", []>;
 
 let isTwoAddress = 0 in {
   def SHL8mCL  : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
-                   "shl{b} {%cl, $dst|$dst, %CL}",
+                   "shl{b}\t{%cl, $dst|$dst, %CL}",
                    [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
-                   "shl{w} {%cl, $dst|$dst, %CL}",
+                   "shl{w}\t{%cl, $dst|$dst, %CL}",
                    [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>, OpSize;
   def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
-                   "shl{l} {%cl, $dst|$dst, %CL}",
+                   "shl{l}\t{%cl, $dst|$dst, %CL}",
                    [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def SHL8mi   : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "shl{b} {$src, $dst|$dst, $src}",
+                     "shl{b}\t{$src, $dst|$dst, $src}",
                   [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
   def SHL16mi  : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "shl{w} {$src, $dst|$dst, $src}",
+                     "shl{w}\t{$src, $dst|$dst, $src}",
                  [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
                      OpSize;
   def SHL32mi  : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "shl{l} {$src, $dst|$dst, $src}",
+                     "shl{l}\t{$src, $dst|$dst, $src}",
                  [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 
   // Shift by 1
   def SHL8m1   : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
-                   "shl{b} $dst",
+                   "shl{b}\t$dst",
                   [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
   def SHL16m1  : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
-                   "shl{w} $dst",
+                   "shl{w}\t$dst",
                  [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
                      OpSize;
   def SHL32m1  : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
-                   "shl{l} $dst",
+                   "shl{l}\t$dst",
                  [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
 }
 
 def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "shr{b} {%cl, $dst|$dst, %CL}",
+                 "shr{b}\t{%cl, $dst|$dst, %CL}",
                  [(set GR8:$dst, (srl GR8:$src, CL))]>, Imp<[CL],[]>;
 def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
-                 "shr{w} {%cl, $dst|$dst, %CL}",
+                 "shr{w}\t{%cl, $dst|$dst, %CL}",
                  [(set GR16:$dst, (srl GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
 def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
-                 "shr{l} {%cl, $dst|$dst, %CL}",
+                 "shr{l}\t{%cl, $dst|$dst, %CL}",
                  [(set GR32:$dst, (srl GR32:$src, CL))]>, Imp<[CL],[]>;
 
 def SHR8ri   : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                   "shr{b} {$src2, $dst|$dst, $src2}",
+                   "shr{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
 def SHR16ri  : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "shr{w} {$src2, $dst|$dst, $src2}",
+                   "shr{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
 def SHR32ri  : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "shr{l} {$src2, $dst|$dst, $src2}",
+                   "shr{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
 
 // Shift by 1
 def SHR8r1   : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
-                 "shr{b} $dst",
+                 "shr{b}\t$dst",
                  [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
 def SHR16r1  : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shr{w} $dst",
+                 "shr{w}\t$dst",
                  [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
 def SHR32r1  : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shr{l} $dst",
+                 "shr{l}\t$dst",
                  [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
 
 let isTwoAddress = 0 in {
   def SHR8mCL  : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
-                   "shr{b} {%cl, $dst|$dst, %CL}",
+                   "shr{b}\t{%cl, $dst|$dst, %CL}",
                    [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
-                   "shr{w} {%cl, $dst|$dst, %CL}",
+                   "shr{w}\t{%cl, $dst|$dst, %CL}",
                    [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>, OpSize;
   def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
-                   "shr{l} {%cl, $dst|$dst, %CL}",
+                   "shr{l}\t{%cl, $dst|$dst, %CL}",
                    [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def SHR8mi   : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "shr{b} {$src, $dst|$dst, $src}",
+                     "shr{b}\t{$src, $dst|$dst, $src}",
                   [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
   def SHR16mi  : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "shr{w} {$src, $dst|$dst, $src}",
+                     "shr{w}\t{$src, $dst|$dst, $src}",
                  [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
                      OpSize;
   def SHR32mi  : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "shr{l} {$src, $dst|$dst, $src}",
+                     "shr{l}\t{$src, $dst|$dst, $src}",
                  [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 
   // Shift by 1
   def SHR8m1   : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
-                   "shr{b} $dst",
+                   "shr{b}\t$dst",
                   [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
   def SHR16m1  : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
-                   "shr{w} $dst",
+                   "shr{w}\t$dst",
                  [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
   def SHR32m1  : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
-                   "shr{l} $dst",
+                   "shr{l}\t$dst",
                  [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
 }
 
 def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "sar{b} {%cl, $dst|$dst, %CL}",
+                 "sar{b}\t{%cl, $dst|$dst, %CL}",
                  [(set GR8:$dst, (sra GR8:$src, CL))]>, Imp<[CL],[]>;
 def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
-                 "sar{w} {%cl, $dst|$dst, %CL}",
+                 "sar{w}\t{%cl, $dst|$dst, %CL}",
                  [(set GR16:$dst, (sra GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
 def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
-                 "sar{l} {%cl, $dst|$dst, %CL}",
+                 "sar{l}\t{%cl, $dst|$dst, %CL}",
                  [(set GR32:$dst, (sra GR32:$src, CL))]>, Imp<[CL],[]>;
 
 def SAR8ri   : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "sar{b} {$src2, $dst|$dst, $src2}",
+                   "sar{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
 def SAR16ri  : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "sar{w} {$src2, $dst|$dst, $src2}",
+                   "sar{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
                    OpSize;
 def SAR32ri  : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "sar{l} {$src2, $dst|$dst, $src2}",
+                   "sar{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
 
 // Shift by 1
 def SAR8r1   : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "sar{b} $dst",
+                 "sar{b}\t$dst",
                  [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
 def SAR16r1  : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
-                 "sar{w} $dst",
+                 "sar{w}\t$dst",
                  [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
 def SAR32r1  : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
-                 "sar{l} $dst",
+                 "sar{l}\t$dst",
                  [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
 
 let isTwoAddress = 0 in {
   def SAR8mCL  : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
-                   "sar{b} {%cl, $dst|$dst, %CL}",
+                   "sar{b}\t{%cl, $dst|$dst, %CL}",
                    [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
-                   "sar{w} {%cl, $dst|$dst, %CL}",
+                   "sar{w}\t{%cl, $dst|$dst, %CL}",
                    [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>, OpSize;
   def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), 
-                   "sar{l} {%cl, $dst|$dst, %CL}",
+                   "sar{l}\t{%cl, $dst|$dst, %CL}",
                    [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def SAR8mi   : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "sar{b} {$src, $dst|$dst, $src}",
+                     "sar{b}\t{$src, $dst|$dst, $src}",
                   [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
   def SAR16mi  : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "sar{w} {$src, $dst|$dst, $src}",
+                     "sar{w}\t{$src, $dst|$dst, $src}",
                  [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
                      OpSize;
   def SAR32mi  : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "sar{l} {$src, $dst|$dst, $src}",
+                     "sar{l}\t{$src, $dst|$dst, $src}",
                  [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 
   // Shift by 1
   def SAR8m1   : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
-                   "sar{b} $dst",
+                   "sar{b}\t$dst",
                   [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
   def SAR16m1  : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
-                   "sar{w} $dst",
+                   "sar{w}\t$dst",
                  [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
                      OpSize;
   def SAR32m1  : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
-                   "sar{l} $dst",
+                   "sar{l}\t$dst",
                  [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
 }
 
 // Rotate instructions
 // FIXME: provide shorter instructions when imm8 == 1
 def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "rol{b} {%cl, $dst|$dst, %CL}",
+                 "rol{b}\t{%cl, $dst|$dst, %CL}",
                  [(set GR8:$dst, (rotl GR8:$src, CL))]>, Imp<[CL],[]>;
 def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
-                 "rol{w} {%cl, $dst|$dst, %CL}",
+                 "rol{w}\t{%cl, $dst|$dst, %CL}",
                  [(set GR16:$dst, (rotl GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
 def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
-                 "rol{l} {%cl, $dst|$dst, %CL}",
+                 "rol{l}\t{%cl, $dst|$dst, %CL}",
                  [(set GR32:$dst, (rotl GR32:$src, CL))]>, Imp<[CL],[]>;
 
 def ROL8ri   : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "rol{b} {$src2, $dst|$dst, $src2}",
+                   "rol{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
 def ROL16ri  : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "rol{w} {$src2, $dst|$dst, $src2}",
+                   "rol{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
 def ROL32ri  : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "rol{l} {$src2, $dst|$dst, $src2}",
+                   "rol{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
 
 // Rotate by 1
 def ROL8r1   : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "rol{b} $dst",
+                 "rol{b}\t$dst",
                  [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
 def ROL16r1  : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
-                 "rol{w} $dst",
+                 "rol{w}\t$dst",
                  [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
 def ROL32r1  : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
-                 "rol{l} $dst",
+                 "rol{l}\t$dst",
                  [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
 
 let isTwoAddress = 0 in {
   def ROL8mCL  : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
-                   "rol{b} {%cl, $dst|$dst, %CL}",
+                   "rol{b}\t{%cl, $dst|$dst, %CL}",
                    [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
-                   "rol{w} {%cl, $dst|$dst, %CL}",
+                   "rol{w}\t{%cl, $dst|$dst, %CL}",
                    [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>, OpSize;
   def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
-                   "rol{l} {%cl, $dst|$dst, %CL}",
+                   "rol{l}\t{%cl, $dst|$dst, %CL}",
                    [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def ROL8mi   : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "rol{b} {$src, $dst|$dst, $src}",
+                     "rol{b}\t{$src, $dst|$dst, $src}",
                  [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
   def ROL16mi  : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "rol{w} {$src, $dst|$dst, $src}",
+                     "rol{w}\t{$src, $dst|$dst, $src}",
                 [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
                      OpSize;
   def ROL32mi  : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "rol{l} {$src, $dst|$dst, $src}",
+                     "rol{l}\t{$src, $dst|$dst, $src}",
                 [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 
   // Rotate by 1
   def ROL8m1   : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
-                   "rol{b} $dst",
+                   "rol{b}\t$dst",
                  [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
   def ROL16m1  : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
-                   "rol{w} $dst",
+                   "rol{w}\t$dst",
                 [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
                      OpSize;
   def ROL32m1  : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
-                   "rol{l} $dst",
+                   "rol{l}\t$dst",
                 [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
 }
 
 def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "ror{b} {%cl, $dst|$dst, %CL}",
+                 "ror{b}\t{%cl, $dst|$dst, %CL}",
                  [(set GR8:$dst, (rotr GR8:$src, CL))]>, Imp<[CL],[]>;
 def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
-                 "ror{w} {%cl, $dst|$dst, %CL}",
+                 "ror{w}\t{%cl, $dst|$dst, %CL}",
                  [(set GR16:$dst, (rotr GR16:$src, CL))]>, Imp<[CL],[]>, OpSize;
 def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
-                 "ror{l} {%cl, $dst|$dst, %CL}",
+                 "ror{l}\t{%cl, $dst|$dst, %CL}",
                  [(set GR32:$dst, (rotr GR32:$src, CL))]>, Imp<[CL],[]>;
 
 def ROR8ri   : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "ror{b} {$src2, $dst|$dst, $src2}",
+                   "ror{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
 def ROR16ri  : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "ror{w} {$src2, $dst|$dst, $src2}",
+                   "ror{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, OpSize;
 def ROR32ri  : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "ror{l} {$src2, $dst|$dst, $src2}",
+                   "ror{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
 
 // Rotate by 1
 def ROR8r1   : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "ror{b} $dst",
+                 "ror{b}\t$dst",
                  [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
 def ROR16r1  : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
-                 "ror{w} $dst",
+                 "ror{w}\t$dst",
                  [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
 def ROR32r1  : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
-                 "ror{l} $dst",
+                 "ror{l}\t$dst",
                  [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
 
 let isTwoAddress = 0 in {
   def ROR8mCL  : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
-                   "ror{b} {%cl, $dst|$dst, %CL}",
+                   "ror{b}\t{%cl, $dst|$dst, %CL}",
                    [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
-                   "ror{w} {%cl, $dst|$dst, %CL}",
+                   "ror{w}\t{%cl, $dst|$dst, %CL}",
                    [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>, OpSize;
   def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), 
-                   "ror{l} {%cl, $dst|$dst, %CL}",
+                   "ror{l}\t{%cl, $dst|$dst, %CL}",
                    [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>,
                    Imp<[CL],[]>;
   def ROR8mi   : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "ror{b} {$src, $dst|$dst, $src}",
+                     "ror{b}\t{$src, $dst|$dst, $src}",
                  [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
   def ROR16mi  : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "ror{w} {$src, $dst|$dst, $src}",
+                     "ror{w}\t{$src, $dst|$dst, $src}",
                 [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
                      OpSize;
   def ROR32mi  : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "ror{l} {$src, $dst|$dst, $src}",
+                     "ror{l}\t{$src, $dst|$dst, $src}",
                 [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 
   // Rotate by 1
   def ROR8m1   : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
-                   "ror{b} $dst",
+                   "ror{b}\t$dst",
                  [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
   def ROR16m1  : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
-                   "ror{w} $dst",
+                   "ror{w}\t$dst",
                 [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
                      OpSize;
   def ROR32m1  : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
-                   "ror{l} $dst",
+                   "ror{l}\t$dst",
                 [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
 }
 
@@ -1753,44 +1752,44 @@
 
 // Double shift instructions (generalizations of rotate)
 def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                   "shld{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
+                   "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
                    [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>,
                    Imp<[CL],[]>, TB;
 def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                   "shrd{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
+                   "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
                    [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>,
                    Imp<[CL],[]>, TB;
 def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                   "shld{w} {%cl, $src2, $dst|$dst, $src2, %CL}",
+                   "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
                    [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
                    Imp<[CL],[]>, TB, OpSize;
 def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                   "shrd{w} {%cl, $src2, $dst|$dst, $src2, %CL}",
+                   "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
                    [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
                    Imp<[CL],[]>, TB, OpSize;
 
 let isCommutable = 1 in {  // These instructions commute to each other.
 def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
                      (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3),
-                     "shld{l} {$src3, $src2, $dst|$dst, $src2, $src3}",
+                     "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
                                       (i8 imm:$src3)))]>,
                  TB;
 def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
                      (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3),
-                     "shrd{l} {$src3, $src2, $dst|$dst, $src2, $src3}",
+                     "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
                                       (i8 imm:$src3)))]>,
                  TB;
 def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
                      (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3),
-                     "shld{w} {$src3, $src2, $dst|$dst, $src2, $src3}",
+                     "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
                                       (i8 imm:$src3)))]>,
                      TB, OpSize;
 def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
                      (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3),
-                     "shrd{w} {$src3, $src2, $dst|$dst, $src2, $src3}",
+                     "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
                                       (i8 imm:$src3)))]>,
                      TB, OpSize;
@@ -1798,47 +1797,47 @@
 
 let isTwoAddress = 0 in {
   def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                     "shld{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
+                     "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
                      [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
                        addr:$dst)]>,
                      Imp<[CL],[]>, TB;
   def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                    "shrd{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
+                    "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
                     [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
                       addr:$dst)]>,
                     Imp<[CL],[]>, TB;
   def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
                       (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
-                      "shld{l} {$src3, $src2, $dst|$dst, $src2, $src3}",
+                      "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
                                         (i8 imm:$src3)), addr:$dst)]>,
                       TB;
   def SHRD32mri8 : Ii8<0xAC, MRMDestMem, 
                        (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
-                       "shrd{l} {$src3, $src2, $dst|$dst, $src2, $src3}",
+                       "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
                                          (i8 imm:$src3)), addr:$dst)]>,
                        TB;
 
   def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                     "shld{w} {%cl, $src2, $dst|$dst, $src2, %CL}",
+                     "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
                      [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
                        addr:$dst)]>,
                      Imp<[CL],[]>, TB, OpSize;
   def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                    "shrd{w} {%cl, $src2, $dst|$dst, $src2, %CL}",
+                    "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
                     [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
                       addr:$dst)]>,
                     Imp<[CL],[]>, TB, OpSize;
   def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
                       (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
-                      "shld{w} {$src3, $src2, $dst|$dst, $src2, $src3}",
+                      "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
                                         (i8 imm:$src3)), addr:$dst)]>,
                       TB, OpSize;
   def SHRD16mri8 : Ii8<0xAC, MRMDestMem, 
                        (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
-                       "shrd{w} {$src3, $src2, $dst|$dst, $src2, $src3}",
+                       "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
                                         (i8 imm:$src3)), addr:$dst)]>,
                        TB, OpSize;
@@ -1848,211 +1847,211 @@
 // Arithmetic.
 let isCommutable = 1 in {   // X = ADD Y, Z   --> X = ADD Z, Y
 def ADD8rr   : I<0x00, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
-                 "add{b} {$src2, $dst|$dst, $src2}",
+                 "add{b}\t{$src2, $dst|$dst, $src2}",
                  [(set GR8:$dst, (add GR8:$src1, GR8:$src2))]>;
 let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
 def ADD16rr  : I<0x01, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                 "add{w} {$src2, $dst|$dst, $src2}",
+                 "add{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (add GR16:$src1, GR16:$src2))]>, OpSize;
 def ADD32rr  : I<0x01, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                 "add{l} {$src2, $dst|$dst, $src2}",
+                 "add{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]>;
 } // end isConvertibleToThreeAddress
 } // end isCommutable
 def ADD8rm   : I<0x02, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
-                 "add{b} {$src2, $dst|$dst, $src2}",
+                 "add{b}\t{$src2, $dst|$dst, $src2}",
                  [(set GR8:$dst, (add GR8:$src1, (load addr:$src2)))]>;
 def ADD16rm  : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                 "add{w} {$src2, $dst|$dst, $src2}",
+                 "add{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (add GR16:$src1, (load addr:$src2)))]>, OpSize;
 def ADD32rm  : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                 "add{l} {$src2, $dst|$dst, $src2}",
+                 "add{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (add GR32:$src1, (load addr:$src2)))]>;
 
 def ADD8ri   : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                   "add{b} {$src2, $dst|$dst, $src2}",
+                   "add{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (add GR8:$src1, imm:$src2))]>;
 
 let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
 def ADD16ri  : Ii16<0x81, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                    "add{w} {$src2, $dst|$dst, $src2}",
+                    "add{w}\t{$src2, $dst|$dst, $src2}",
                     [(set GR16:$dst, (add GR16:$src1, imm:$src2))]>, OpSize;
 def ADD32ri  : Ii32<0x81, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                    "add{l} {$src2, $dst|$dst, $src2}",
+                    "add{l}\t{$src2, $dst|$dst, $src2}",
                     [(set GR32:$dst, (add GR32:$src1, imm:$src2))]>;
 def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                   "add{w} {$src2, $dst|$dst, $src2}",
+                   "add{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2))]>,
                    OpSize;
 def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "add{l} {$src2, $dst|$dst, $src2}",
+                   "add{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2))]>;
 }
 
 let isTwoAddress = 0 in {
   def ADD8mr   : I<0x00, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
-                   "add{b} {$src2, $dst|$dst, $src2}",
+                   "add{b}\t{$src2, $dst|$dst, $src2}",
                    [(store (add (load addr:$dst), GR8:$src2), addr:$dst)]>;
   def ADD16mr  : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "add{w} {$src2, $dst|$dst, $src2}",
+                   "add{w}\t{$src2, $dst|$dst, $src2}",
                    [(store (add (load addr:$dst), GR16:$src2), addr:$dst)]>,
                    OpSize;
   def ADD32mr  : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                   "add{l} {$src2, $dst|$dst, $src2}",
+                   "add{l}\t{$src2, $dst|$dst, $src2}",
                    [(store (add (load addr:$dst), GR32:$src2), addr:$dst)]>;
   def ADD8mi   : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
-                     "add{b} {$src2, $dst|$dst, $src2}",
+                     "add{b}\t{$src2, $dst|$dst, $src2}",
                    [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
   def ADD16mi  : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
-                      "add{w} {$src2, $dst|$dst, $src2}",
+                      "add{w}\t{$src2, $dst|$dst, $src2}",
                   [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
                    OpSize;
   def ADD32mi  : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
-                      "add{l} {$src2, $dst|$dst, $src2}",
+                      "add{l}\t{$src2, $dst|$dst, $src2}",
                   [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
   def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-                     "add{w} {$src2, $dst|$dst, $src2}",
+                     "add{w}\t{$src2, $dst|$dst, $src2}",
                 [(store (add (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
                    OpSize;
   def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                     "add{l} {$src2, $dst|$dst, $src2}",
+                     "add{l}\t{$src2, $dst|$dst, $src2}",
                 [(store (add (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
 }
 
 let isCommutable = 1 in {  // X = ADC Y, Z --> X = ADC Z, Y
 def ADC32rr  : I<0x11, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                 "adc{l} {$src2, $dst|$dst, $src2}",
+                 "adc{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
 }
 def ADC32rm  : I<0x13, MRMSrcMem , (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                 "adc{l} {$src2, $dst|$dst, $src2}",
+                 "adc{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
 def ADC32ri  : Ii32<0x81, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                    "adc{l} {$src2, $dst|$dst, $src2}",
+                    "adc{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
 def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "adc{l} {$src2, $dst|$dst, $src2}",
+                   "adc{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
 
 let isTwoAddress = 0 in {
   def ADC32mr  : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                   "adc{l} {$src2, $dst|$dst, $src2}",
+                   "adc{l}\t{$src2, $dst|$dst, $src2}",
                    [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
   def ADC32mi  : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2),
-                      "adc{l} {$src2, $dst|$dst, $src2}",
+                      "adc{l}\t{$src2, $dst|$dst, $src2}",
                   [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
   def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                     "adc{l} {$src2, $dst|$dst, $src2}",
+                     "adc{l}\t{$src2, $dst|$dst, $src2}",
              [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
 }
 
 def SUB8rr   : I<0x28, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
-                 "sub{b} {$src2, $dst|$dst, $src2}",
+                 "sub{b}\t{$src2, $dst|$dst, $src2}",
                  [(set GR8:$dst, (sub GR8:$src1, GR8:$src2))]>;
 def SUB16rr  : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                 "sub{w} {$src2, $dst|$dst, $src2}",
+                 "sub{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (sub GR16:$src1, GR16:$src2))]>, OpSize;
 def SUB32rr  : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                 "sub{l} {$src2, $dst|$dst, $src2}",
+                 "sub{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
 def SUB8rm   : I<0x2A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
-                 "sub{b} {$src2, $dst|$dst, $src2}",
+                 "sub{b}\t{$src2, $dst|$dst, $src2}",
                  [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2)))]>;
 def SUB16rm  : I<0x2B, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                 "sub{w} {$src2, $dst|$dst, $src2}",
+                 "sub{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2)))]>, OpSize;
 def SUB32rm  : I<0x2B, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                 "sub{l} {$src2, $dst|$dst, $src2}",
+                 "sub{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2)))]>;
 
 def SUB8ri   : Ii8 <0x80, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                    "sub{b} {$src2, $dst|$dst, $src2}",
+                    "sub{b}\t{$src2, $dst|$dst, $src2}",
                     [(set GR8:$dst, (sub GR8:$src1, imm:$src2))]>;
 def SUB16ri  : Ii16<0x81, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                    "sub{w} {$src2, $dst|$dst, $src2}",
+                    "sub{w}\t{$src2, $dst|$dst, $src2}",
                     [(set GR16:$dst, (sub GR16:$src1, imm:$src2))]>, OpSize;
 def SUB32ri  : Ii32<0x81, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                    "sub{l} {$src2, $dst|$dst, $src2}",
+                    "sub{l}\t{$src2, $dst|$dst, $src2}",
                     [(set GR32:$dst, (sub GR32:$src1, imm:$src2))]>;
 def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                   "sub{w} {$src2, $dst|$dst, $src2}",
+                   "sub{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2))]>,
                    OpSize;
 def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "sub{l} {$src2, $dst|$dst, $src2}",
+                   "sub{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2))]>;
 let isTwoAddress = 0 in {
   def SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
-                   "sub{b} {$src2, $dst|$dst, $src2}",
+                   "sub{b}\t{$src2, $dst|$dst, $src2}",
                    [(store (sub (load addr:$dst), GR8:$src2), addr:$dst)]>;
   def SUB16mr  : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "sub{w} {$src2, $dst|$dst, $src2}",
+                   "sub{w}\t{$src2, $dst|$dst, $src2}",
                    [(store (sub (load addr:$dst), GR16:$src2), addr:$dst)]>,
                    OpSize;
   def SUB32mr  : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
-                   "sub{l} {$src2, $dst|$dst, $src2}",
+                   "sub{l}\t{$src2, $dst|$dst, $src2}",
                    [(store (sub (load addr:$dst), GR32:$src2), addr:$dst)]>;
   def SUB8mi   : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), 
-                     "sub{b} {$src2, $dst|$dst, $src2}",
+                     "sub{b}\t{$src2, $dst|$dst, $src2}",
                    [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
   def SUB16mi  : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), 
-                      "sub{w} {$src2, $dst|$dst, $src2}",
+                      "sub{w}\t{$src2, $dst|$dst, $src2}",
                   [(store (sub (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
                    OpSize;
   def SUB32mi  : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), 
-                      "sub{l} {$src2, $dst|$dst, $src2}",
+                      "sub{l}\t{$src2, $dst|$dst, $src2}",
                   [(store (sub (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
   def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), 
-                     "sub{w} {$src2, $dst|$dst, $src2}",
+                     "sub{w}\t{$src2, $dst|$dst, $src2}",
                 [(store (sub (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
                    OpSize;
   def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), 
-                     "sub{l} {$src2, $dst|$dst, $src2}",
+                     "sub{l}\t{$src2, $dst|$dst, $src2}",
                 [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
 }
 
 def SBB32rr    : I<0x19, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "sbb{l} {$src2, $dst|$dst, $src2}",
+                  "sbb{l}\t{$src2, $dst|$dst, $src2}",
                   [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
 
 let isTwoAddress = 0 in {
   def SBB32mr  : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
-                   "sbb{l} {$src2, $dst|$dst, $src2}",
+                   "sbb{l}\t{$src2, $dst|$dst, $src2}",
                    [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
   def SBB8mi  : Ii32<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), 
-                      "sbb{b} {$src2, $dst|$dst, $src2}",
+                      "sbb{b}\t{$src2, $dst|$dst, $src2}",
                    [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
   def SBB32mi  : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2), 
-                      "sbb{l} {$src2, $dst|$dst, $src2}",
+                      "sbb{l}\t{$src2, $dst|$dst, $src2}",
                   [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
   def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2), 
-                     "sbb{l} {$src2, $dst|$dst, $src2}",
+                     "sbb{l}\t{$src2, $dst|$dst, $src2}",
              [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
 }
 def SBB32rm  : I<0x1B, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                    "sbb{l} {$src2, $dst|$dst, $src2}",
+                    "sbb{l}\t{$src2, $dst|$dst, $src2}",
                     [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
 def SBB32ri  : Ii32<0x81, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                    "sbb{l} {$src2, $dst|$dst, $src2}",
+                    "sbb{l}\t{$src2, $dst|$dst, $src2}",
                     [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
 def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "sbb{l} {$src2, $dst|$dst, $src2}",
+                   "sbb{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
 
 let isCommutable = 1 in {  // X = IMUL Y, Z --> X = IMUL Z, Y
 def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                 "imul{w} {$src2, $dst|$dst, $src2}",
+                 "imul{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (mul GR16:$src1, GR16:$src2))]>, TB, OpSize;
 def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                 "imul{l} {$src2, $dst|$dst, $src2}",
+                 "imul{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>, TB;
 }
 def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                 "imul{w} {$src2, $dst|$dst, $src2}",
+                 "imul{w}\t{$src2, $dst|$dst, $src2}",
                  [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2)))]>,
                  TB, OpSize;
 def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                 "imul{l} {$src2, $dst|$dst, $src2}",
+                 "imul{l}\t{$src2, $dst|$dst, $src2}",
                  [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2)))]>, TB;
 
 } // end Two Address instructions
@@ -2060,39 +2059,39 @@
 // Surprisingly enough, these are not two address instructions!
 def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
                       (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                      "imul{w} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR16:$dst, (mul GR16:$src1, imm:$src2))]>, OpSize;
 def IMUL32rri  : Ii32<0x69, MRMSrcReg,                      // GR32 = GR32*I32
                       (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                      "imul{l} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>;
 def IMUL16rri8 : Ii8<0x6B, MRMSrcReg,                       // GR16 = GR16*I8
                      (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                     "imul{w} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2))]>,
                      OpSize;
 def IMUL32rri8 : Ii8<0x6B, MRMSrcReg,                       // GR32 = GR32*I8
                      (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                     "imul{l} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2))]>;
 
 def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                      // GR16 = [mem16]*I16
                       (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
-                      "imul{w} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR16:$dst, (mul (load addr:$src1), imm:$src2))]>,
                       OpSize;
 def IMUL32rmi  : Ii32<0x69, MRMSrcMem,                      // GR32 = [mem32]*I32
                       (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
-                      "imul{l} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32:$dst, (mul (load addr:$src1), imm:$src2))]>;
 def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR16 = [mem16]*I8
                      (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
-                     "imul{w} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(set GR16:$dst, (mul (load addr:$src1), i16immSExt8:$src2))]>,
                      OpSize;
 def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR32 = [mem32]*I8
                      (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
-                     "imul{l} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(set GR32:$dst, (mul (load addr:$src1), i32immSExt8:$src2))]>;
 
 //===----------------------------------------------------------------------===//
@@ -2100,52 +2099,52 @@
 //
 let isCommutable = 1 in {   // TEST X, Y   --> TEST Y, X
 def TEST8rr  : I<0x84, MRMDestReg, (outs),  (ins GR8:$src1, GR8:$src2),
-                 "test{b} {$src2, $src1|$src1, $src2}",
+                 "test{b}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp (and GR8:$src1, GR8:$src2), 0)]>;
 def TEST16rr : I<0x85, MRMDestReg, (outs),  (ins GR16:$src1, GR16:$src2),
-                 "test{w} {$src2, $src1|$src1, $src2}",
+                 "test{w}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp (and GR16:$src1, GR16:$src2), 0)]>, OpSize;
 def TEST32rr : I<0x85, MRMDestReg, (outs),  (ins GR32:$src1, GR32:$src2),
-                 "test{l} {$src2, $src1|$src1, $src2}",
+                 "test{l}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp (and GR32:$src1, GR32:$src2), 0)]>;
 }
 
 def TEST8rm  : I<0x84, MRMSrcMem, (outs),  (ins GR8 :$src1, i8mem :$src2),
-                 "test{b} {$src2, $src1|$src1, $src2}",
+                 "test{b}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0)]>;
 def TEST16rm : I<0x85, MRMSrcMem, (outs),  (ins GR16:$src1, i16mem:$src2),
-                 "test{w} {$src2, $src1|$src1, $src2}",
+                 "test{w}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp (and GR16:$src1, (loadi16 addr:$src2)), 0)]>,
                OpSize;
 def TEST32rm : I<0x85, MRMSrcMem, (outs),  (ins GR32:$src1, i32mem:$src2),
-                 "test{l} {$src2, $src1|$src1, $src2}",
+                 "test{l}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp (and GR32:$src1, (loadi32 addr:$src2)), 0)]>;
 
 def TEST8ri  : Ii8 <0xF6, MRM0r,                     // flags = GR8  & imm8
                     (outs),  (ins GR8:$src1, i8imm:$src2),
-                    "test{b} {$src2, $src1|$src1, $src2}",
+                    "test{b}\t{$src2, $src1|$src1, $src2}",
                     [(X86cmp (and GR8:$src1, imm:$src2), 0)]>;
 def TEST16ri : Ii16<0xF7, MRM0r,                     // flags = GR16 & imm16
                     (outs),  (ins GR16:$src1, i16imm:$src2),
-                    "test{w} {$src2, $src1|$src1, $src2}",
+                    "test{w}\t{$src2, $src1|$src1, $src2}",
                     [(X86cmp (and GR16:$src1, imm:$src2), 0)]>, OpSize;
 def TEST32ri : Ii32<0xF7, MRM0r,                     // flags = GR32 & imm32
                     (outs),  (ins GR32:$src1, i32imm:$src2),
-                    "test{l} {$src2, $src1|$src1, $src2}",
+                    "test{l}\t{$src2, $src1|$src1, $src2}",
                     [(X86cmp (and GR32:$src1, imm:$src2), 0)]>;
 
 def TEST8mi  : Ii8 <0xF6, MRM0m,                     // flags = [mem8]  & imm8
                     (outs), (ins i8mem:$src1, i8imm:$src2),
-                    "test{b} {$src2, $src1|$src1, $src2}",
+                    "test{b}\t{$src2, $src1|$src1, $src2}",
                     [(X86cmp (and (loadi8 addr:$src1), imm:$src2), 0)]>;
 def TEST16mi : Ii16<0xF7, MRM0m,                     // flags = [mem16] & imm16
                     (outs), (ins i16mem:$src1, i16imm:$src2),
-                    "test{w} {$src2, $src1|$src1, $src2}",
+                    "test{w}\t{$src2, $src1|$src1, $src2}",
                     [(X86cmp (and (loadi16 addr:$src1), imm:$src2), 0)]>,
                OpSize;
 def TEST32mi : Ii32<0xF7, MRM0m,                     // flags = [mem32] & imm32
                     (outs), (ins i32mem:$src1, i32imm:$src2),
-                    "test{l} {$src2, $src1|$src1, $src2}",
+                    "test{l}\t{$src2, $src1|$src1, $src2}",
                     [(X86cmp (and (loadi32 addr:$src1), imm:$src2), 0)]>;
 
 
@@ -2155,262 +2154,262 @@
 
 def SETEr    : I<0x94, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "sete $dst",
+                 "sete\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_E))]>,
                TB;                        // GR8 = ==
 def SETEm    : I<0x94, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "sete $dst",
+                 "sete\t$dst",
                  [(store (X86setcc X86_COND_E), addr:$dst)]>,
                TB;                        // [mem8] = ==
 def SETNEr   : I<0x95, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setne $dst",
+                 "setne\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_NE))]>,
                TB;                        // GR8 = !=
 def SETNEm   : I<0x95, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setne $dst",
+                 "setne\t$dst",
                  [(store (X86setcc X86_COND_NE), addr:$dst)]>,
                TB;                        // [mem8] = !=
 def SETLr    : I<0x9C, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setl $dst",
+                 "setl\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_L))]>,
                TB;                        // GR8 = <  signed
 def SETLm    : I<0x9C, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setl $dst",
+                 "setl\t$dst",
                  [(store (X86setcc X86_COND_L), addr:$dst)]>,
                TB;                        // [mem8] = <  signed
 def SETGEr   : I<0x9D, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setge $dst",
+                 "setge\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_GE))]>,
                TB;                        // GR8 = >= signed
 def SETGEm   : I<0x9D, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setge $dst",
+                 "setge\t$dst",
                  [(store (X86setcc X86_COND_GE), addr:$dst)]>,
                TB;                        // [mem8] = >= signed
 def SETLEr   : I<0x9E, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setle $dst",
+                 "setle\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_LE))]>,
                TB;                        // GR8 = <= signed
 def SETLEm   : I<0x9E, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setle $dst",
+                 "setle\t$dst",
                  [(store (X86setcc X86_COND_LE), addr:$dst)]>,
                TB;                        // [mem8] = <= signed
 def SETGr    : I<0x9F, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setg $dst",
+                 "setg\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_G))]>,
                TB;                        // GR8 = >  signed
 def SETGm    : I<0x9F, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setg $dst",
+                 "setg\t$dst",
                  [(store (X86setcc X86_COND_G), addr:$dst)]>,
                TB;                        // [mem8] = >  signed
 
 def SETBr    : I<0x92, MRM0r,
                  (outs GR8   :$dst), (ins),
-                 "setb $dst",
+                 "setb\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_B))]>,
                TB;                        // GR8 = <  unsign
 def SETBm    : I<0x92, MRM0m,
                  (outs), (ins i8mem:$dst),
-                 "setb $dst",
+                 "setb\t$dst",
                  [(store (X86setcc X86_COND_B), addr:$dst)]>,
                TB;                        // [mem8] = <  unsign
 def SETAEr   : I<0x93, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setae $dst",
+                 "setae\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_AE))]>,
                TB;                        // GR8 = >= unsign
 def SETAEm   : I<0x93, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setae $dst",
+                 "setae\t$dst",
                  [(store (X86setcc X86_COND_AE), addr:$dst)]>,
                TB;                        // [mem8] = >= unsign
 def SETBEr   : I<0x96, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setbe $dst",
+                 "setbe\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_BE))]>,
                TB;                        // GR8 = <= unsign
 def SETBEm   : I<0x96, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setbe $dst",
+                 "setbe\t$dst",
                  [(store (X86setcc X86_COND_BE), addr:$dst)]>,
                TB;                        // [mem8] = <= unsign
 def SETAr    : I<0x97, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "seta $dst",
+                 "seta\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_A))]>,
               TB;                        // GR8 = >  unsign
 def SETAm    : I<0x97, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "seta $dst",
+                 "seta\t$dst",
                  [(store (X86setcc X86_COND_A), addr:$dst)]>,
               TB;                        // [mem8] = >  unsign
 
 def SETSr    : I<0x98, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "sets $dst",
+                 "sets\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_S))]>,
                TB;                        // GR8 = <sign bit>
 def SETSm    : I<0x98, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "sets $dst",
+                 "sets\t$dst",
                  [(store (X86setcc X86_COND_S), addr:$dst)]>,
                TB;                        // [mem8] = <sign bit>
 def SETNSr   : I<0x99, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setns $dst",
+                 "setns\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_NS))]>,
                TB;                        // GR8 = !<sign bit>
 def SETNSm   : I<0x99, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setns $dst",
+                 "setns\t$dst",
                  [(store (X86setcc X86_COND_NS), addr:$dst)]>,
                TB;                        // [mem8] = !<sign bit>
 def SETPr    : I<0x9A, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setp $dst",
+                 "setp\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_P))]>,
                TB;                        // GR8 = parity
 def SETPm    : I<0x9A, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setp $dst",
+                 "setp\t$dst",
                  [(store (X86setcc X86_COND_P), addr:$dst)]>,
                TB;                        // [mem8] = parity
 def SETNPr   : I<0x9B, MRM0r, 
                  (outs GR8   :$dst), (ins),
-                 "setnp $dst",
+                 "setnp\t$dst",
                  [(set GR8:$dst, (X86setcc X86_COND_NP))]>,
                TB;                        // GR8 = not parity
 def SETNPm   : I<0x9B, MRM0m, 
                  (outs), (ins i8mem:$dst),
-                 "setnp $dst",
+                 "setnp\t$dst",
                  [(store (X86setcc X86_COND_NP), addr:$dst)]>,
                TB;                        // [mem8] = not parity
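 // Each SETcc above comes in a register form (MRM0r) and a memory form
 // (MRM0m); both store a single byte that is 1 when the condition holds and
 // 0 otherwise. Materializing a boolean would now print as, for example,
 //   cmpl	%ecx, %eax
 //   sete	%al
 // with a tab after each mnemonic.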
 
 // Integer comparisons
 def CMP8rr  : I<0x38, MRMDestReg,
                 (outs), (ins GR8 :$src1, GR8 :$src2),
-                "cmp{b} {$src2, $src1|$src1, $src2}",
+                "cmp{b}\t{$src2, $src1|$src1, $src2}",
                 [(X86cmp GR8:$src1, GR8:$src2)]>;
 def CMP16rr : I<0x39, MRMDestReg,
                 (outs), (ins GR16:$src1, GR16:$src2),
-                "cmp{w} {$src2, $src1|$src1, $src2}",
+                "cmp{w}\t{$src2, $src1|$src1, $src2}",
                 [(X86cmp GR16:$src1, GR16:$src2)]>, OpSize;
 def CMP32rr : I<0x39, MRMDestReg,
                 (outs), (ins GR32:$src1, GR32:$src2),
-                "cmp{l} {$src2, $src1|$src1, $src2}",
+                "cmp{l}\t{$src2, $src1|$src1, $src2}",
                 [(X86cmp GR32:$src1, GR32:$src2)]>;
 def CMP8mr  : I<0x38, MRMDestMem,
                 (outs), (ins i8mem :$src1, GR8 :$src2),
-                "cmp{b} {$src2, $src1|$src1, $src2}",
+                "cmp{b}\t{$src2, $src1|$src1, $src2}",
                 [(X86cmp (loadi8 addr:$src1), GR8:$src2)]>;
 def CMP16mr : I<0x39, MRMDestMem,
                 (outs), (ins i16mem:$src1, GR16:$src2),
-                "cmp{w} {$src2, $src1|$src1, $src2}",
+                "cmp{w}\t{$src2, $src1|$src1, $src2}",
                 [(X86cmp (loadi16 addr:$src1), GR16:$src2)]>, OpSize;
 def CMP32mr : I<0x39, MRMDestMem,
                 (outs), (ins i32mem:$src1, GR32:$src2),
-                "cmp{l} {$src2, $src1|$src1, $src2}",
+                "cmp{l}\t{$src2, $src1|$src1, $src2}",
                 [(X86cmp (loadi32 addr:$src1), GR32:$src2)]>;
 def CMP8rm  : I<0x3A, MRMSrcMem,
                 (outs), (ins GR8 :$src1, i8mem :$src2),
-                "cmp{b} {$src2, $src1|$src1, $src2}",
+                "cmp{b}\t{$src2, $src1|$src1, $src2}",
                 [(X86cmp GR8:$src1, (loadi8 addr:$src2))]>;
 def CMP16rm : I<0x3B, MRMSrcMem,
                 (outs), (ins GR16:$src1, i16mem:$src2),
-                "cmp{w} {$src2, $src1|$src1, $src2}",
+                "cmp{w}\t{$src2, $src1|$src1, $src2}",
                 [(X86cmp GR16:$src1, (loadi16 addr:$src2))]>, OpSize;
 def CMP32rm : I<0x3B, MRMSrcMem,
                 (outs), (ins GR32:$src1, i32mem:$src2),
-                "cmp{l} {$src2, $src1|$src1, $src2}",
+                "cmp{l}\t{$src2, $src1|$src1, $src2}",
                 [(X86cmp GR32:$src1, (loadi32 addr:$src2))]>;
 def CMP8ri  : Ii8<0x80, MRM7r,
                   (outs), (ins GR8:$src1, i8imm:$src2),
-                  "cmp{b} {$src2, $src1|$src1, $src2}",
+                  "cmp{b}\t{$src2, $src1|$src1, $src2}",
                   [(X86cmp GR8:$src1, imm:$src2)]>;
 def CMP16ri : Ii16<0x81, MRM7r,
                    (outs), (ins GR16:$src1, i16imm:$src2),
-                   "cmp{w} {$src2, $src1|$src1, $src2}",
+                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp GR16:$src1, imm:$src2)]>, OpSize;
 def CMP32ri : Ii32<0x81, MRM7r,
                    (outs), (ins GR32:$src1, i32imm:$src2),
-                   "cmp{l} {$src2, $src1|$src1, $src2}",
+                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp GR32:$src1, imm:$src2)]>;
 def CMP8mi  : Ii8 <0x80, MRM7m,
                    (outs), (ins i8mem :$src1, i8imm :$src2),
-                   "cmp{b} {$src2, $src1|$src1, $src2}",
+                   "cmp{b}\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp (loadi8 addr:$src1), imm:$src2)]>;
 def CMP16mi : Ii16<0x81, MRM7m,
                    (outs), (ins i16mem:$src1, i16imm:$src2),
-                   "cmp{w} {$src2, $src1|$src1, $src2}",
+                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp (loadi16 addr:$src1), imm:$src2)]>, OpSize;
 def CMP32mi : Ii32<0x81, MRM7m,
                    (outs), (ins i32mem:$src1, i32imm:$src2),
-                   "cmp{l} {$src2, $src1|$src1, $src2}",
+                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp (loadi32 addr:$src1), imm:$src2)]>;
 def CMP16ri8 : Ii8<0x83, MRM7r,
                    (outs), (ins GR16:$src1, i16i8imm:$src2),
-                   "cmp{w} {$src2, $src1|$src1, $src2}",
+                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp GR16:$src1, i16immSExt8:$src2)]>, OpSize;
 def CMP16mi8 : Ii8<0x83, MRM7m,
                    (outs), (ins i16mem:$src1, i16i8imm:$src2),
-                   "cmp{w} {$src2, $src1|$src1, $src2}",
+                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp (loadi16 addr:$src1), i16immSExt8:$src2)]>, OpSize;
 def CMP32mi8 : Ii8<0x83, MRM7m,
                    (outs), (ins i32mem:$src1, i32i8imm:$src2),
-                   "cmp{l} {$src2, $src1|$src1, $src2}",
+                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp (loadi32 addr:$src1), i32immSExt8:$src2)]>;
 def CMP32ri8 : Ii8<0x83, MRM7r,
                    (outs), (ins GR32:$src1, i32i8imm:$src2),
-                   "cmp{l} {$src2, $src1|$src1, $src2}",
+                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp GR32:$src1, i32immSExt8:$src2)]>;
 
 // Sign/Zero extenders
 def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "movs{bw|x} {$src, $dst|$dst, $src}",
+                   "movs{bw|x}\t{$src, $dst|$dst, $src}",
                    [(set GR16:$dst, (sext GR8:$src))]>, TB, OpSize;
 def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "movs{bw|x} {$src, $dst|$dst, $src}",
+                   "movs{bw|x}\t{$src, $dst|$dst, $src}",
                    [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB, OpSize;
 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
-                   "movs{bl|x} {$src, $dst|$dst, $src}",
+                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (sext GR8:$src))]>, TB;
 def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
-                   "movs{bl|x} {$src, $dst|$dst, $src}",
+                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
 def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
-                   "movs{wl|x} {$src, $dst|$dst, $src}",
+                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (sext GR16:$src))]>, TB;
 def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
-                   "movs{wl|x} {$src, $dst|$dst, $src}",
+                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
 
 def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "movz{bw|x} {$src, $dst|$dst, $src}",
+                   "movz{bw|x}\t{$src, $dst|$dst, $src}",
                    [(set GR16:$dst, (zext GR8:$src))]>, TB, OpSize;
 def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "movz{bw|x} {$src, $dst|$dst, $src}",
+                   "movz{bw|x}\t{$src, $dst|$dst, $src}",
                    [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB, OpSize;
 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
-                   "movz{bl|x} {$src, $dst|$dst, $src}",
+                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (zext GR8:$src))]>, TB;
 def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
-                   "movz{bl|x} {$src, $dst|$dst, $src}",
+                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
 def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
-                   "movz{wl|x} {$src, $dst|$dst, $src}",
+                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (zext GR16:$src))]>, TB;
 def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
-                   "movz{wl|x} {$src, $dst|$dst, $src}",
+                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
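 // The braces above also cover diverging mnemonics: AT&T spells out both
 // operand sizes (movsbl, movzwl, ...) while Intel uses plain movsx/movzx,
 // so "movz{wl|x}\t..." would print as, for instance,
 //   movzwl	%cx, %eax           (AT&T)
 //   movzx	eax, cx             (Intel)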
 
 def CBW : I<0x98, RawFrm, (outs), (ins),
@@ -2431,57 +2430,57 @@
 // Alias instructions that map movr0 to xor.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
-                 "xor{b} $dst, $dst",
+                 "xor{b}\t$dst, $dst",
                  [(set GR8:$dst, 0)]>;
 def MOV16r0  : I<0x31, MRMInitReg,  (outs GR16:$dst), (ins),
-                 "xor{w} $dst, $dst",
+                 "xor{w}\t$dst, $dst",
                  [(set GR16:$dst, 0)]>, OpSize;
 def MOV32r0  : I<0x31, MRMInitReg,  (outs GR32:$dst), (ins),
-                 "xor{l} $dst, $dst",
+                 "xor{l}\t$dst, $dst",
                  [(set GR32:$dst, 0)]>;
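 // "xor reg, reg" is preferred over "mov $0, reg" here because it is a
 // shorter encoding (2 bytes vs. 5 for the 32-bit case) that still leaves
 // zero in the register, so MOV32r0 would print as, e.g.,
 //   xorl	%eax, %eax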
 
 // Basic operations on GR16 / GR32 subclasses GR16_ and GR32_, which contain only
 // those registers that have GR8 sub-registers (i.e. AX - DX, EAX - EDX).
 def MOV16to16_ : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16:$src),
-                "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32to32_ : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32:$src),
-                "mov{l} {$src, $dst|$dst, $src}", []>;
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
 
 def MOV16_rr : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16_:$src),
-                "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32_rr : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32_:$src),
-                "mov{l} {$src, $dst|$dst, $src}", []>;
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
 def MOV16_rm : I<0x8B, MRMSrcMem, (outs GR16_:$dst), (ins i16mem:$src),
-                "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32_rm : I<0x8B, MRMSrcMem, (outs GR32_:$dst), (ins i32mem:$src),
-                "mov{l} {$src, $dst|$dst, $src}", []>;
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
 def MOV16_mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16_:$src),
-                "mov{w} {$src, $dst|$dst, $src}", []>, OpSize;
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32_mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32_:$src),
-                "mov{l} {$src, $dst|$dst, $src}", []>;
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
 
 //===----------------------------------------------------------------------===//
 // Thread Local Storage Instructions
 //
 
 def TLS_addr : I<0, Pseudo, (outs GR32:$dst), (ins i32imm:$sym),
-               "leal ${sym:mem}(,%ebx,1), $dst",
+               "leal\t${sym:mem}(,%ebx,1), $dst",
                [(set GR32:$dst, (X86tlsaddr tglobaltlsaddr:$sym))]>,
                Imp<[EBX],[]>;
 
 let AddedComplexity = 10 in
 def TLS_gs_rr : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
-                  "movl %gs:($src), $dst",
+                  "movl\t%gs:($src), $dst",
                   [(set GR32:$dst, (load (add X86TLStp, GR32:$src)))]>;
 
 let AddedComplexity = 15 in
 def TLS_gs_ri : I<0, Pseudo, (outs GR32:$dst), (ins i32imm:$src),
-                  "movl %gs:${src:mem}, $dst",
+                  "movl\t%gs:${src:mem}, $dst",
                   [(set GR32:$dst,
                     (load (add X86TLStp, (X86Wrapper tglobaltlsaddr:$src))))]>;
 
 def TLS_tp : I<0, Pseudo, (outs GR32:$dst), (ins),
-               "movl %gs:0, $dst",
+               "movl\t%gs:0, $dst",
                [(set GR32:$dst, X86TLStp)]>;
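 // These pseudos expand to %gs-relative accesses: on x86 Linux the %gs
 // segment points at the thread block, with the thread pointer itself stored
 // at %gs:0, so TLS_tp would print as, e.g.,
 //   movl	%gs:0, %eax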
 
 //===----------------------------------------------------------------------===//
@@ -2500,7 +2499,7 @@
 let isTerminator = 1, isReturn = 1, isBarrier = 1,
     hasCtrlDep = 1 in {
 def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
-                    "ret #eh_return, addr: $addr",
+                    "ret\t#eh_return, addr: $addr",
                     [(X86ehret GR32:$addr)]>;
 
 }
Index: lib/Target/X86/X86InstrFPStack.td
===================================================================
--- lib/Target/X86/X86InstrFPStack.td	(revision 40587)
+++ lib/Target/X86/X86InstrFPStack.td	(working copy)
@@ -176,9 +176,9 @@
                   [(set RFP64:$dst, 
                     (OpNode RFP64:$src1, (extloadf32 addr:$src2)))]>;
 def _F32m  : FPI<0xD8, fp, (outs), (ins f32mem:$src), 
-                 !strconcat("f", !strconcat(asmstring, "{s} $src"))>;
+                 !strconcat("f", !strconcat(asmstring, "{s}\t$src"))>;
 def _F64m  : FPI<0xDC, fp, (outs), (ins f64mem:$src), 
-                 !strconcat("f", !strconcat(asmstring, "{l} $src"))>;
+                 !strconcat("f", !strconcat(asmstring, "{l}\t$src"))>;
 // ST(0) = ST(0) + [memint]
 def _FpI16m32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
                     [(set RFP32:$dst, (OpNode RFP32:$src1,
@@ -193,9 +193,9 @@
                     [(set RFP64:$dst, (OpNode RFP64:$src1,
                                        (X86fild addr:$src2, i32)))]>;
 def _FI16m  : FPI<0xDE, fp, (outs), (ins i16mem:$src), 
-                  !strconcat("fi", !strconcat(asmstring, "{s} $src"))>;
+                  !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))>;
 def _FI32m  : FPI<0xDA, fp, (outs), (ins i32mem:$src), 
-                  !strconcat("fi", !strconcat(asmstring, "{l} $src"))>;
+                  !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))>;
 }
 
 defm ADD : FPBinary_rr<fadd>;
@@ -219,24 +219,24 @@
 // NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
 // of some of the 'reverse' forms of the fsub and fdiv instructions.  As such,
 // we have to put some 'r's in and take them out of weird places.
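 // Concretely, "fsub{|r}" prints "fsub" under AT&T syntax but "fsubr" under
 // Intel syntax, while "fsub{r}" prints "fsubr" under AT&T only; the added
 // and dropped 'r's compensate for the assemblers swapping the two mnemonics
 // in the register-register forms.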
-def ADD_FST0r   : FPST0rInst <0xC0, "fadd $op">;
-def ADD_FrST0   : FPrST0Inst <0xC0, "fadd {%st(0), $op|$op, %ST(0)}">;
-def ADD_FPrST0  : FPrST0PInst<0xC0, "faddp $op">;
-def SUBR_FST0r  : FPST0rInst <0xE8, "fsubr $op">;
-def SUB_FrST0   : FPrST0Inst <0xE8, "fsub{r} {%st(0), $op|$op, %ST(0)}">;
-def SUB_FPrST0  : FPrST0PInst<0xE8, "fsub{r}p $op">;
-def SUB_FST0r   : FPST0rInst <0xE0, "fsub $op">;
-def SUBR_FrST0  : FPrST0Inst <0xE0, "fsub{|r} {%st(0), $op|$op, %ST(0)}">;
-def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p $op">;
-def MUL_FST0r   : FPST0rInst <0xC8, "fmul $op">;
-def MUL_FrST0   : FPrST0Inst <0xC8, "fmul {%st(0), $op|$op, %ST(0)}">;
-def MUL_FPrST0  : FPrST0PInst<0xC8, "fmulp $op">;
-def DIVR_FST0r  : FPST0rInst <0xF8, "fdivr $op">;
-def DIV_FrST0   : FPrST0Inst <0xF8, "fdiv{r} {%st(0), $op|$op, %ST(0)}">;
-def DIV_FPrST0  : FPrST0PInst<0xF8, "fdiv{r}p $op">;
-def DIV_FST0r   : FPST0rInst <0xF0, "fdiv $op">;
-def DIVR_FrST0  : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">;
-def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">;
+def ADD_FST0r   : FPST0rInst <0xC0, "fadd\t$op">;
+def ADD_FrST0   : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">;
+def ADD_FPrST0  : FPrST0PInst<0xC0, "faddp\t$op">;
+def SUBR_FST0r  : FPST0rInst <0xE8, "fsubr\t$op">;
+def SUB_FrST0   : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUB_FPrST0  : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
+def SUB_FST0r   : FPST0rInst <0xE0, "fsub\t$op">;
+def SUBR_FrST0  : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
+def MUL_FST0r   : FPST0rInst <0xC8, "fmul\t$op">;
+def MUL_FrST0   : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">;
+def MUL_FPrST0  : FPrST0PInst<0xC8, "fmulp\t$op">;
+def DIVR_FST0r  : FPST0rInst <0xF8, "fdivr\t$op">;
+def DIV_FrST0   : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIV_FPrST0  : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
+def DIV_FST0r   : FPST0rInst <0xF0, "fdiv\t$op">;
+def DIVR_FrST0  : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
 
 // Unary operations.
 multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
@@ -281,21 +281,21 @@
 
 // These are not factored because there's no clean way to pass DA/DB.
 def CMOVB_F  : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
-                  "fcmovb {$op, %st(0)|%ST(0), $op}">, DA;
+                  "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA;
 def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
-                  "fcmovbe {$op, %st(0)|%ST(0), $op}">, DA;
+                  "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA;
 def CMOVE_F  : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
-                  "fcmove {$op, %st(0)|%ST(0), $op}">, DA;
+                  "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA;
 def CMOVP_F  : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
-                  "fcmovu  {$op, %st(0)|%ST(0), $op}">, DA;
+                  "fcmovu\t{$op, %st(0)|%ST(0), $op}">, DA;
 def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
-                  "fcmovnb {$op, %st(0)|%ST(0), $op}">, DB;
+                  "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB;
 def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
-                  "fcmovnbe {$op, %st(0)|%ST(0), $op}">, DB;
+                  "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB;
 def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
-                  "fcmovne {$op, %st(0)|%ST(0), $op}">, DB;
+                  "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB;
 def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
-                  "fcmovnu {$op, %st(0)|%ST(0), $op}">, DB;
+                  "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB;
 
 // Floating point loads & stores.
 def LD_Fp32m   : FpI<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
@@ -332,20 +332,20 @@
 def IST_Fp32m64  : FpI<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
 def IST_Fp64m64  : FpI<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
 
-def LD_F32m   : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s} $src">;
-def LD_F64m   : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l} $src">;
-def ILD_F16m  : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s} $src">;
-def ILD_F32m  : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l} $src">;
-def ILD_F64m  : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll} $src">;
-def ST_F32m   : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s} $dst">;
-def ST_F64m   : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l} $dst">;
-def ST_FP32m  : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s} $dst">;
-def ST_FP64m  : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l} $dst">;
-def IST_F16m  : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s} $dst">;
-def IST_F32m  : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l} $dst">;
-def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s} $dst">;
-def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l} $dst">;
-def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll} $dst">;
+def LD_F32m   : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
+def LD_F64m   : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
+def ILD_F16m  : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
+def ILD_F32m  : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
+def ILD_F64m  : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
+def ST_F32m   : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
+def ST_F64m   : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
+def ST_FP32m  : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
+def ST_FP64m  : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
+def IST_F16m  : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
+def IST_F32m  : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
+def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
+def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
+def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
 
 // FISTTP requires SSE3 even though it's a FPStack op.
 def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
@@ -367,15 +367,15 @@
                     [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
                     Requires<[HasSSE3]>;
 
-def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s} $dst">;
-def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l} $dst">;
-def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll} $dst">;
+def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
+def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
+def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
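 // fisttp always stores with truncation, independent of the current FPU
 // rounding mode, which is what makes the direct fp-to-int lowering above
 // possible; the instruction arrived with SSE3, hence Requires<[HasSSE3]>.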
 
 // FP Stack manipulation instructions.
-def LD_Frr   : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld $op">, D9;
-def ST_Frr   : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst $op">, DD;
-def ST_FPrr  : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp $op">, DD;
-def XCH_F    : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch $op">, D9;
+def LD_Frr   : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op">, D9;
+def ST_Frr   : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op">, DD;
+def ST_FPrr  : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op">, DD;
+def XCH_F    : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9;
 
 // Floating point constant loads.
 let isReMaterializable = 1 in {
@@ -405,29 +405,29 @@
 
 def UCOM_Fr    : FPI<0xE0, AddRegFrm,    // FPSW = cmp ST(0) with ST(i)
                     (outs), (ins RST:$reg),
-                    "fucom $reg">, DD, Imp<[ST0],[]>;
+                    "fucom\t$reg">, DD, Imp<[ST0],[]>;
 def UCOM_FPr   : FPI<0xE8, AddRegFrm,    // FPSW = cmp ST(0) with ST(i), pop
                     (outs), (ins RST:$reg),
-                    "fucomp $reg">, DD, Imp<[ST0],[]>;
+                    "fucomp\t$reg">, DD, Imp<[ST0],[]>;
 def UCOM_FPPr  : FPI<0xE9, RawFrm,       // cmp ST(0) with ST(1), pop, pop
                     (outs), (ins),
                     "fucompp">, DA, Imp<[ST0],[]>;
 
 def UCOM_FIr   : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i)
                     (outs), (ins RST:$reg),
-                    "fucomi {$reg, %st(0)|%ST(0), $reg}">, DB, Imp<[ST0],[]>;
+                    "fucomi\t{$reg, %st(0)|%ST(0), $reg}">, DB, Imp<[ST0],[]>;
 def UCOM_FIPr  : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i), pop
                     (outs), (ins RST:$reg),
-                    "fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>;
+                    "fucomip\t{$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>;
 
 // Floating point flag ops.
 def FNSTSW8r  : I<0xE0, RawFrm,                  // AX = fp flags
                   (outs), (ins), "fnstsw", []>, DF, Imp<[],[AX]>;
 
 def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control word
-                  (outs), (ins i16mem:$dst), "fnstcw $dst", []>;
+                  (outs), (ins i16mem:$dst), "fnstcw\t$dst", []>;
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control word = [mem16]
-                  (outs), (ins i16mem:$dst), "fldcw $dst", []>;
+                  (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
Index: lib/Target/X86/X86InstrX86-64.td
===================================================================
--- lib/Target/X86/X86InstrX86-64.td	(revision 40587)
+++ lib/Target/X86/X86InstrX86-64.td	(working copy)
@@ -125,18 +125,18 @@
               XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
               XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15] in {
     def CALL64pcrel32 : I<0xE8, RawFrm, (outs), (ins i64imm:$dst, variable_ops),
-                          "call ${dst:call}", []>;
+                          "call\t${dst:call}", []>;
     def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
-                          "call {*}$dst", [(X86call GR64:$dst)]>;
+                          "call\t{*}$dst", [(X86call GR64:$dst)]>;
     def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
-                          "call {*}$dst", []>;
+                          "call\t{*}$dst", []>;
   }
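 // The "{*}" above emits the AT&T indirect-call marker only, so CALL64r
 // would print as "call	*%rax" in AT&T syntax and "call	rax" in Intel.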
 
 // Branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-  def JMP64r     : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q} {*}$dst",
+  def JMP64r     : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
                      [(brind GR64:$dst)]>;
-  def JMP64m     : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q} {*}$dst",
+  def JMP64m     : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
                      [(brind (loadi64 addr:$dst))]>;
 }
 
@@ -146,30 +146,30 @@
 def LEAVE64  : I<0xC9, RawFrm,
                  (outs), (ins), "leave", []>, Imp<[RBP,RSP],[RBP,RSP]>;
 def POP64r   : I<0x58, AddRegFrm,
-                 (outs GR64:$reg), (ins), "pop{q} $reg", []>, Imp<[RSP],[RSP]>;
+                 (outs GR64:$reg), (ins), "pop{q}\t$reg", []>, Imp<[RSP],[RSP]>;
 def PUSH64r  : I<0x50, AddRegFrm,
-                 (outs), (ins GR64:$reg), "push{q} $reg", []>, Imp<[RSP],[RSP]>;
+                 (outs), (ins GR64:$reg), "push{q}\t$reg", []>, Imp<[RSP],[RSP]>;
 
 def LEA64_32r : I<0x8D, MRMSrcMem,
                   (outs GR32:$dst), (ins lea64_32mem:$src),
-                  "lea{l} {$src|$dst}, {$dst|$src}",
+                  "lea{l}\t{$src|$dst}, {$dst|$src}",
                   [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
 
 def LEA64r   : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
-                  "lea{q} {$src|$dst}, {$dst|$src}",
+                  "lea{q}\t{$src|$dst}, {$dst|$src}",
                   [(set GR64:$dst, lea64addr:$src)]>;
 
 let isTwoAddress = 1 in
 def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
-                  "bswap{q} $dst", 
+                  "bswap{q}\t$dst", 
                   [(set GR64:$dst, (bswap GR64:$src))]>, TB;
 // Exchange
 def XCHG64rr : RI<0x87, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                  "xchg{q} {$src2|$src1}, {$src1|$src2}", []>;
+                  "xchg{q}\t{$src2|$src1}, {$src1|$src2}", []>;
 def XCHG64mr : RI<0x87, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                  "xchg{q} {$src2|$src1}, {$src1|$src2}", []>;
+                  "xchg{q}\t{$src2|$src1}, {$src1|$src2}", []>;
 def XCHG64rm : RI<0x87, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
-                  "xchg{q} {$src2|$src1}, {$src1|$src2}", []>;
+                  "xchg{q}\t{$src2|$src1}, {$src1|$src2}", []>;
 
 // Repeat string ops
 def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
@@ -184,58 +184,58 @@
 //
 
 def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                 "mov{q} {$src, $dst|$dst, $src}", []>;
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
 
 def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
-                    "movabs{q} {$src, $dst|$dst, $src}",
+                    "movabs{q}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, imm:$src)]>;
 def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
-                      "mov{q} {$src, $dst|$dst, $src}",
+                      "mov{q}\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, i64immSExt32:$src)]>;
 
 def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                 "mov{q} {$src, $dst|$dst, $src}",
+                 "mov{q}\t{$src, $dst|$dst, $src}",
                  [(set GR64:$dst, (load addr:$src))]>;
 
 def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                 "mov{q} {$src, $dst|$dst, $src}",
+                 "mov{q}\t{$src, $dst|$dst, $src}",
                  [(store GR64:$src, addr:$dst)]>;
 def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                      "mov{q} {$src, $dst|$dst, $src}",
+                      "mov{q}\t{$src, $dst|$dst, $src}",
                       [(store i64immSExt32:$src, addr:$dst)]>;
 
 // Sign/Zero extenders
 
 def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
-                    "movs{bq|x} {$src, $dst|$dst, $src}",
+                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (sext GR8:$src))]>, TB;
 def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
-                    "movs{bq|x} {$src, $dst|$dst, $src}",
+                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
 def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                    "movs{wq|x} {$src, $dst|$dst, $src}",
+                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (sext GR16:$src))]>, TB;
 def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
-                    "movs{wq|x} {$src, $dst|$dst, $src}",
+                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
 def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
-                    "movs{lq|xd} {$src, $dst|$dst, $src}",
+                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (sext GR32:$src))]>;
 def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
-                    "movs{lq|xd} {$src, $dst|$dst, $src}",
+                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
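 // There is deliberately no movzx from 32 to 64 bits: writing a 32-bit
 // register in 64-bit mode already zeroes the upper 32 bits, so a plain mov
 // suffices, and only the sign-extending movs{lq|xd} needs an instruction.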
 
 def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
-                    "movz{bq|x} {$src, $dst|$dst, $src}",
+                    "movz{bq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (zext GR8:$src))]>, TB;
 def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
-                    "movz{bq|x} {$src, $dst|$dst, $src}",
+                    "movz{bq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
 def MOVZX64rr16: RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                    "movz{wq|x} {$src, $dst|$dst, $src}",
+                    "movz{wq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (zext GR16:$src))]>, TB;
 def MOVZX64rm16: RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
-                    "movz{wq|x} {$src, $dst|$dst, $src}",
+                    "movz{wq|x}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
 
 def CDQE : RI<0x98, RawFrm, (outs), (ins),
@@ -252,204 +252,204 @@
 let isConvertibleToThreeAddress = 1 in {
 let isCommutable = 1 in
 def ADD64rr  : RI<0x01, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                  "add{q} {$src2, $dst|$dst, $src2}",
+                  "add{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (add GR64:$src1, GR64:$src2))]>;
 
 def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                      "add{q} {$src2, $dst|$dst, $src2}",
+                      "add{q}\t{$src2, $dst|$dst, $src2}",
                       [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2))]>;
 def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                    "add{q} {$src2, $dst|$dst, $src2}",
+                    "add{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2))]>;
 } // isConvertibleToThreeAddress
 
 def ADD64rm  : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                  "add{q} {$src2, $dst|$dst, $src2}",
+                  "add{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (add GR64:$src1, (load addr:$src2)))]>;
 } // isTwoAddress
 
 def ADD64mr  : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                  "add{q} {$src2, $dst|$dst, $src2}",
+                  "add{q}\t{$src2, $dst|$dst, $src2}",
                   [(store (add (load addr:$dst), GR64:$src2), addr:$dst)]>;
 def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
-                      "add{q} {$src2, $dst|$dst, $src2}",
+                      "add{q}\t{$src2, $dst|$dst, $src2}",
                [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
 def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
-                    "add{q} {$src2, $dst|$dst, $src2}",
+                    "add{q}\t{$src2, $dst|$dst, $src2}",
                 [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
 
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def ADC64rr  : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                  "adc{q} {$src2, $dst|$dst, $src2}",
+                  "adc{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
 
 def ADC64rm  : RI<0x13, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                  "adc{q} {$src2, $dst|$dst, $src2}",
+                  "adc{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>;
 
 def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                      "adc{q} {$src2, $dst|$dst, $src2}",
+                      "adc{q}\t{$src2, $dst|$dst, $src2}",
                       [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
 def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                    "adc{q} {$src2, $dst|$dst, $src2}",
+                    "adc{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>;
 } // isTwoAddress
 
 def ADC64mr  : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                  "adc{q} {$src2, $dst|$dst, $src2}",
+                  "adc{q}\t{$src2, $dst|$dst, $src2}",
                   [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>;
 def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
-                      "adc{q} {$src2, $dst|$dst, $src2}",
+                      "adc{q}\t{$src2, $dst|$dst, $src2}",
               [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
 def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
-                    "adc{q} {$src2, $dst|$dst, $src2}",
+                    "adc{q}\t{$src2, $dst|$dst, $src2}",
                [(store (adde (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
 
 let isTwoAddress = 1 in {
 def SUB64rr  : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                  "sub{q} {$src2, $dst|$dst, $src2}",
+                  "sub{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
 
 def SUB64rm  : RI<0x2B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                  "sub{q} {$src2, $dst|$dst, $src2}",
+                  "sub{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2)))]>;
 
 def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                      "sub{q} {$src2, $dst|$dst, $src2}",
+                      "sub{q}\t{$src2, $dst|$dst, $src2}",
                       [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2))]>;
 def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                    "sub{q} {$src2, $dst|$dst, $src2}",
+                    "sub{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2))]>;
 } // isTwoAddress
 
 def SUB64mr  : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
-                  "sub{q} {$src2, $dst|$dst, $src2}",
+                  "sub{q}\t{$src2, $dst|$dst, $src2}",
                   [(store (sub (load addr:$dst), GR64:$src2), addr:$dst)]>;
 def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), 
-                      "sub{q} {$src2, $dst|$dst, $src2}",
+                      "sub{q}\t{$src2, $dst|$dst, $src2}",
                [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
 def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2), 
-                    "sub{q} {$src2, $dst|$dst, $src2}",
+                    "sub{q}\t{$src2, $dst|$dst, $src2}",
                 [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
 
 let isTwoAddress = 1 in {
 def SBB64rr    : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                    "sbb{q} {$src2, $dst|$dst, $src2}",
+                    "sbb{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
 
 def SBB64rm  : RI<0x1B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                  "sbb{q} {$src2, $dst|$dst, $src2}",
+                  "sbb{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>;
 
 def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                      "sbb{q} {$src2, $dst|$dst, $src2}",
+                      "sbb{q}\t{$src2, $dst|$dst, $src2}",
                       [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
 def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                    "sbb{q} {$src2, $dst|$dst, $src2}",
+                    "sbb{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>;
 } // isTwoAddress
 
 def SBB64mr  : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
-                  "sbb{q} {$src2, $dst|$dst, $src2}",
+                  "sbb{q}\t{$src2, $dst|$dst, $src2}",
                   [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
 def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2), 
-                      "sbb{q} {$src2, $dst|$dst, $src2}",
+                      "sbb{q}\t{$src2, $dst|$dst, $src2}",
               [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
 def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2), 
-                    "sbb{q} {$src2, $dst|$dst, $src2}",
+                    "sbb{q}\t{$src2, $dst|$dst, $src2}",
                [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
 
 // Unsigned multiplication
 def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
-                "mul{q} $src", []>,
+                "mul{q}\t$src", []>,
              Imp<[RAX],[RAX,RDX]>;         // RAX,RDX = RAX*GR64
 def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
-                "mul{q} $src", []>,
+                "mul{q}\t$src", []>,
              Imp<[RAX],[RAX,RDX]>;         // RAX,RDX = RAX*[mem64]
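 // The one-operand mul/imul (and div/idiv below) have no explicit result
 // operand: they implicitly read RAX (plus RDX for division) and write the
 // RDX:RAX pair, which is what the Imp<[uses],[defs]> lists model here.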
 
 // Signed multiplication
 def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src),
-                 "imul{q} $src", []>,
+                 "imul{q}\t$src", []>,
               Imp<[RAX],[RAX,RDX]>;         // RAX,RDX = RAX*GR64
 def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
-                 "imul{q} $src", []>,
+                 "imul{q}\t$src", []>,
               Imp<[RAX],[RAX,RDX]>;         // RAX,RDX = RAX*[mem64]
 
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                  "imul{q} {$src2, $dst|$dst, $src2}",
+                  "imul{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>, TB;
 
 def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                  "imul{q} {$src2, $dst|$dst, $src2}",
+                  "imul{q}\t{$src2, $dst|$dst, $src2}",
                  [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2)))]>, TB;
 } // isTwoAddress
 
 // Surprisingly enough, these are not two-address instructions!
 def IMUL64rri32 : RIi32<0x69, MRMSrcReg,                    // GR64 = GR64*I32
                         (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                        "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>;
 def IMUL64rri8 : RIi8<0x6B, MRMSrcReg,                      // GR64 = GR64*I8
                       (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                      "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2))]>;
 def IMUL64rmi32 : RIi32<0x69, MRMSrcMem,                   // GR64 = [mem64]*I32
                         (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
-                        "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2))]>;
 def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
                      (outs GR64:$dst), (ins i64mem:$src1, i64i8imm:$src2),
-                      "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2))]>;
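 // These are the genuine three-operand imul forms (opcodes 0x69 and 0x6B),
 // whose destination is distinct from both sources; e.g.
 //   imulq	$8, (%rdi), %rax
 // which is why they escape the usual two-address constraint.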
 
 // Unsigned division / remainder
 def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),        // RDX:RAX/r64 = RAX,RDX
-                "div{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+                "div{q}\t$src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
 def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),      // RDX:RAX/[mem64] = RAX,RDX
-                "div{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+                "div{q}\t$src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
 
 // Signed division / remainder
 def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),        // RDX:RAX/r64 = RAX,RDX
-                "idiv{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+                "idiv{q}\t$src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
 def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),      // RDX:RAX/[mem64] = RAX,RDX
-                "idiv{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+                "idiv{q}\t$src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
 
 // Unary instructions
 let CodeSize = 2 in {
 let isTwoAddress = 1 in
-def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q} $dst",
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
                 [(set GR64:$dst, (ineg GR64:$src))]>;
-def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q} $dst",
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
                 [(store (ineg (loadi64 addr:$dst)), addr:$dst)]>;
 
 let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
-def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q} $dst",
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
                 [(set GR64:$dst, (add GR64:$src, 1))]>;
-def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q} $dst",
+def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
                 [(store (add (loadi64 addr:$dst), 1), addr:$dst)]>;
 
 let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
-def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q} $dst",
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
                 [(set GR64:$dst, (add GR64:$src, -1))]>;
-def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q} $dst",
+def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
                 [(store (add (loadi64 addr:$dst), -1), addr:$dst)]>;
 
 // In 64-bit mode, single-byte INC and DEC cannot be encoded.
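 // (The one-byte 0x40-0x4F inc/dec opcodes were repurposed as REX prefixes,
 // so the two-byte 0xFF /0 and /1 forms are used instead.)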
 let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
 // Can transform into LEA.
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), "inc{w} $dst",
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst",
                   [(set GR16:$dst, (add GR16:$src, 1))]>,
                 OpSize, Requires<[In64BitMode]>;
-def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), "inc{l} $dst",
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst",
                   [(set GR32:$dst, (add GR32:$src, 1))]>,
                 Requires<[In64BitMode]>;
-def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), "dec{w} $dst",
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst",
                   [(set GR16:$dst, (add GR16:$src, -1))]>,
                 OpSize, Requires<[In64BitMode]>;
-def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l} $dst",
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst",
                   [(set GR32:$dst, (add GR32:$src, -1))]>,
                 Requires<[In64BitMode]>;
 } // isConvertibleToThreeAddress
@@ -459,138 +459,138 @@
 // Shift instructions
 let isTwoAddress = 1 in {
 def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src),
-                  "shl{q} {%cl, $dst|$dst, %CL}",
+                  "shl{q}\t{%cl, $dst|$dst, %CL}",
                   [(set GR64:$dst, (shl GR64:$src, CL))]>,
                Imp<[CL],[]>;
 def SHL64ri  : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
-                    "shl{q} {$src2, $dst|$dst, $src2}",
+                    "shl{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
 def SHL64r1  : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
-                 "shl{q} $dst", []>;
+                 "shl{q}\t$dst", []>;
 } // isTwoAddress
 
 def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
-                  "shl{q} {%cl, $dst|$dst, %CL}",
+                  "shl{q}\t{%cl, $dst|$dst, %CL}",
                   [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>,
                Imp<[CL],[]>;
 def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
-                  "shl{q} {$src, $dst|$dst, $src}",
+                  "shl{q}\t{$src, $dst|$dst, $src}",
                  [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
-                  "shl{q} $dst",
+                  "shl{q}\t$dst",
                  [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
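 // Variable-count shifts always take their count in the CL register; the
 // Imp<[CL],[]> marks that implicit use, and SHL64rCL would print (AT&T)
 // as, for instance,
 //   shlq	%cl, %rax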
 
 let isTwoAddress = 1 in {
 def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src),
-                  "shr{q} {%cl, $dst|$dst, %CL}",
+                  "shr{q}\t{%cl, $dst|$dst, %CL}",
                   [(set GR64:$dst, (srl GR64:$src, CL))]>,
                Imp<[CL],[]>;
 def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
-                  "shr{q} {$src2, $dst|$dst, $src2}",
+                  "shr{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
 def SHR64r1  : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
-                 "shr{q} $dst",
+                 "shr{q}\t$dst",
                  [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
 } // isTwoAddress
 
 def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
-                  "shr{q} {%cl, $dst|$dst, %CL}",
+                  "shr{q}\t{%cl, $dst|$dst, %CL}",
                   [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>,
                Imp<[CL],[]>;
 def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
-                  "shr{q} {$src, $dst|$dst, $src}",
+                  "shr{q}\t{$src, $dst|$dst, $src}",
                  [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
-                  "shr{q} $dst",
+                  "shr{q}\t$dst",
                  [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
 
 let isTwoAddress = 1 in {
 def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src),
-                 "sar{q} {%cl, $dst|$dst, %CL}",
+                 "sar{q}\t{%cl, $dst|$dst, %CL}",
                  [(set GR64:$dst, (sra GR64:$src, CL))]>, Imp<[CL],[]>;
 def SAR64ri  : RIi8<0xC1, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
-                   "sar{q} {$src2, $dst|$dst, $src2}",
+                   "sar{q}\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
 def SAR64r1  : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
-                 "sar{q} $dst",
+                 "sar{q}\t$dst",
                  [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
 } // isTwoAddress
 
 def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), 
-                 "sar{q} {%cl, $dst|$dst, %CL}",
+                 "sar{q}\t{%cl, $dst|$dst, %CL}",
                  [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>,
                Imp<[CL],[]>;
 def SAR64mi  : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
-                    "sar{q} {$src, $dst|$dst, $src}",
+                    "sar{q}\t{$src, $dst|$dst, $src}",
                  [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
-                  "sar{q} $dst",
+                  "sar{q}\t$dst",
                  [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
 
 // Rotate instructions
 let isTwoAddress = 1 in {
 def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
-                  "rol{q} {%cl, $dst|$dst, %CL}",
+                  "rol{q}\t{%cl, $dst|$dst, %CL}",
                   [(set GR64:$dst, (rotl GR64:$src, CL))]>, Imp<[CL],[]>;
 def ROL64ri  : RIi8<0xC1, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
-                    "rol{q} {$src2, $dst|$dst, $src2}",
+                    "rol{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
 def ROL64r1  : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
-                  "rol{q} $dst",
+                  "rol{q}\t$dst",
                   [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
 } // isTwoAddress
 
 def ROL64mCL :  I<0xD3, MRM0m, (outs), (ins i64mem:$dst),
-                  "rol{q} {%cl, $dst|$dst, %CL}",
+                  "rol{q}\t{%cl, $dst|$dst, %CL}",
                   [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>,
                Imp<[CL],[]>;
 def ROL64mi  : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src),
-                    "rol{q} {$src, $dst|$dst, $src}",
+                    "rol{q}\t{$src, $dst|$dst, $src}",
                 [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 def ROL64m1  : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
-                 "rol{q} $dst",
+                 "rol{q}\t$dst",
                [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
 
 let isTwoAddress = 1 in {
 def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src),
-                  "ror{q} {%cl, $dst|$dst, %CL}",
+                  "ror{q}\t{%cl, $dst|$dst, %CL}",
                   [(set GR64:$dst, (rotr GR64:$src, CL))]>, Imp<[CL],[]>;
 def ROR64ri  : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
-                    "ror{q} {$src2, $dst|$dst, $src2}",
+                    "ror{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
 def ROR64r1  : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
-                  "ror{q} $dst",
+                  "ror{q}\t$dst",
                   [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
 } // isTwoAddress
 
 def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), 
-                  "ror{q} {%cl, $dst|$dst, %CL}",
+                  "ror{q}\t{%cl, $dst|$dst, %CL}",
                   [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>,
                Imp<[CL],[]>;
 def ROR64mi  : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
-                    "ror{q} {$src, $dst|$dst, $src}",
+                    "ror{q}\t{$src, $dst|$dst, $src}",
                 [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
 def ROR64m1  : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
-                 "ror{q} $dst",
+                 "ror{q}\t$dst",
                [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
 
 // Double shift instructions (generalizations of rotate)
 let isTwoAddress = 1 in {
 def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                    "shld{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", []>,
                  Imp<[CL],[]>, TB;
 def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                    "shrd{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", []>,
                  Imp<[CL],[]>, TB;
 
 let isCommutable = 1 in {  // FIXME: Update X86InstrInfo::commuteInstruction
 def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
                       (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3),
-                      "shld{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                       TB;
 def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
                       (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3),
-                      "shrd{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                  TB;
 } // isCommutable
 } // isTwoAddress
@@ -598,18 +598,18 @@
 // Temporary hack: there is no patterns associated with these instructions
 // so we have to tell tblgen that these do not produce results.
 def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                    "shld{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", []>,
                  Imp<[CL],[]>, TB;
 def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                    "shrd{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", []>,
                  Imp<[CL],[]>, TB;
 def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
                       (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
-                      "shld{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                  TB;
 def SHRD64mri8 : RIi8<0xAC, MRMDestMem, 
                       (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
-                      "shrd{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                  TB;
 
 //===----------------------------------------------------------------------===//
@@ -617,95 +617,95 @@
 //
 
 let isTwoAddress = 1 in
-def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q} $dst",
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
                 [(set GR64:$dst, (not GR64:$src))]>;
-def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q} $dst",
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
                 [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
 
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def AND64rr  : RI<0x21, MRMDestReg, 
                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                  "and{q} {$src2, $dst|$dst, $src2}",
+                  "and{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>;
 def AND64rm  : RI<0x23, MRMSrcMem,
                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                  "and{q} {$src2, $dst|$dst, $src2}",
+                  "and{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (and GR64:$src1, (load addr:$src2)))]>;
 def AND64ri32  : RIi32<0x81, MRM4r, 
                        (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                       "and{q} {$src2, $dst|$dst, $src2}",
+                       "and{q}\t{$src2, $dst|$dst, $src2}",
                        [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2))]>;
 def AND64ri8 : RIi8<0x83, MRM4r, 
                     (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                    "and{q} {$src2, $dst|$dst, $src2}",
+                    "and{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2))]>;
 } // isTwoAddress
 
 def AND64mr  : RI<0x21, MRMDestMem,
                   (outs), (ins i64mem:$dst, GR64:$src),
-                  "and{q} {$src, $dst|$dst, $src}",
+                  "and{q}\t{$src, $dst|$dst, $src}",
                   [(store (and (load addr:$dst), GR64:$src), addr:$dst)]>;
 def AND64mi32  : RIi32<0x81, MRM4m,
                        (outs), (ins i64mem:$dst, i64i32imm:$src),
-                       "and{q} {$src, $dst|$dst, $src}",
+                       "and{q}\t{$src, $dst|$dst, $src}",
              [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>;
 def AND64mi8 : RIi8<0x83, MRM4m,
                     (outs), (ins i64mem:$dst, i64i8imm :$src),
-                    "and{q} {$src, $dst|$dst, $src}",
+                    "and{q}\t{$src, $dst|$dst, $src}",
                  [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
 
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def OR64rr   : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                  "or{q} {$src2, $dst|$dst, $src2}",
+                  "or{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>;
 def OR64rm   : RI<0x0B, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                  "or{q} {$src2, $dst|$dst, $src2}",
+                  "or{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (or GR64:$src1, (load addr:$src2)))]>;
 def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                     "or{q} {$src2, $dst|$dst, $src2}",
+                     "or{q}\t{$src2, $dst|$dst, $src2}",
                      [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2))]>;
 def OR64ri8  : RIi8<0x83, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                    "or{q} {$src2, $dst|$dst, $src2}",
+                    "or{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2))]>;
 } // isTwoAddress
 
 def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                "or{q} {$src, $dst|$dst, $src}",
+                "or{q}\t{$src, $dst|$dst, $src}",
                 [(store (or (load addr:$dst), GR64:$src), addr:$dst)]>;
 def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                     "or{q} {$src, $dst|$dst, $src}",
+                     "or{q}\t{$src, $dst|$dst, $src}",
               [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>;
 def OR64mi8  : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src),
-                    "or{q} {$src, $dst|$dst, $src}",
+                    "or{q}\t{$src, $dst|$dst, $src}",
                   [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
 
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def XOR64rr  : RI<0x31, MRMDestReg,  (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), 
-                  "xor{q} {$src2, $dst|$dst, $src2}",
+                  "xor{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
 def XOR64rm  : RI<0x33, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), 
-                  "xor{q} {$src2, $dst|$dst, $src2}",
+                  "xor{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2)))]>;
 def XOR64ri32 : RIi32<0x81, MRM6r, 
                       (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), 
-                      "xor{q} {$src2, $dst|$dst, $src2}",
+                      "xor{q}\t{$src2, $dst|$dst, $src2}",
                       [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2))]>;
 def XOR64ri8 : RIi8<0x83, MRM6r,  (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                    "xor{q} {$src2, $dst|$dst, $src2}",
+                    "xor{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2))]>;
 } // isTwoAddress
 
 def XOR64mr  : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                  "xor{q} {$src, $dst|$dst, $src}",
+                  "xor{q}\t{$src, $dst|$dst, $src}",
                   [(store (xor (load addr:$dst), GR64:$src), addr:$dst)]>;
 def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                      "xor{q} {$src, $dst|$dst, $src}",
+                      "xor{q}\t{$src, $dst|$dst, $src}",
              [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>;
 def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src),
-                    "xor{q} {$src, $dst|$dst, $src}",
+                    "xor{q}\t{$src, $dst|$dst, $src}",
                  [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
 
 //===----------------------------------------------------------------------===//
@@ -715,180 +715,180 @@
 // Integer comparison
 let isCommutable = 1 in
 def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                  "test{q} {$src2, $src1|$src1, $src2}",
+                  "test{q}\t{$src2, $src1|$src1, $src2}",
                   [(X86cmp (and GR64:$src1, GR64:$src2), 0)]>;
 def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
-                  "test{q} {$src2, $src1|$src1, $src2}",
+                  "test{q}\t{$src2, $src1|$src1, $src2}",
                   [(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0)]>;
 def TEST64ri32 : RIi32<0xF7, MRM0r, (outs), (ins GR64:$src1, i64i32imm:$src2),
-                       "test{q} {$src2, $src1|$src1, $src2}",
+                       "test{q}\t{$src2, $src1|$src1, $src2}",
                        [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0)]>;
 def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), (ins i64mem:$src1, i64i32imm:$src2),
-                       "test{q} {$src2, $src1|$src1, $src2}",
+                       "test{q}\t{$src2, $src1|$src1, $src2}",
                   [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0)]>;
 
 def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                 "cmp{q} {$src2, $src1|$src1, $src2}",
+                 "cmp{q}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp GR64:$src1, GR64:$src2)]>;
 def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                 "cmp{q} {$src2, $src1|$src1, $src2}",
+                 "cmp{q}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp (loadi64 addr:$src1), GR64:$src2)]>;
 def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
-                 "cmp{q} {$src2, $src1|$src1, $src2}",
+                 "cmp{q}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp GR64:$src1, (loadi64 addr:$src2))]>;
 def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2),
-                      "cmp{q} {$src2, $src1|$src1, $src2}",
+                      "cmp{q}\t{$src2, $src1|$src1, $src2}",
                       [(X86cmp GR64:$src1, i64immSExt32:$src2)]>;
 def CMP64mi32 : RIi32<0x81, MRM7m, (outs), (ins i64mem:$src1, i64i32imm:$src2),
-                      "cmp{q} {$src2, $src1|$src1, $src2}",
+                      "cmp{q}\t{$src2, $src1|$src1, $src2}",
                       [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2)]>;
 def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                    "cmp{q} {$src2, $src1|$src1, $src2}",
+                    "cmp{q}\t{$src2, $src1|$src1, $src2}",
                     [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2)]>;
 def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                    "cmp{q} {$src2, $src1|$src1, $src2}",
+                    "cmp{q}\t{$src2, $src1|$src1, $src2}",
                     [(X86cmp GR64:$src1, i64immSExt8:$src2)]>;
 
 // Conditional moves
 let isTwoAddress = 1 in {
 def CMOVB64rr : RI<0x42, MRMSrcReg,       // if <u, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovb {$src2, $dst|$dst, $src2}",
+                   "cmovb\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                      X86_COND_B))]>, TB;
 def CMOVB64rm : RI<0x42, MRMSrcMem,       // if <u, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovb {$src2, $dst|$dst, $src2}",
+                   "cmovb\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                      X86_COND_B))]>, TB;
 def CMOVAE64rr: RI<0x43, MRMSrcReg,       // if >=u, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovae {$src2, $dst|$dst, $src2}",
+                   "cmovae\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                      X86_COND_AE))]>, TB;
 def CMOVAE64rm: RI<0x43, MRMSrcMem,       // if >=u, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovae {$src2, $dst|$dst, $src2}",
+                   "cmovae\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                      X86_COND_AE))]>, TB;
 def CMOVE64rr : RI<0x44, MRMSrcReg,       // if ==, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmove {$src2, $dst|$dst, $src2}",
+                   "cmove\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                      X86_COND_E))]>, TB;
 def CMOVE64rm : RI<0x44, MRMSrcMem,       // if ==, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmove {$src2, $dst|$dst, $src2}",
+                   "cmove\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                      X86_COND_E))]>, TB;
 def CMOVNE64rr: RI<0x45, MRMSrcReg,       // if !=, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovne {$src2, $dst|$dst, $src2}",
+                   "cmovne\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_NE))]>, TB;
 def CMOVNE64rm: RI<0x45, MRMSrcMem,       // if !=, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovne {$src2, $dst|$dst, $src2}",
+                   "cmovne\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_NE))]>, TB;
 def CMOVBE64rr: RI<0x46, MRMSrcReg,       // if <=u, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovbe {$src2, $dst|$dst, $src2}",
+                   "cmovbe\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_BE))]>, TB;
 def CMOVBE64rm: RI<0x46, MRMSrcMem,       // if <=u, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovbe {$src2, $dst|$dst, $src2}",
+                   "cmovbe\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_BE))]>, TB;
 def CMOVA64rr : RI<0x47, MRMSrcReg,       // if >u, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmova {$src2, $dst|$dst, $src2}",
+                   "cmova\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_A))]>, TB;
 def CMOVA64rm : RI<0x47, MRMSrcMem,       // if >u, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmova {$src2, $dst|$dst, $src2}",
+                   "cmova\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_A))]>, TB;
 def CMOVL64rr : RI<0x4C, MRMSrcReg,       // if <s, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovl {$src2, $dst|$dst, $src2}",
+                   "cmovl\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_L))]>, TB;
 def CMOVL64rm : RI<0x4C, MRMSrcMem,       // if <s, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovl {$src2, $dst|$dst, $src2}",
+                   "cmovl\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_L))]>, TB;
 def CMOVGE64rr: RI<0x4D, MRMSrcReg,       // if >=s, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovge {$src2, $dst|$dst, $src2}",
+                   "cmovge\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_GE))]>, TB;
 def CMOVGE64rm: RI<0x4D, MRMSrcMem,       // if >=s, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovge {$src2, $dst|$dst, $src2}",
+                   "cmovge\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_GE))]>, TB;
 def CMOVLE64rr: RI<0x4E, MRMSrcReg,       // if <=s, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovle {$src2, $dst|$dst, $src2}",
+                   "cmovle\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_LE))]>, TB;
 def CMOVLE64rm: RI<0x4E, MRMSrcMem,       // if <=s, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovle {$src2, $dst|$dst, $src2}",
+                   "cmovle\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_LE))]>, TB;
 def CMOVG64rr : RI<0x4F, MRMSrcReg,       // if >s, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovg {$src2, $dst|$dst, $src2}",
+                   "cmovg\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_G))]>, TB;
 def CMOVG64rm : RI<0x4F, MRMSrcMem,       // if >s, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovg {$src2, $dst|$dst, $src2}",
+                   "cmovg\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_G))]>, TB;
 def CMOVS64rr : RI<0x48, MRMSrcReg,       // if signed, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovs {$src2, $dst|$dst, $src2}",
+                   "cmovs\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_S))]>, TB;
 def CMOVS64rm : RI<0x48, MRMSrcMem,       // if signed, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovs {$src2, $dst|$dst, $src2}",
+                   "cmovs\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_S))]>, TB;
 def CMOVNS64rr: RI<0x49, MRMSrcReg,       // if !signed, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovns {$src2, $dst|$dst, $src2}",
+                   "cmovns\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_NS))]>, TB;
 def CMOVNS64rm: RI<0x49, MRMSrcMem,       // if !signed, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovns {$src2, $dst|$dst, $src2}",
+                   "cmovns\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_NS))]>, TB;
 def CMOVP64rr : RI<0x4A, MRMSrcReg,       // if parity, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovp {$src2, $dst|$dst, $src2}",
+                   "cmovp\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                     X86_COND_P))]>, TB;
 def CMOVP64rm : RI<0x4A, MRMSrcMem,       // if parity, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovp {$src2, $dst|$dst, $src2}",
+                   "cmovp\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     X86_COND_P))]>, TB;
 def CMOVNP64rr : RI<0x4B, MRMSrcReg,       // if !parity, GR64 = GR64
                    (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovnp {$src2, $dst|$dst, $src2}",
+                   "cmovnp\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
                                      X86_COND_NP))]>, TB;
 def CMOVNP64rm : RI<0x4B, MRMSrcMem,       // if !parity, GR64 = [mem64]
                    (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovnp {$src2, $dst|$dst, $src2}",
+                   "cmovnp\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                      X86_COND_NP))]>, TB;
 } // isTwoAddress
@@ -899,46 +899,46 @@
 
 // f64 -> signed i64
 def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
-                           "cvtsd2si{q} {$src, $dst|$dst, $src}",
+                           "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                            [(set GR64:$dst,
                              (int_x86_sse2_cvtsd2si64 VR128:$src))]>;
 def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src),
-                           "cvtsd2si{q} {$src, $dst|$dst, $src}",
+                           "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                            [(set GR64:$dst, (int_x86_sse2_cvtsd2si64
                                              (load addr:$src)))]>;
 def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
-                        "cvttsd2si{q} {$src, $dst|$dst, $src}",
+                        "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (fp_to_sint FR64:$src))]>;
 def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
-                        "cvttsd2si{q} {$src, $dst|$dst, $src}",
+                        "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
 def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
-                            "cvttsd2si{q} {$src, $dst|$dst, $src}",
+                            "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                             [(set GR64:$dst,
                               (int_x86_sse2_cvttsd2si64 VR128:$src))]>;
 def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src),
-                            "cvttsd2si{q} {$src, $dst|$dst, $src}",
+                            "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                             [(set GR64:$dst,
                               (int_x86_sse2_cvttsd2si64
                                (load addr:$src)))]>;
 
 // Signed i64 -> f64
 def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
-                       "cvtsi2sd{q} {$src, $dst|$dst, $src}",
+                       "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (sint_to_fp GR64:$src))]>;
 def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
-                       "cvtsi2sd{q} {$src, $dst|$dst, $src}",
+                       "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
 let isTwoAddress = 1 in {
 def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
                            (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
-                           "cvtsi2sd{q} {$src2, $dst|$dst, $src2}",
+                           "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
                            [(set VR128:$dst,
                              (int_x86_sse2_cvtsi642sd VR128:$src1,
                               GR64:$src2))]>;
 def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
                            (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
-                           "cvtsi2sd{q} {$src2, $dst|$dst, $src2}",
+                           "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
                            [(set VR128:$dst,
                              (int_x86_sse2_cvtsi642sd VR128:$src1,
                               (loadi64 addr:$src2)))]>;
@@ -946,56 +946,56 @@
 
 // Signed i64 -> f32
 def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
-                       "cvtsi2ss{q} {$src, $dst|$dst, $src}",
+                       "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                        [(set FR32:$dst, (sint_to_fp GR64:$src))]>;
 def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
-                       "cvtsi2ss{q} {$src, $dst|$dst, $src}",
+                       "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                        [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
 let isTwoAddress = 1 in {
 def Int_CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg,
                            (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
-                           "cvtsi2ss{q} {$src2, $dst|$dst, $src2}",
+                           "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
                            []>; // TODO: add intrinsic
 def Int_CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem,
                            (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
-                           "cvtsi2ss{q} {$src2, $dst|$dst, $src2}",
+                           "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
                            []>; // TODO: add intrinsic
 } // isTwoAddress
 
 // f32 -> signed i64
 def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
-                           "cvtss2si{q} {$src, $dst|$dst, $src}",
+                           "cvtss2si{q}\t{$src, $dst|$dst, $src}",
                            [(set GR64:$dst,
                              (int_x86_sse_cvtss2si64 VR128:$src))]>;
 def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
-                           "cvtss2si{q} {$src, $dst|$dst, $src}",
+                           "cvtss2si{q}\t{$src, $dst|$dst, $src}",
                            [(set GR64:$dst, (int_x86_sse_cvtss2si64
                                              (load addr:$src)))]>;
 def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
-                        "cvttss2si{q} {$src, $dst|$dst, $src}",
+                        "cvttss2si{q}\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (fp_to_sint FR32:$src))]>;
 def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
-                        "cvttss2si{q} {$src, $dst|$dst, $src}",
+                        "cvttss2si{q}\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
 def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
-                            "cvttss2si{q} {$src, $dst|$dst, $src}",
+                            "cvttss2si{q}\t{$src, $dst|$dst, $src}",
                             [(set GR64:$dst,
                               (int_x86_sse_cvttss2si64 VR128:$src))]>;
 def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
-                            "cvttss2si{q} {$src, $dst|$dst, $src}",
+                            "cvttss2si{q}\t{$src, $dst|$dst, $src}",
                             [(set GR64:$dst,
                               (int_x86_sse_cvttss2si64 (load addr:$src)))]>;
 
 let isTwoAddress = 1 in {
   def Int_CVTSI642SSrr : RSSI<0x2A, MRMSrcReg,
                               (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
-                              "cvtsi2ss{q} {$src2, $dst|$dst, $src2}",
+                              "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
                               [(set VR128:$dst,
                                 (int_x86_sse_cvtsi642ss VR128:$src1,
                                  GR64:$src2))]>;
   def Int_CVTSI642SSrm : RSSI<0x2A, MRMSrcMem,
                               (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
-                              "cvtsi2ss{q} {$src2, $dst|$dst, $src2}",
+                              "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
                               [(set VR128:$dst,
                                 (int_x86_sse_cvtsi642ss VR128:$src1,
                                  (loadi64 addr:$src2)))]>;
@@ -1008,10 +1008,10 @@
 // Zero-extension
 // TODO: Remove this after proper i32 -> i64 zext support.
 def PsMOVZX64rr32: I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
-                     "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                     "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
                      [(set GR64:$dst, (zext GR32:$src))]>;
 def PsMOVZX64rm32: I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
-                     "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                     "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
                      [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
 
 
@@ -1021,13 +1021,13 @@
 // when we have a better way to specify isel priority.
 let AddedComplexity = 1 in
 def MOV64r0  : RI<0x31, MRMInitReg,  (outs GR64:$dst), (ins),
-                 "xor{q} $dst, $dst",
+                 "xor{q}\t$dst, $dst",
                  [(set GR64:$dst, 0)]>;
 
 // Materialize i64 constant where top 32-bits are zero.
 let AddedComplexity = 1 in
 def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
-                        "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                        "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
                         [(set GR64:$dst, i64immZExt32:$src)]>;
 
 //===----------------------------------------------------------------------===//
@@ -1143,33 +1143,33 @@
 // Move instructions...
 
 def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
-                        "mov{d|q} {$src, $dst|$dst, $src}",
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v2i64 (scalar_to_vector GR64:$src)))]>;
 def MOV64toPQIrm : RPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
-                        "mov{d|q} {$src, $dst|$dst, $src}",
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>;
 
 def MOVPQIto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
-                         "mov{d|q} {$src, $dst|$dst, $src}",
+                         "mov{d|q}\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
                                            (iPTR 0)))]>;
 def MOVPQIto64mr  : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
-                         "mov{d|q} {$src, $dst|$dst, $src}",
+                         "mov{d|q}\t{$src, $dst|$dst, $src}",
                          [(store (i64 (vector_extract (v2i64 VR128:$src),
                                        (iPTR 0))), addr:$dst)]>;
 
 def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
-                       "mov{d|q} {$src, $dst|$dst, $src}",
+                       "mov{d|q}\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (bitconvert GR64:$src))]>;
 def MOV64toSDrm : RPDI<0x6E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
-                       "mov{d|q} {$src, $dst|$dst, $src}",
+                       "mov{d|q}\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
 
 def MOVSDto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
-                        "mov{d|q} {$src, $dst|$dst, $src}",
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64:$src))]>;
 def MOVSDto64mr  : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
-                        "mov{d|q} {$src, $dst|$dst, $src}",
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
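
(A note on the "{...|...}" syntax seen throughout these hunks, for anyone
reading the patch without the TableGen docs at hand: the text before the
"|" is the AT&T spelling and the text after it is the Intel spelling, so
a "\t" placed outside the braces becomes the mnemonic/operand separator
in both output modes, while a suffix group like "{q}" expands only in the
AT&T variant. A minimal sketch of the pattern, using a made-up FOO64rr
record rather than one from this patch:

  // Hypothetical opcode/form, for illustration only.
  def FOO64rr : RI<0x00, MRMDestReg,
                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
                   "foo{q}\t{$src2, $dst|$dst, $src2}", []>;

If I'm reading the variant rules correctly, the AT&T printer would emit
roughly "fooq<TAB>%rsi, %rdi" and the Intel printer "foo<TAB>rdi, rsi".)
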
Index: lib/Target/X86/X86InstrMMX.td
===================================================================
--- lib/Target/X86/X86InstrMMX.td	(revision 40587)
+++ lib/Target/X86/X86InstrMMX.td	(working copy)
@@ -108,12 +108,12 @@
   multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            ValueType OpVT, bit Commutable = 0> {
     def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
       let isCommutable = Commutable;
     }
     def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
                                          (bitconvert
                                           (load_mmx addr:$src2)))))]>;
@@ -122,12 +122,12 @@
   multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                                bit Commutable = 0> {
     def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
       let isCommutable = Commutable;
     }
     def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId VR64:$src1,
                                    (bitconvert (load_mmx addr:$src2))))]>;
   }
@@ -140,12 +140,12 @@
   multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  bit Commutable = 0> {
     def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
       let isCommutable = Commutable;
     }
     def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR64:$dst,
                     (OpNode VR64:$src1,(load_mmx addr:$src2)))]>;
   }
@@ -153,14 +153,14 @@
   multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                                 string OpcodeStr, Intrinsic IntId> {
     def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
     def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR64:$dst, (IntId VR64:$src1,
                                     (bitconvert (load_mmx addr:$src2))))]>;
     def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst), (ins VR64:$src1, i32i8imm:$src2),
-                    !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst, (IntId VR64:$src1,
                                       (scalar_to_vector (i32 imm:$src2))))]>;
   }
@@ -179,50 +179,50 @@
 
 // Data Transfer Instructions
 def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
-                        "movd {$src, $dst|$dst, $src}", []>;
+                        "movd\t{$src, $dst|$dst, $src}", []>;
 def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
-                        "movd {$src, $dst|$dst, $src}", []>;
+                        "movd\t{$src, $dst|$dst, $src}", []>;
 def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
-                        "movd {$src, $dst|$dst, $src}", []>;
+                        "movd\t{$src, $dst|$dst, $src}", []>;
 
 def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
-                             "movd {$src, $dst|$dst, $src}", []>;
+                             "movd\t{$src, $dst|$dst, $src}", []>;
 
 def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
-                        "movq {$src, $dst|$dst, $src}", []>;
+                        "movq\t{$src, $dst|$dst, $src}", []>;
 def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
-                        "movq {$src, $dst|$dst, $src}",
+                        "movq\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst, (load_mmx addr:$src))]>;
 def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
-                        "movq {$src, $dst|$dst, $src}",
+                        "movq\t{$src, $dst|$dst, $src}",
                         [(store (v1i64 VR64:$src), addr:$dst)]>;
 
 def MMX_MOVDQ2Qrr : MMXID<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
-                          "movdq2q {$src, $dst|$dst, $src}",
+                          "movdq2q\t{$src, $dst|$dst, $src}",
                           [(set VR64:$dst,
                             (v1i64 (vector_extract (v2i64 VR128:$src),
                                   (iPTR 0))))]>;
 
 def MMX_MOVQ2DQrr : MMXIS<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
-                          "movq2dq {$src, $dst|$dst, $src}",
+                          "movq2dq\t{$src, $dst|$dst, $src}",
                           [(set VR128:$dst,
                             (bitconvert (v1i64 VR64:$src)))]>;
 
 def MMX_MOVNTQmr  : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
-                         "movntq {$src, $dst|$dst, $src}",
+                         "movntq\t{$src, $dst|$dst, $src}",
                          [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
 
 let AddedComplexity = 15 in
 // movd to MMX register zero-extends
 def MMX_MOVZDI2PDIrr : MMX2I<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
-                             "movd {$src, $dst|$dst, $src}",
+                             "movd\t{$src, $dst|$dst, $src}",
                              [(set VR64:$dst,
                                (v2i32 (vector_shuffle immAllZerosV,
                                        (v2i32 (scalar_to_vector GR32:$src)),
                                        MMX_MOVL_shuffle_mask)))]>;
 let AddedComplexity = 20 in
 def MMX_MOVZDI2PDIrm : MMX2I<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
-                             "movd {$src, $dst|$dst, $src}",
+                             "movd\t{$src, $dst|$dst, $src}",
                              [(set VR64:$dst,
                                (v2i32 (vector_shuffle immAllZerosV,
                                        (v2i32 (scalar_to_vector
@@ -284,12 +284,12 @@
 let isTwoAddress = 1 in {
   def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
                          (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                         "pandn {$src2, $dst|$dst, $src2}",
+                         "pandn\t{$src2, $dst|$dst, $src2}",
                          [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
                                                   VR64:$src2)))]>;
   def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
                          (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                         "pandn {$src2, $dst|$dst, $src2}",
+                         "pandn\t{$src2, $dst|$dst, $src2}",
                          [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
                                                   (load addr:$src2))))]>;
 }
@@ -330,13 +330,13 @@
   // Unpack High Packed Data Instructions
   def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, 
                              (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckhbw {$src2, $dst|$dst, $src2}",
+                             "punpckhbw\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
                                       MMX_UNPCKH_shuffle_mask)))]>;
   def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, 
                              (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckhbw {$src2, $dst|$dst, $src2}",
+                             "punpckhbw\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v8i8 (vector_shuffle VR64:$src1,
                                       (bc_v8i8 (load_mmx addr:$src2)),
@@ -344,13 +344,13 @@
 
   def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, 
                              (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckhwd {$src2, $dst|$dst, $src2}",
+                             "punpckhwd\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
                                        MMX_UNPCKH_shuffle_mask)))]>;
   def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, 
                              (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckhwd {$src2, $dst|$dst, $src2}",
+                             "punpckhwd\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v4i16 (vector_shuffle VR64:$src1,
                                        (bc_v4i16 (load_mmx addr:$src2)),
@@ -358,13 +358,13 @@
 
   def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, 
                              (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckhdq {$src2, $dst|$dst, $src2}",
+                             "punpckhdq\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
                                        MMX_UNPCKH_shuffle_mask)))]>;
   def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
                              (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckhdq {$src2, $dst|$dst, $src2}",
+                             "punpckhdq\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v2i32 (vector_shuffle VR64:$src1,
                                        (bc_v2i32 (load_mmx addr:$src2)),
@@ -373,13 +373,13 @@
   // Unpack Low Packed Data Instructions
   def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
                              (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpcklbw {$src2, $dst|$dst, $src2}",
+                             "punpcklbw\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
                                       MMX_UNPCKL_shuffle_mask)))]>;
   def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
                              (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpcklbw {$src2, $dst|$dst, $src2}",
+                             "punpcklbw\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v8i8 (vector_shuffle VR64:$src1,
                                       (bc_v8i8 (load_mmx addr:$src2)),
@@ -387,13 +387,13 @@
 
   def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
                              (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpcklwd {$src2, $dst|$dst, $src2}",
+                             "punpcklwd\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
                                        MMX_UNPCKL_shuffle_mask)))]>;
   def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
                              (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpcklwd {$src2, $dst|$dst, $src2}",
+                             "punpcklwd\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v4i16 (vector_shuffle VR64:$src1,
                                        (bc_v4i16 (load_mmx addr:$src2)),
@@ -401,13 +401,13 @@
 
   def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, 
                              (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckldq {$src2, $dst|$dst, $src2}",
+                             "punpckldq\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
                                        MMX_UNPCKL_shuffle_mask)))]>;
   def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, 
                              (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckldq {$src2, $dst|$dst, $src2}",
+                             "punpckldq\t{$src2, $dst|$dst, $src2}",
                              [(set VR64:$dst,
                                (v2i32 (vector_shuffle VR64:$src1,
                                        (bc_v2i32 (load_mmx addr:$src2)),
@@ -422,14 +422,14 @@
 // -- Shuffle Instructions
 def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
                           (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
-                          "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                          "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR64:$dst,
                             (v4i16 (vector_shuffle
                                     VR64:$src1, (undef),
                                     MMX_PSHUFW_shuffle_mask:$src2)))]>;
 def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
                           (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
-                          "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                          "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR64:$dst,
                             (v4i16 (vector_shuffle
                                     (bc_v4i16 (load_mmx addr:$src1)),
@@ -438,34 +438,34 @@
 
 // -- Conversion Instructions
 def MMX_CVTPD2PIrr  : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                            "cvtpd2pi {$src, $dst|$dst, $src}", []>;
+                            "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
 def MMX_CVTPD2PIrm  : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
-                            "cvtpd2pi {$src, $dst|$dst, $src}", []>;
+                            "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
 
 def MMX_CVTPI2PDrr  : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
-                            "cvtpi2pd {$src, $dst|$dst, $src}", []>;
+                            "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
 def MMX_CVTPI2PDrm  : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
-                            "cvtpi2pd {$src, $dst|$dst, $src}", []>;
+                            "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
 
 def MMX_CVTPI2PSrr  : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
-                           "cvtpi2ps {$src, $dst|$dst, $src}", []>;
+                           "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
 def MMX_CVTPI2PSrm  : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
-                           "cvtpi2ps {$src, $dst|$dst, $src}", []>;
+                           "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
 
 def MMX_CVTPS2PIrr  : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                           "cvtps2pi {$src, $dst|$dst, $src}", []>;
+                           "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
 def MMX_CVTPS2PIrm  : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
-                           "cvtps2pi {$src, $dst|$dst, $src}", []>;
+                           "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
 
 def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                            "cvttpd2pi {$src, $dst|$dst, $src}", []>;
+                            "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
 def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
-                            "cvttpd2pi {$src, $dst|$dst, $src}", []>;
+                            "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
 
 def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                           "cvttps2pi {$src, $dst|$dst, $src}", []>;
+                           "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
 def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
-                           "cvttps2pi {$src, $dst|$dst, $src}", []>;
+                           "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
 
 // Extract / Insert
 def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
@@ -473,18 +473,18 @@
 
 def MMX_PEXTRWri  : MMXIi8<0xC5, MRMSrcReg,
                            (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
-                           "pextrw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                           "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1),
                                              (iPTR imm:$src2)))]>;
 let isTwoAddress = 1 in {
   def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
                       (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i16i8imm:$src3),
-                      "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
+                      "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
                                                GR32:$src2, (iPTR imm:$src3))))]>;
   def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
                      (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
-                     "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
+                     "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      [(set VR64:$dst,
                        (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
                                (i32 (anyext (loadi16 addr:$src2))),
@@ -493,12 +493,12 @@
 
 // Mask creation
 def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
-                          "pmovmskb {$src, $dst|$dst, $src}",
+                          "pmovmskb\t{$src, $dst|$dst, $src}",
                           [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>;
 
 // Misc.
 def MMX_MASKMOVQ : MMXI<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
-                        "maskmovq {$mask, $src|$src, $mask}",
+                        "maskmovq\t{$mask, $src|$src, $mask}",
                         [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>,
                         Imp<[EDI],[]>;
 
@@ -510,10 +510,10 @@
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let isReMaterializable = 1 in {
   def MMX_V_SET0       : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins),
-                              "pxor $dst, $dst",
+                              "pxor\t$dst, $dst",
                               [(set VR64:$dst, (v1i64 immAllZerosV))]>;
   def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins),
-                              "pcmpeqd $dst, $dst",
+                              "pcmpeqd\t$dst, $dst",
                               [(set VR64:$dst, (v1i64 immAllOnesV))]>;
 }
 


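For illustration only (this is not part of the patch), here is the kind of
difference the change makes in the emitted AT&T-syntax output; the register
operands below are made up for the example, but any of the MMX instructions
above would look the same.

Before, with a single space after the mnemonic:

        pxor %mm0, %mm0
        pcmpeqd %mm1, %mm1

After, with a tab after the mnemonic (rendered here at 8-column tab stops),
so the operand columns line up regardless of mnemonic length:

        pxor    %mm0, %mm0
        pcmpeqd %mm1, %mm1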