[llvm-commits] [llvm] r77718 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td

Evan Cheng evan.cheng at apple.com
Fri Jul 31 13:36:33 PDT 2009


Thanks. This has been on my todo list forever. While you're cleaning
up the x86 td files, can you replace all the "let isTwoAddress=1" with
"let Constraints = ..." as well?

Evan

On Jul 31, 2009, at 1:07 PM, Eric Christopher wrote:

> Author: echristo
> Date: Fri Jul 31 15:07:27 2009
> New Revision: 77718
>
> URL: http://llvm.org/viewvc/llvm-project?rev=77718&view=rev
> Log:
> Whitespace and 80-col cleanup.
>
> Modified:
>    llvm/trunk/lib/Target/X86/X86InstrSSE.td
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=77718&r1=77717&r2=77718&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Jul 31 15:07:27 2009
> @@ -1,10 +1,10 @@
> //====- X86InstrSSE.td - Describe the X86 Instruction Set --*-  
> tablegen -*-===//
> -//
> +//
> //                     The LLVM Compiler Infrastructure
> //
> // This file is distributed under the University of Illinois Open  
> Source
> // License. See LICENSE.TXT for details.
> -//
> +//
> //===----------------------------------------------------------------------===//
> //
> // This file describes the X86 SSE instruction set, defining the  
> instructions,
> @@ -36,20 +36,20 @@
> def X86fsrl    : SDNode<"X86ISD::FSRL",      SDTX86FPShiftOp>;
> def X86comi    : SDNode<"X86ISD::COMI",      SDTX86CmpTest>;
> def X86ucomi   : SDNode<"X86ISD::UCOMI",     SDTX86CmpTest>;
> -def X86pshufb  : SDNode<"X86ISD::PSHUFB",
> +def X86pshufb  : SDNode<"X86ISD::PSHUFB",
>                  SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>,  
> SDTCisSameAs<0,1>,
>                                       SDTCisSameAs<0,2>]>>;
> def X86pextrb  : SDNode<"X86ISD::PEXTRB",
>                  SDTypeProfile<1, 2, [SDTCisVT<0, i32>,  
> SDTCisPtrTy<2>]>>;
> def X86pextrw  : SDNode<"X86ISD::PEXTRW",
>                  SDTypeProfile<1, 2, [SDTCisVT<0, i32>,  
> SDTCisPtrTy<2>]>>;
> -def X86pinsrb  : SDNode<"X86ISD::PINSRB",
> +def X86pinsrb  : SDNode<"X86ISD::PINSRB",
>                  SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>,  
> SDTCisSameAs<0,1>,
>                                       SDTCisVT<2, i32>,  
> SDTCisPtrTy<3>]>>;
> -def X86pinsrw  : SDNode<"X86ISD::PINSRW",
> +def X86pinsrw  : SDNode<"X86ISD::PINSRW",
>                  SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>,  
> SDTCisSameAs<0,1>,
>                                       SDTCisVT<2, i32>,  
> SDTCisPtrTy<3>]>>;
> -def X86insrtps : SDNode<"X86ISD::INSERTPS",
> +def X86insrtps : SDNode<"X86ISD::INSERTPS",
>                  SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>,  
> SDTCisSameAs<0,1>,
>                                       SDTCisVT<2, v4f32>,  
> SDTCisPtrTy<3>]>>;
> def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
> @@ -182,13 +182,13 @@
>   return getI8Imm(X86::getShuffleSHUFImmediate(N));
> }]>;
>
> -// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle  
> mask to
> +// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle  
> mask to
> // PSHUFHW imm.
> def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
>   return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
> }]>;
>
> -// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle  
> mask to
> +// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle  
> mask to
> // PSHUFLW imm.
> def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
>   return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
> @@ -363,25 +363,25 @@
>                          [(set VR64:$dst, (int_x86_sse_cvtps2pi  
> VR128:$src))]>;
> def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins  
> f64mem:$src),
>                          "cvtps2pi\t{$src, $dst|$dst, $src}",
> -                         [(set VR64:$dst, (int_x86_sse_cvtps2pi
> +                         [(set VR64:$dst, (int_x86_sse_cvtps2pi
>                                            (load addr:$src)))]>;
> def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins  
> VR128:$src),
>                          "cvttps2pi\t{$src, $dst|$dst, $src}",
>                          [(set VR64:$dst, (int_x86_sse_cvttps2pi  
> VR128:$src))]>;
> def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins  
> f64mem:$src),
>                          "cvttps2pi\t{$src, $dst|$dst, $src}",
> -                         [(set VR64:$dst, (int_x86_sse_cvttps2pi
> +                         [(set VR64:$dst, (int_x86_sse_cvttps2pi
>                                            (load addr:$src)))]>;
> let Constraints = "$src1 = $dst" in {
> -  def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
> +  def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
>                            (outs VR128:$dst), (ins VR128:$src1,  
> VR64:$src2),
>                         "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst, (int_x86_sse_cvtpi2ps  
> VR128:$src1,
>                                            VR64:$src2))]>;
> -  def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
> +  def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
>                            (outs VR128:$dst), (ins VR128:$src1,  
> i64mem:$src2),
>                         "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
> -                        [(set VR128:$dst, (int_x86_sse_cvtpi2ps  
> VR128:$src1,
> +                        [(set VR128:$dst, (int_x86_sse_cvtpi2ps  
> VR128:$src1,
>                                             (load addr:$src2)))]>;
> }
>
> @@ -410,11 +410,11 @@
>
> // Comparison instructions
> let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
> -  def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
> +  def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
>                     (outs FR32:$dst), (ins FR32:$src1, FR32:$src,  
> SSECC:$cc),
>                     "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
> let mayLoad = 1 in
> -  def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
> +  def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
>                     (outs FR32:$dst), (ins FR32:$src1, f32mem:$src,  
> SSECC:$cc),
>                     "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
> }
> @@ -431,13 +431,15 @@
>
> // Aliases to match intrinsics which expect XMM operand(s).
> let Constraints = "$src1 = $dst" in {
> -  def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
> -                        (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src, SSECC:$cc),
> +  def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
> +                        (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src,
> +					        SSECC:$cc),
>                         "cmp${cc}ss\t{$src, $dst|$dst, $src}",
>                         [(set VR128:$dst, (int_x86_sse_cmp_ss  
> VR128:$src1,
> -                                           VR128:$src, imm:$cc))]>;
> -  def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
> -                        (outs VR128:$dst), (ins VR128:$src1, f32mem: 
> $src, SSECC:$cc),
> +                                           	VR128:$src, imm:$cc))]>;
> +  def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
> +                        (outs VR128:$dst), (ins VR128:$src1, f32mem: 
> $src,
> +						SSECC:$cc),
>                         "cmp${cc}ss\t{$src, $dst|$dst, $src}",
>                         [(set VR128:$dst, (int_x86_sse_cmp_ss  
> VR128:$src1,
>                                            (load addr:$src), imm: 
> $cc))]>;
> @@ -463,8 +465,8 @@
>                        (implicit EFLAGS)]>;
> } // Defs = [EFLAGS]
>
> -// Aliases of packed SSE1 instructions for scalar use. These all  
> have names that
> -// start with 'Fs'.
> +// Aliases of packed SSE1 instructions for scalar use. These all  
> have names
> +// that start with 'Fs'.
>
> // Alias instructions that map fld0 to pxor for sse.
> let isReMaterializable = 1, isAsCheapAsAMove = 1 in
> @@ -474,7 +476,7 @@
>
> // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper  
> bits are
> // disregarded.
> -let neverHasSideEffects = 1 in
> +let neverHasSideEffects = 1 in
> def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins  
> FR32:$src),
>                      "movaps\t{$src, $dst|$dst, $src}", []>;
>
> @@ -555,7 +557,7 @@
>                                  (ins FR32:$src1, f32mem:$src2),
>                  !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst,  
> $src2}"),
>                  [(set FR32:$dst, (OpNode FR32:$src1, (load addr: 
> $src2)))]>;
> -
> +
>   // Vector operation, reg+reg.
>   def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
>                                  (ins VR128:$src1, VR128:$src2),
> @@ -619,7 +621,7 @@
>                                  (ins FR32:$src1, f32mem:$src2),
>                  !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst,  
> $src2}"),
>                  [(set FR32:$dst, (OpNode FR32:$src1, (load addr: 
> $src2)))]>;
> -
> +
>   // Vector operation, reg+reg.
>   def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
>                                  (ins VR128:$src1, VR128:$src2),
> @@ -674,7 +676,7 @@
> // SSE packed FP Instructions
>
> // Move Instructions
> -let neverHasSideEffects = 1 in
> +let neverHasSideEffects = 1 in
> def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins  
> VR128:$src),
>                    "movaps\t{$src, $dst|$dst, $src}", []>;
> let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects =  
> 1 in
> @@ -711,13 +713,13 @@
>     def MOVLPSrm : PSI<0x12, MRMSrcMem,
>                        (outs VR128:$dst), (ins VR128:$src1, f64mem: 
> $src2),
>                        "movlps\t{$src2, $dst|$dst, $src2}",
> -       [(set VR128:$dst,
> +       [(set VR128:$dst,
>          (movlp VR128:$src1,
>                 (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr: 
> $src2))))))]>;
>     def MOVHPSrm : PSI<0x16, MRMSrcMem,
>                        (outs VR128:$dst), (ins VR128:$src1, f64mem: 
> $src2),
>                        "movhps\t{$src2, $dst|$dst, $src2}",
> -       [(set VR128:$dst,
> +       [(set VR128:$dst,
>          (movhp VR128:$src1,
>                 (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr: 
> $src2))))))]>;
>   } // AddedComplexity
> @@ -792,7 +794,7 @@
>   def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
>                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
>                 [(set FR32:$dst, (OpNode (load addr:$src)))]>;
> -
> +
>   // Vector operation, reg.
>   def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
>               !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
> @@ -893,12 +895,12 @@
> }
>
> let Constraints = "$src1 = $dst" in {
> -  def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
> +  def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
>                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src,  
> SSECC:$cc),
>                     "cmp${cc}ps\t{$src, $dst|$dst, $src}",
>                     [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
>                                                         VR128:$src,  
> imm:$cc))]>;
> -  def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
> +  def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
>                   (outs VR128:$dst), (ins VR128:$src1, f128mem:$src,  
> SSECC:$cc),
>                   "cmp${cc}ps\t{$src, $dst|$dst, $src}",
>                   [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
> @@ -912,13 +914,13 @@
> // Shuffle and unpack instructions
> let Constraints = "$src1 = $dst" in {
>   let isConvertibleToThreeAddress = 1 in // Convert to pshufd
> -    def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
> +    def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
>                           (outs VR128:$dst), (ins VR128:$src1,
>                            VR128:$src2, i8imm:$src3),
>                           "shufps\t{$src3, $src2, $dst|$dst, $src2,  
> $src3}",
>                           [(set VR128:$dst,
>                             (v4f32 (shufp:$src3 VR128:$src1,  
> VR128:$src2)))]>;
> -  def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
> +  def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
>                         (outs VR128:$dst), (ins VR128:$src1,
>                          f128mem:$src2, i8imm:$src3),
>                         "shufps\t{$src3, $src2, $dst|$dst, $src2,  
> $src3}",
> @@ -927,24 +929,24 @@
>                                   VR128:$src1, (memopv4f32 addr: 
> $src2))))]>;
>
>   let AddedComplexity = 10 in {
> -    def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
> +    def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                          "unpckhps\t{$src2, $dst|$dst, $src2}",
>                          [(set VR128:$dst,
>                            (v4f32 (unpckh VR128:$src1,  
> VR128:$src2)))]>;
> -    def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
> +    def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> f128mem:$src2),
>                          "unpckhps\t{$src2, $dst|$dst, $src2}",
>                          [(set VR128:$dst,
>                            (v4f32 (unpckh VR128:$src1,
>                                           (memopv4f32 addr:$src2))))] 
> >;
>
> -    def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
> +    def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                          "unpcklps\t{$src2, $dst|$dst, $src2}",
>                          [(set VR128:$dst,
>                            (v4f32 (unpckl VR128:$src1,  
> VR128:$src2)))]>;
> -    def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
> +    def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> f128mem:$src2),
>                          "unpcklps\t{$src2, $dst|$dst, $src2}",
>                          [(set VR128:$dst,
> @@ -1049,14 +1051,14 @@
> def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins  
> f32mem:$src),
>                       "movss\t{$src, $dst|$dst, $src}",
>                    [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32  
> (scalar_to_vector
> -                                                      (loadf32 addr: 
> $src))))))]>;
> +                                                    (loadf32 addr: 
> $src))))))]>;
>
> def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
>           (MOVZSS2PSrm addr:$src)>;
>
> -// 
> = 
> = 
> = 
> ----------------------------------------------------------------------= 
> ==//
> +// 
> = 
> = 
> = 
> --------------------------------------------------------------------- 
> ===//
> // SSE2 Instructions
> -// 
> = 
> = 
> = 
> ----------------------------------------------------------------------= 
> ==//
> +// 
> = 
> = 
> = 
> --------------------------------------------------------------------- 
> ===//
>
> // Move Instructions
> let neverHasSideEffects = 1 in
> @@ -1080,7 +1082,7 @@
> def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins  
> FR64:$src),
>                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
>                       [(set FR32:$dst, (fround FR64:$src))]>;
> -def CVTSD2SSrm  : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins  
> f64mem:$src),
> +def CVTSD2SSrm  : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins  
> f64mem:$src),
>                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
>                       [(set FR32:$dst, (fround (loadf64 addr:$src)))] 
> >;
> def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins  
> GR32:$src),
> @@ -1115,21 +1117,21 @@
>                          [(set VR64:$dst, (int_x86_sse_cvtpd2pi  
> VR128:$src))]>;
> def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins  
> f128mem:$src),
>                          "cvtpd2pi\t{$src, $dst|$dst, $src}",
> -                         [(set VR64:$dst, (int_x86_sse_cvtpd2pi
> +                         [(set VR64:$dst, (int_x86_sse_cvtpd2pi
>                                            (memop addr:$src)))]>;
> def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins  
> VR128:$src),
>                          "cvttpd2pi\t{$src, $dst|$dst, $src}",
>                          [(set VR64:$dst, (int_x86_sse_cvttpd2pi  
> VR128:$src))]>;
> def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins  
> f128mem:$src),
>                          "cvttpd2pi\t{$src, $dst|$dst, $src}",
> -                         [(set VR64:$dst, (int_x86_sse_cvttpd2pi
> +                         [(set VR64:$dst, (int_x86_sse_cvttpd2pi
>                                            (memop addr:$src)))]>;
> def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins  
> VR64:$src),
>                          "cvtpi2pd\t{$src, $dst|$dst, $src}",
>                          [(set VR128:$dst, (int_x86_sse_cvtpi2pd  
> VR64:$src))]>;
> def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins  
> i64mem:$src),
>                          "cvtpi2pd\t{$src, $dst|$dst, $src}",
> -                         [(set VR128:$dst, (int_x86_sse_cvtpi2pd
> +                         [(set VR128:$dst, (int_x86_sse_cvtpi2pd
>                                             (load addr:$src)))]>;
>
> // Aliases for intrinsics
> @@ -1144,11 +1146,11 @@
>
> // Comparison instructions
> let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
> -  def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
> +  def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
>                     (outs FR64:$dst), (ins FR64:$src1, FR64:$src,  
> SSECC:$cc),
>                     "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
> let mayLoad = 1 in
> -  def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
> +  def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
>                     (outs FR64:$dst), (ins FR64:$src1, f64mem:$src,  
> SSECC:$cc),
>                     "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
> }
> @@ -1165,13 +1167,15 @@
>
> // Aliases to match intrinsics which expect XMM operand(s).
> let Constraints = "$src1 = $dst" in {
> -  def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
> -                        (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src, SSECC:$cc),
> +  def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
> +                        (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src,
> +						SSECC:$cc),
>                         "cmp${cc}sd\t{$src, $dst|$dst, $src}",
>                         [(set VR128:$dst, (int_x86_sse2_cmp_sd  
> VR128:$src1,
>                                            VR128:$src, imm:$cc))]>;
> -  def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
> -                        (outs VR128:$dst), (ins VR128:$src1, f64mem: 
> $src, SSECC:$cc),
> +  def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
> +                        (outs VR128:$dst), (ins VR128:$src1, f64mem: 
> $src,
> +						SSECC:$cc),
>                         "cmp${cc}sd\t{$src, $dst|$dst, $src}",
>                         [(set VR128:$dst, (int_x86_sse2_cmp_sd  
> VR128:$src1,
>                                            (load addr:$src), imm: 
> $cc))]>;
> @@ -1197,8 +1201,8 @@
>                        (implicit EFLAGS)]>;
> } // Defs = [EFLAGS]
>
> -// Aliases of packed SSE2 instructions for scalar use. These all  
> have names that
> -// start with 'Fs'.
> +// Aliases of packed SSE2 instructions for scalar use. These all  
> have names
> +// that start with 'Fs'.
>
> // Alias instructions that map fld0 to pxor for sse.
> let isReMaterializable = 1, isAsCheapAsAMove = 1 in
> @@ -1289,7 +1293,7 @@
>                                  (ins FR64:$src1, f64mem:$src2),
>                  !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst,  
> $src2}"),
>                  [(set FR64:$dst, (OpNode FR64:$src1, (load addr: 
> $src2)))]>;
> -
> +
>   // Vector operation, reg+reg.
>   def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
>                                  (ins VR128:$src1, VR128:$src2),
> @@ -1353,7 +1357,7 @@
>                                  (ins FR64:$src1, f64mem:$src2),
>                  !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst,  
> $src2}"),
>                  [(set FR64:$dst, (OpNode FR64:$src1, (load addr: 
> $src2)))]>;
> -
> +
>   // Vector operation, reg+reg.
>   def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
>                                  (ins VR128:$src1, VR128:$src2),
> @@ -1405,7 +1409,7 @@
> defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
>                             int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
>
> -// 
> = 
> = 
> = 
> ----------------------------------------------------------------------= 
> ==//
> +// 
> = 
> = 
> = 
> --------------------------------------------------------------------- 
> ===//
> // SSE packed FP Instructions
>
> // Move Instructions
> @@ -1445,13 +1449,13 @@
>     def MOVLPDrm : PDI<0x12, MRMSrcMem,
>                        (outs VR128:$dst), (ins VR128:$src1, f64mem: 
> $src2),
>                        "movlpd\t{$src2, $dst|$dst, $src2}",
> -                       [(set VR128:$dst,
> +                       [(set VR128:$dst,
>                          (v2f64 (movlp VR128:$src1,
>                                  (scalar_to_vector (loadf64 addr: 
> $src2)))))]>;
>     def MOVHPDrm : PDI<0x16, MRMSrcMem,
>                        (outs VR128:$dst), (ins VR128:$src1, f64mem: 
> $src2),
>                        "movhpd\t{$src2, $dst|$dst, $src2}",
> -                       [(set VR128:$dst,
> +                       [(set VR128:$dst,
>                          (v2f64 (movhp VR128:$src1,
>                                  (scalar_to_vector (loadf64 addr: 
> $src2)))))]>;
>   } // AddedComplexity
> @@ -1567,7 +1571,7 @@
>                    [(set VR128:$dst, (int_x86_sse2_cvtsd2ss  
> VR128:$src1,
>                                       VR128:$src2))]>;
> def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
> -                        (outs VR128:$dst), (ins VR128:$src1, f64mem: 
> $src2),
> +                        (outs VR128:$dst), (ins VR128:$src1, f64mem: 
> $src2),
>                    "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
>                    [(set VR128:$dst, (int_x86_sse2_cvtsd2ss  
> VR128:$src1,
>                                       (load addr:$src2)))]>;
> @@ -1615,7 +1619,7 @@
>   def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
>                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
>                 [(set FR64:$dst, (OpNode (load addr:$src)))]>;
> -
> +
>   // Vector operation, reg.
>   def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
>               !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
> @@ -1715,12 +1719,12 @@
> }
>
> let Constraints = "$src1 = $dst" in {
> -  def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
> +  def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
>                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src,  
> SSECC:$cc),
>                     "cmp${cc}pd\t{$src, $dst|$dst, $src}",
>                     [(set VR128:$dst, (int_x86_sse2_cmp_pd  
> VR128:$src1,
>                                                         VR128:$src,  
> imm:$cc))]>;
> -  def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
> +  def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
>                   (outs VR128:$dst), (ins VR128:$src1, f128mem:$src,  
> SSECC:$cc),
>                   "cmp${cc}pd\t{$src, $dst|$dst, $src}",
>                   [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
> @@ -1733,12 +1737,12 @@
>
> // Shuffle and unpack instructions
> let Constraints = "$src1 = $dst" in {
> -  def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
> +  def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
>                  (outs VR128:$dst), (ins VR128:$src1, VR128:$src2,  
> i8imm:$src3),
>                  "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
>                  [(set VR128:$dst,
>                    (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
> -  def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
> +  def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
>                         (outs VR128:$dst), (ins VR128:$src1,
>                          f128mem:$src2, i8imm:$src3),
>                         "shufpd\t{$src3, $src2, $dst|$dst, $src2,  
> $src3}",
> @@ -1747,24 +1751,24 @@
>                                   VR128:$src1, (memopv2f64 addr: 
> $src2))))]>;
>
>   let AddedComplexity = 10 in {
> -    def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
> +    def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                          "unpckhpd\t{$src2, $dst|$dst, $src2}",
>                          [(set VR128:$dst,
>                            (v2f64 (unpckh VR128:$src1,  
> VR128:$src2)))]>;
> -    def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
> +    def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> f128mem:$src2),
>                          "unpckhpd\t{$src2, $dst|$dst, $src2}",
>                          [(set VR128:$dst,
>                            (v2f64 (unpckh VR128:$src1,
>                                           (memopv2f64 addr:$src2))))] 
> >;
>
> -    def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
> +    def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                          "unpcklpd\t{$src2, $dst|$dst, $src2}",
>                          [(set VR128:$dst,
>                            (v2f64 (unpckl VR128:$src1,  
> VR128:$src2)))]>;
> -    def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
> +    def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> f128mem:$src2),
>                          "unpcklpd\t{$src2, $dst|$dst, $src2}",
>                          [(set VR128:$dst,
> @@ -1773,7 +1777,7 @@
> } // Constraints = "$src1 = $dst"
>
>
> -// 
> = 
> = 
> = 
> ----------------------------------------------------------------------= 
> ==//
> +// 
> = 
> = 
> = 
> --------------------------------------------------------------------- 
> ===//
> // SSE integer instructions
>
> // Move Instructions
> @@ -1828,14 +1832,17 @@
> multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format  
> ImmForm,
>                              string OpcodeStr,
>                              Intrinsic IntId, Intrinsic IntId2> {
> -  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
> +  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
> +						       VR128:$src2),
>                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
>                [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
> -  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,  
> i128mem:$src2),
> +  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
> +						       i128mem:$src2),
>                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
>                [(set VR128:$dst, (IntId VR128:$src1,
> -                                        (bitconvert (memopv2i64  
> addr:$src2))))]>;
> -  def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins  
> VR128:$src1, i32i8imm:$src2),
> +                                      (bitconvert (memopv2i64 addr: 
> $src2))))]>;
> +  def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1,
> +							i32i8imm:$src2),
>                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
>                [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm: 
> $src2)))]>;
> }
> @@ -1843,15 +1850,17 @@
> /// PDI_binop_rm - Simple SSE2 binary operator.
> multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
>                         ValueType OpVT, bit Commutable = 0> {
> -  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
> +  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
> +						       VR128:$src2),
>                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
>                [(set VR128:$dst, (OpVT (OpNode VR128:$src1,  
> VR128:$src2)))]> {
>     let isCommutable = Commutable;
>   }
> -  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,  
> i128mem:$src2),
> +  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
> +						       i128mem:$src2),
>                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
>                [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
> -                                       (bitconvert (memopv2i64 addr: 
> $src2)))))]>;
> +                                     (bitconvert (memopv2i64 addr: 
> $src2)))))]>;
> }
>
> /// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is  
> v2i64.
> @@ -1861,14 +1870,17 @@
> ///
> multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode  
> OpNode,
>                               bit Commutable = 0> {
> -  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
> +  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
> +			       (ins VR128:$src1, VR128:$src2),
>                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
>                [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,  
> VR128:$src2)))]> {
>     let isCommutable = Commutable;
>   }
> -  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,  
> i128mem:$src2),
> +  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
> +			       (ins VR128:$src1, i128mem:$src2),
>                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> -               [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64  
> addr:$src2)))]>;
> +               [(set VR128:$dst, (OpNode VR128:$src1,
> +					 (memopv2i64 addr:$src2)))]>;
> }
>
> } // Constraints = "$src1 = $dst"
> @@ -2032,8 +2044,8 @@
>                      (outs VR128:$dst), (ins i128mem:$src1, i8imm: 
> $src2),
>                      "pshufd\t{$src2, $src1, $dst|$dst, $src1,  
> $src2}",
>                      [(set VR128:$dst, (v4i32 (pshufd:$src2
> -                                               (bc_v4i32(memopv2i64  
> addr:$src1)),
> -                                               (undef))))]>;
> +                                             (bc_v4i32(memopv2i64  
> addr:$src1)),
> +                                             (undef))))]>;
>
> // SSE2 with ImmT == Imm8 and XS prefix.
> def PSHUFHWri : Ii8<0x70, MRMSrcReg,
> @@ -2046,8 +2058,8 @@
>                     (outs VR128:$dst), (ins i128mem:$src1, i8imm: 
> $src2),
>                     "pshufhw\t{$src2, $src1, $dst|$dst, $src1,  
> $src2}",
>                     [(set VR128:$dst, (v8i16 (pshufhw:$src2
> -                                             (bc_v8i16 (memopv2i64  
> addr:$src1)),
> -                                             (undef))))]>,
> +                                            (bc_v8i16 (memopv2i64  
> addr:$src1)),
> +                                            (undef))))]>,
>                 XS, Requires<[HasSSE2]>;
>
> // SSE2 with ImmT == Imm8 and XD prefix.
> @@ -2067,90 +2079,90 @@
>
>
> let Constraints = "$src1 = $dst" in {
> -  def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
> +  def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
>                         (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                         "punpcklbw\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (v16i8 (unpckl VR128:$src1, VR128:$src2)))] 
> >;
> -  def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
> +  def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
>                         (outs VR128:$dst), (ins VR128:$src1, i128mem: 
> $src2),
>                         "punpcklbw\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (unpckl VR128:$src1,
>                                   (bc_v16i8 (memopv2i64 addr: 
> $src2))))]>;
> -  def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
> +  def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
>                         (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                         "punpcklwd\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (v8i16 (unpckl VR128:$src1, VR128:$src2)))] 
> >;
> -  def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
> +  def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
>                         (outs VR128:$dst), (ins VR128:$src1, i128mem: 
> $src2),
>                         "punpcklwd\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (unpckl VR128:$src1,
>                                   (bc_v8i16 (memopv2i64 addr: 
> $src2))))]>;
> -  def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
> +  def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
>                         (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                         "punpckldq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (v4i32 (unpckl VR128:$src1, VR128:$src2)))] 
> >;
> -  def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
> +  def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
>                         (outs VR128:$dst), (ins VR128:$src1, i128mem: 
> $src2),
>                         "punpckldq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (unpckl VR128:$src1,
>                                   (bc_v4i32 (memopv2i64 addr: 
> $src2))))]>;
> -  def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
> +  def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                          "punpcklqdq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (v2i64 (unpckl VR128:$src1, VR128:$src2)))] 
> >;
> -  def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
> +  def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> i128mem:$src2),
>                          "punpcklqdq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (v2i64 (unpckl VR128:$src1,
>                                          (memopv2i64 addr:$src2))))]>;
> -
> -  def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
> +
> +  def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
>                         (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                         "punpckhbw\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (v16i8 (unpckh VR128:$src1, VR128:$src2)))] 
> >;
> -  def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
> +  def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
>                         (outs VR128:$dst), (ins VR128:$src1, i128mem: 
> $src2),
>                         "punpckhbw\t{$src2, $dst|$dst, $src2}",
> -                        [(set VR128:$dst,
> -                          (unpckh VR128:$src1,
> +                        [(set VR128:$dst,
> +                          (unpckh VR128:$src1,
>                                   (bc_v16i8 (memopv2i64 addr: 
> $src2))))]>;
> -  def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
> +  def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
>                         (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                         "punpckhwd\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (v8i16 (unpckh VR128:$src1, VR128:$src2)))] 
> >;
> -  def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
> +  def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
>                         (outs VR128:$dst), (ins VR128:$src1, i128mem: 
> $src2),
>                         "punpckhwd\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (unpckh VR128:$src1,
>                                   (bc_v8i16 (memopv2i64 addr: 
> $src2))))]>;
> -  def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
> +  def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
>                         (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                         "punpckhdq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (v4i32 (unpckh VR128:$src1, VR128:$src2)))] 
> >;
> -  def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
> +  def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
>                         (outs VR128:$dst), (ins VR128:$src1, i128mem: 
> $src2),
>                         "punpckhdq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (unpckh VR128:$src1,
>                                   (bc_v4i32 (memopv2i64 addr: 
> $src2))))]>;
> -  def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
> +  def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
>                          (outs VR128:$dst), (ins VR128:$src1,  
> VR128:$src2),
>                          "punpckhqdq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
>                           (v2i64 (unpckh VR128:$src1, VR128:$src2)))] 
> >;
> -  def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
> +  def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
>                         (outs VR128:$dst), (ins VR128:$src1, i128mem: 
> $src2),
>                         "punpckhqdq\t{$src2, $dst|$dst, $src2}",
>                         [(set VR128:$dst,
> @@ -2175,7 +2187,7 @@
>                        (outs VR128:$dst), (ins VR128:$src1,
>                         i16mem:$src2, i32i8imm:$src3),
>                        "pinsrw\t{$src3, $src2, $dst|$dst, $src2,  
> $src3}",
> -                       [(set VR128:$dst,
> +                       [(set VR128:$dst,
>                          (X86pinsrw VR128:$src1, (extloadi16 addr: 
> $src2),
>                                     imm:$src3))]>;
> }
> @@ -2205,7 +2217,7 @@
>                     [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
> def MOVNTImr  :   I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst,  
> GR32:$src),
>                     "movnti\t{$src, $dst|$dst, $src}",
> -                    [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
> +                    [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
>                   TB, Requires<[HasSSE2]>;
>
> // Flush cache
> @@ -2220,11 +2232,11 @@
>                "mfence", [(int_x86_sse2_mfence)]>, TB, Requires< 
> [HasSSE2]>;
>
> //TODO: custom lower this so as to never even generate the noop
> -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8  
> imm:$ss),
> +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8  
> imm:$ss),
>            (i8 0)), (NOOP)>;
> def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)),  
> (SFENCE)>;
> def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)),  
> (LFENCE)>;
> -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8  
> imm:$ss),
> +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8  
> imm:$ss),
>            (i8 1)), (MFENCE)>;
>
> // Alias instructions that map zero vector to pxor / xorp* for sse.
> @@ -2243,7 +2255,7 @@
>                         (v2f64 (scalar_to_vector FR64:$src)))]>;
> def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem: 
> $src),
>                      "movsd\t{$src, $dst|$dst, $src}",
> -                     [(set VR128:$dst,
> +                     [(set VR128:$dst,
>                        (v2f64 (scalar_to_vector (loadf64 addr: 
> $src))))]>;
>
> def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins  
> GR32:$src),
> @@ -2402,9 +2414,9 @@
>             (MOVZPQILo2PQIrm addr:$src)>;
> }
>
> -//===----------------------------------------------------------------------===//
> +//===---------------------------------------------------------------------===//
> // SSE3 Instructions
> -//===----------------------------------------------------------------------===//
> +//===---------------------------------------------------------------------===//
>
> // Move Instructions
> def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins  
> VR128:$src),
> @@ -2528,9 +2540,9 @@
>   def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)),  
> (undef))),
>             (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
>
> -//===----------------------------------------------------------------------===//
> +//===---------------------------------------------------------------------===//
> // SSSE3 Instructions
> -//===----------------------------------------------------------------------===//
> +//===---------------------------------------------------------------------===//
>
> /// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is  
> v*i8.
> multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
> @@ -2804,12 +2816,13 @@
> def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
>           (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
>
> -//===----------------------------------------------------------------------===//
> +//===---------------------------------------------------------------------===//
> // Non-Instruction Patterns
> -//===----------------------------------------------------------------------===//
> +//===---------------------------------------------------------------------===//
>
> -// extload f32 -> f64.  This matches load+fextend because we have a  
> hack in
> -// the isel (PreprocessForFPConvert) that can introduce loads after  
> dag combine.
> +// extload f32 -> f64.  This matches load+fextend because we have a  
> hack in
> +// the isel (PreprocessForFPConvert) that can introduce loads after  
> dag
> +// combine.
> // Since these loads aren't folded into the fextend, we have to  
> match it
> // explicitly here.
> let Predicates = [HasSSE2] in
> @@ -2887,12 +2900,12 @@
>       Requires<[HasSSE2]>;
> // Special unary SHUFPDrri case.
> def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
> -          (SHUFPDrri VR128:$src1, VR128:$src1,
> +          (SHUFPDrri VR128:$src1, VR128:$src1,
>                      (SHUFFLE_get_shuf_imm VR128:$src3))>,
>       Requires<[HasSSE2]>;
> // Special unary SHUFPDrri case.
> def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
> -          (SHUFPDrri VR128:$src1, VR128:$src1,
> +          (SHUFPDrri VR128:$src1, VR128:$src1,
>                      (SHUFFLE_get_shuf_imm VR128:$src3))>,
>       Requires<[HasSSE2]>;
> // Unary v4f32 shuffle with PSHUF* in order to fold a load.
> @@ -2902,16 +2915,16 @@
>
> // Special binary v4i32 shuffle cases with SHUFPS.
> def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
> -          (SHUFPSrri VR128:$src1, VR128:$src2,
> +          (SHUFPSrri VR128:$src1, VR128:$src2,
>                      (SHUFFLE_get_shuf_imm VR128:$src3))>,
>            Requires<[HasSSE2]>;
> def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64  
> addr:$src2)))),
> -          (SHUFPSrmi VR128:$src1, addr:$src2,
> +          (SHUFPSrmi VR128:$src1, addr:$src2,
>                     (SHUFFLE_get_shuf_imm VR128:$src3))>,
>            Requires<[HasSSE2]>;
> // Special binary v2i64 shuffle cases using SHUFPDrri.
> def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
> -          (SHUFPDrri VR128:$src1, VR128:$src2,
> +          (SHUFPDrri VR128:$src1, VR128:$src2,
>                      (SHUFFLE_get_shuf_imm VR128:$src3))>,
>           Requires<[HasSSE2]>;
>
> @@ -3033,7 +3046,7 @@
> // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer  
> movsd, but
> // fall back to this for SSE1)
> def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
> -          (SHUFPSrri VR128:$src2, VR128:$src1,
> +          (SHUFPSrri VR128:$src2, VR128:$src1,
>                      (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires< 
> [HasSSE1]>;
>
> // Set lowest element and zero upper elements.
> @@ -3100,7 +3113,7 @@
>           (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
> def : Pat<(store (v16i8 VR128:$src), addr:$dst),
>           (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
> -
> +
> //===----------------------------------------------------------------------===//
> // SSE4.1 Instructions
> //===----------------------------------------------------------------------===//
> @@ -3111,7 +3124,7 @@
>                             Intrinsic V2F64Int> {
>   // Intrinsic operation, reg.
>   // Vector intrinsic operation, reg
> -  def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
> +  def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
>                     (outs VR128:$dst), (ins VR128:$src1, i32i8imm: 
> $src2),
>                     !strconcat(OpcodeStr,
>                     "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> @@ -3152,41 +3165,41 @@
>                             Intrinsic F64Int> {
>   // Intrinsic operation, reg.
>   def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
> -                    (outs VR128:$dst),
> +                    (outs VR128:$dst),
>                                  (ins VR128:$src1, VR128:$src2,  
> i32i8imm:$src3),
>                     !strconcat(OpcodeStr,
>                     "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                    [(set VR128:$dst,
> +                    [(set VR128:$dst,
>                             (F32Int VR128:$src1, VR128:$src2, imm: 
> $src3))]>,
>                     OpSize;
>
>   // Intrinsic operation, mem.
> -  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
> -                    (outs VR128:$dst),
> +  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
> +                    (outs VR128:$dst),
>                                 (ins VR128:$src1, ssmem:$src2,  
> i32i8imm:$src3),
> -                    !strconcat(OpcodeStr,
> +                    !strconcat(OpcodeStr,
>                     "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                    [(set VR128:$dst,
> +                    [(set VR128:$dst,
>                          (F32Int VR128:$src1, sse_load_f32:$src2,  
> imm:$src3))]>,
>                     OpSize;
>
>   // Intrinsic operation, reg.
>   def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
> -                    (outs VR128:$dst),
> +                    (outs VR128:$dst),
>                             (ins VR128:$src1, VR128:$src2, i32i8imm: 
> $src3),
>                     !strconcat(OpcodeStr,
>                     "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                    [(set VR128:$dst,
> +                    [(set VR128:$dst,
>                             (F64Int VR128:$src1, VR128:$src2, imm: 
> $src3))]>,
>                     OpSize;
>
>   // Intrinsic operation, mem.
>   def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
> -                    (outs VR128:$dst),
> +                    (outs VR128:$dst),
>                             (ins VR128:$src1, sdmem:$src2, i32i8imm: 
> $src3),
>                     !strconcat(OpcodeStr,
>                     "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                    [(set VR128:$dst,
> +                    [(set VR128:$dst,
>                         (F64Int VR128:$src1, sse_load_f64:$src2, imm: 
> $src3))]>,
>                     OpSize;
> }
> @@ -3305,9 +3318,9 @@
>                                  Intrinsic IntId128, bit Commutable  
> = 0> {
>     def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
>                     (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
> -                    !strconcat(OpcodeStr,
> +                    !strconcat(OpcodeStr,
>                      "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                    [(set VR128:$dst,
> +                    [(set VR128:$dst,
>                       (IntId128 VR128:$src1, VR128:$src2, imm: 
> $src3))]>,
>                     OpSize {
>       let isCommutable = Commutable;
> @@ -3342,7 +3355,7 @@
>   multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr,  
> Intrinsic IntId> {
>     def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
>                     (ins VR128:$src1, VR128:$src2),
> -                    !strconcat(OpcodeStr,
> +                    !strconcat(OpcodeStr,
>                      "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
>                     [(set VR128:$dst, (IntId VR128:$src1,  
> VR128:$src2, XMM0))]>,
>                     OpSize;
> @@ -3474,13 +3487,13 @@
> multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
>   def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
>                  (ins VR128:$src1, i32i8imm:$src2),
> -                 !strconcat(OpcodeStr,
> +                 !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                  [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1),  
> imm:$src2))]>,
>                  OpSize;
>   def mr : SS4AIi8<opc, MRMDestMem, (outs),
>                  (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
> -                 !strconcat(OpcodeStr,
> +                 !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                  []>, OpSize;
> // FIXME:
> @@ -3495,7 +3508,7 @@
> multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
>   def mr : SS4AIi8<opc, MRMDestMem, (outs),
>                  (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
> -                 !strconcat(OpcodeStr,
> +                 !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                  []>, OpSize;
> // FIXME:
> @@ -3510,13 +3523,13 @@
> multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
>   def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
>                  (ins VR128:$src1, i32i8imm:$src2),
> -                 !strconcat(OpcodeStr,
> +                 !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                  [(set GR32:$dst,
>                   (extractelt (v4i32 VR128:$src1), imm:$src2))]>,  
> OpSize;
>   def mr : SS4AIi8<opc, MRMDestMem, (outs),
>                  (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
> -                 !strconcat(OpcodeStr,
> +                 !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                  [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
>                           addr:$dst)]>, OpSize;
> @@ -3530,14 +3543,14 @@
> multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
>   def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
>                  (ins VR128:$src1, i32i8imm:$src2),
> -                 !strconcat(OpcodeStr,
> +                 !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                  [(set GR32:$dst,
>                     (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm: 
> $src2))]>,
>            OpSize;
> -  def mr : SS4AIi8<opc, MRMDestMem, (outs),
> +  def mr : SS4AIi8<opc, MRMDestMem, (outs),
>                  (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
> -                 !strconcat(OpcodeStr,
> +                 !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
>                  [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)),  
> imm:$src2),
>                           addr:$dst)]>, OpSize;
> @@ -3556,15 +3569,15 @@
>   multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
>     def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
>                    (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
> -                   !strconcat(OpcodeStr,
> +                   !strconcat(OpcodeStr,
>                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                   [(set VR128:$dst,
> +                   [(set VR128:$dst,
>                      (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))] 
> >, OpSize;
>     def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
>                    (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
>                    !strconcat(OpcodeStr,
>                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                   [(set VR128:$dst,
> +                   [(set VR128:$dst,
>                      (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
>                                 imm:$src3))]>, OpSize;
>   }
> @@ -3576,16 +3589,16 @@
>   multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
>     def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
>                    (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
> -                   !strconcat(OpcodeStr,
> +                   !strconcat(OpcodeStr,
>                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                   [(set VR128:$dst,
> +                   [(set VR128:$dst,
>                      (v4i32 (insertelt VR128:$src1, GR32:$src2, imm: 
> $src3)))]>,
>                    OpSize;
>     def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
>                    (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
>                    !strconcat(OpcodeStr,
>                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                   [(set VR128:$dst,
> +                   [(set VR128:$dst,
>                      (v4i32 (insertelt VR128:$src1, (loadi32 addr: 
> $src2),
>                                        imm:$src3)))]>, OpSize;
>   }
> @@ -3601,15 +3614,16 @@
>   multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
>     def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
>                    (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
> -                   !strconcat(OpcodeStr,
> +                   !strconcat(OpcodeStr,
>                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                   [(set VR128:$dst,
> -                     (X86insrtps VR128:$src1, VR128:$src2, imm: 
> $src3))]>, OpSize;
> +                   [(set VR128:$dst,
> +                     (X86insrtps VR128:$src1, VR128:$src2, imm: 
> $src3))]>,
> +		OpSize;
>     def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
>                    (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
>                    !strconcat(OpcodeStr,
>                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> -                   [(set VR128:$dst,
> +                   [(set VR128:$dst,
>                      (X86insrtps VR128:$src1,
>                                 (v4f32 (scalar_to_vector (loadf32  
> addr:$src2))),
>                                  imm:$src3))]>, OpSize;
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list