[llvm-commits] [llvm] r77718 - /llvm/trunk/lib/Target/X86/X86InstrSSE.td
Evan Cheng
evan.cheng at apple.com
Fri Jul 31 13:36:33 PDT 2009
Thanks. This has been on my to-do list forever. While you're cleaning
up the x86 .td file, can you replace all the "let isTwoAddress = 1" with
"let Constraints = ..." as well?
Evan
On Jul 31, 2009, at 1:07 PM, Eric Christopher wrote:
> Author: echristo
> Date: Fri Jul 31 15:07:27 2009
> New Revision: 77718
>
> URL: http://llvm.org/viewvc/llvm-project?rev=77718&view=rev
> Log:
> Whitespace and 80-col cleanup.
>
> Modified:
> llvm/trunk/lib/Target/X86/X86InstrSSE.td
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=77718&r1=77717&r2=77718&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Jul 31 15:07:27 2009
> @@ -1,10 +1,10 @@
> //====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
> -//
> +//
> // The LLVM Compiler Infrastructure
> //
> // This file is distributed under the University of Illinois Open
> Source
> // License. See LICENSE.TXT for details.
> -//
> +//
> //===----------------------------------------------------------------------===//
> //
> // This file describes the X86 SSE instruction set, defining the
> instructions,
> @@ -36,20 +36,20 @@
> def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
> def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
> def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
> -def X86pshufb : SDNode<"X86ISD::PSHUFB",
> +def X86pshufb : SDNode<"X86ISD::PSHUFB",
> SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>,
> SDTCisSameAs<0,1>,
> SDTCisSameAs<0,2>]>>;
> def X86pextrb : SDNode<"X86ISD::PEXTRB",
> SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
> SDTCisPtrTy<2>]>>;
> def X86pextrw : SDNode<"X86ISD::PEXTRW",
> SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
> SDTCisPtrTy<2>]>>;
> -def X86pinsrb : SDNode<"X86ISD::PINSRB",
> +def X86pinsrb : SDNode<"X86ISD::PINSRB",
> SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>,
> SDTCisSameAs<0,1>,
> SDTCisVT<2, i32>,
> SDTCisPtrTy<3>]>>;
> -def X86pinsrw : SDNode<"X86ISD::PINSRW",
> +def X86pinsrw : SDNode<"X86ISD::PINSRW",
> SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>,
> SDTCisSameAs<0,1>,
> SDTCisVT<2, i32>,
> SDTCisPtrTy<3>]>>;
> -def X86insrtps : SDNode<"X86ISD::INSERTPS",
> +def X86insrtps : SDNode<"X86ISD::INSERTPS",
> SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>,
> SDTCisSameAs<0,1>,
> SDTCisVT<2, v4f32>,
> SDTCisPtrTy<3>]>>;
> def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
> @@ -182,13 +182,13 @@
> return getI8Imm(X86::getShuffleSHUFImmediate(N));
> }]>;
>
> -// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle
> mask to
> +// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle
> mask to
> // PSHUFHW imm.
> def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
> return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
> }]>;
>
> -// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle
> mask to
> +// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle
> mask to
> // PSHUFLW imm.
> def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
> return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
> @@ -363,25 +363,25 @@
> [(set VR64:$dst, (int_x86_sse_cvtps2pi
> VR128:$src))]>;
> def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins
> f64mem:$src),
> "cvtps2pi\t{$src, $dst|$dst, $src}",
> - [(set VR64:$dst, (int_x86_sse_cvtps2pi
> + [(set VR64:$dst, (int_x86_sse_cvtps2pi
> (load addr:$src)))]>;
> def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins
> VR128:$src),
> "cvttps2pi\t{$src, $dst|$dst, $src}",
> [(set VR64:$dst, (int_x86_sse_cvttps2pi
> VR128:$src))]>;
> def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins
> f64mem:$src),
> "cvttps2pi\t{$src, $dst|$dst, $src}",
> - [(set VR64:$dst, (int_x86_sse_cvttps2pi
> + [(set VR64:$dst, (int_x86_sse_cvttps2pi
> (load addr:$src)))]>;
> let Constraints = "$src1 = $dst" in {
> - def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
> + def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR64:$src2),
> "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst, (int_x86_sse_cvtpi2ps
> VR128:$src1,
> VR64:$src2))]>;
> - def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
> + def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1,
> i64mem:$src2),
> "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
> - [(set VR128:$dst, (int_x86_sse_cvtpi2ps
> VR128:$src1,
> + [(set VR128:$dst, (int_x86_sse_cvtpi2ps
> VR128:$src1,
> (load addr:$src2)))]>;
> }
>
> @@ -410,11 +410,11 @@
>
> // Comparison instructions
> let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
> - def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
> + def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
> (outs FR32:$dst), (ins FR32:$src1, FR32:$src,
> SSECC:$cc),
> "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
> let mayLoad = 1 in
> - def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
> + def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
> (outs FR32:$dst), (ins FR32:$src1, f32mem:$src,
> SSECC:$cc),
> "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
> }
> @@ -431,13 +431,15 @@
>
> // Aliases to match intrinsics which expect XMM operand(s).
> let Constraints = "$src1 = $dst" in {
> - def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
> - (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src, SSECC:$cc),
> + def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
> + (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src,
> + SSECC:$cc),
> "cmp${cc}ss\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (int_x86_sse_cmp_ss
> VR128:$src1,
> - VR128:$src, imm:$cc))]>;
> - def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
> - (outs VR128:$dst), (ins VR128:$src1, f32mem:
> $src, SSECC:$cc),
> + VR128:$src, imm:$cc))]>;
> + def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
> + (outs VR128:$dst), (ins VR128:$src1, f32mem:
> $src,
> + SSECC:$cc),
> "cmp${cc}ss\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (int_x86_sse_cmp_ss
> VR128:$src1,
> (load addr:$src), imm:
> $cc))]>;
> @@ -463,8 +465,8 @@
> (implicit EFLAGS)]>;
> } // Defs = [EFLAGS]
>
> -// Aliases of packed SSE1 instructions for scalar use. These all
> have names that
> -// start with 'Fs'.
> +// Aliases of packed SSE1 instructions for scalar use. These all
> have names
> +// that start with 'Fs'.
>
> // Alias instructions that map fld0 to pxor for sse.
> let isReMaterializable = 1, isAsCheapAsAMove = 1 in
> @@ -474,7 +476,7 @@
>
> // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper
> bits are
> // disregarded.
> -let neverHasSideEffects = 1 in
> +let neverHasSideEffects = 1 in
> def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins
> FR32:$src),
> "movaps\t{$src, $dst|$dst, $src}", []>;
>
> @@ -555,7 +557,7 @@
> (ins FR32:$src1, f32mem:$src2),
> !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst,
> $src2}"),
> [(set FR32:$dst, (OpNode FR32:$src1, (load addr:
> $src2)))]>;
> -
> +
> // Vector operation, reg+reg.
> def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
> (ins VR128:$src1, VR128:$src2),
> @@ -619,7 +621,7 @@
> (ins FR32:$src1, f32mem:$src2),
> !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst,
> $src2}"),
> [(set FR32:$dst, (OpNode FR32:$src1, (load addr:
> $src2)))]>;
> -
> +
> // Vector operation, reg+reg.
> def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
> (ins VR128:$src1, VR128:$src2),
> @@ -674,7 +676,7 @@
> // SSE packed FP Instructions
>
> // Move Instructions
> -let neverHasSideEffects = 1 in
> +let neverHasSideEffects = 1 in
> def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins
> VR128:$src),
> "movaps\t{$src, $dst|$dst, $src}", []>;
> let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects =
> 1 in
> @@ -711,13 +713,13 @@
> def MOVLPSrm : PSI<0x12, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, f64mem:
> $src2),
> "movlps\t{$src2, $dst|$dst, $src2}",
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (movlp VR128:$src1,
> (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:
> $src2))))))]>;
> def MOVHPSrm : PSI<0x16, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, f64mem:
> $src2),
> "movhps\t{$src2, $dst|$dst, $src2}",
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (movhp VR128:$src1,
> (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:
> $src2))))))]>;
> } // AddedComplexity
> @@ -792,7 +794,7 @@
> def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
> !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
> [(set FR32:$dst, (OpNode (load addr:$src)))]>;
> -
> +
> // Vector operation, reg.
> def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
> !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
> @@ -893,12 +895,12 @@
> }
>
> let Constraints = "$src1 = $dst" in {
> - def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
> + def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1, VR128:$src,
> SSECC:$cc),
> "cmp${cc}ps\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
> VR128:$src,
> imm:$cc))]>;
> - def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
> + def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, f128mem:$src,
> SSECC:$cc),
> "cmp${cc}ps\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
> @@ -912,13 +914,13 @@
> // Shuffle and unpack instructions
> let Constraints = "$src1 = $dst" in {
> let isConvertibleToThreeAddress = 1 in // Convert to pshufd
> - def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
> + def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2, i8imm:$src3),
> "shufps\t{$src3, $src2, $dst|$dst, $src2,
> $src3}",
> [(set VR128:$dst,
> (v4f32 (shufp:$src3 VR128:$src1,
> VR128:$src2)))]>;
> - def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
> + def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1,
> f128mem:$src2, i8imm:$src3),
> "shufps\t{$src3, $src2, $dst|$dst, $src2,
> $src3}",
> @@ -927,24 +929,24 @@
> VR128:$src1, (memopv4f32 addr:
> $src2))))]>;
>
> let AddedComplexity = 10 in {
> - def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
> + def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "unpckhps\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v4f32 (unpckh VR128:$src1,
> VR128:$src2)))]>;
> - def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
> + def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1,
> f128mem:$src2),
> "unpckhps\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v4f32 (unpckh VR128:$src1,
> (memopv4f32 addr:$src2))))]
> >;
>
> - def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
> + def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "unpcklps\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v4f32 (unpckl VR128:$src1,
> VR128:$src2)))]>;
> - def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
> + def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1,
> f128mem:$src2),
> "unpcklps\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> @@ -1049,14 +1051,14 @@
> def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins
> f32mem:$src),
> "movss\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32
> (scalar_to_vector
> - (loadf32 addr:
> $src))))))]>;
> + (loadf32 addr:
> $src))))))]>;
>
> def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
> (MOVZSS2PSrm addr:$src)>;
>
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
> // SSE2 Instructions
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
>
> // Move Instructions
> let neverHasSideEffects = 1 in
> @@ -1080,7 +1082,7 @@
> def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins
> FR64:$src),
> "cvtsd2ss\t{$src, $dst|$dst, $src}",
> [(set FR32:$dst, (fround FR64:$src))]>;
> -def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins
> f64mem:$src),
> +def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins
> f64mem:$src),
> "cvtsd2ss\t{$src, $dst|$dst, $src}",
> [(set FR32:$dst, (fround (loadf64 addr:$src)))]
> >;
> def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins
> GR32:$src),
> @@ -1115,21 +1117,21 @@
> [(set VR64:$dst, (int_x86_sse_cvtpd2pi
> VR128:$src))]>;
> def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins
> f128mem:$src),
> "cvtpd2pi\t{$src, $dst|$dst, $src}",
> - [(set VR64:$dst, (int_x86_sse_cvtpd2pi
> + [(set VR64:$dst, (int_x86_sse_cvtpd2pi
> (memop addr:$src)))]>;
> def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins
> VR128:$src),
> "cvttpd2pi\t{$src, $dst|$dst, $src}",
> [(set VR64:$dst, (int_x86_sse_cvttpd2pi
> VR128:$src))]>;
> def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins
> f128mem:$src),
> "cvttpd2pi\t{$src, $dst|$dst, $src}",
> - [(set VR64:$dst, (int_x86_sse_cvttpd2pi
> + [(set VR64:$dst, (int_x86_sse_cvttpd2pi
> (memop addr:$src)))]>;
> def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins
> VR64:$src),
> "cvtpi2pd\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (int_x86_sse_cvtpi2pd
> VR64:$src))]>;
> def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins
> i64mem:$src),
> "cvtpi2pd\t{$src, $dst|$dst, $src}",
> - [(set VR128:$dst, (int_x86_sse_cvtpi2pd
> + [(set VR128:$dst, (int_x86_sse_cvtpi2pd
> (load addr:$src)))]>;
>
> // Aliases for intrinsics
> @@ -1144,11 +1146,11 @@
>
> // Comparison instructions
> let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
> - def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
> + def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
> (outs FR64:$dst), (ins FR64:$src1, FR64:$src,
> SSECC:$cc),
> "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
> let mayLoad = 1 in
> - def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
> + def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
> (outs FR64:$dst), (ins FR64:$src1, f64mem:$src,
> SSECC:$cc),
> "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
> }
> @@ -1165,13 +1167,15 @@
>
> // Aliases to match intrinsics which expect XMM operand(s).
> let Constraints = "$src1 = $dst" in {
> - def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
> - (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src, SSECC:$cc),
> + def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
> + (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src,
> + SSECC:$cc),
> "cmp${cc}sd\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (int_x86_sse2_cmp_sd
> VR128:$src1,
> VR128:$src, imm:$cc))]>;
> - def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
> - (outs VR128:$dst), (ins VR128:$src1, f64mem:
> $src, SSECC:$cc),
> + def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
> + (outs VR128:$dst), (ins VR128:$src1, f64mem:
> $src,
> + SSECC:$cc),
> "cmp${cc}sd\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (int_x86_sse2_cmp_sd
> VR128:$src1,
> (load addr:$src), imm:
> $cc))]>;
> @@ -1197,8 +1201,8 @@
> (implicit EFLAGS)]>;
> } // Defs = [EFLAGS]
>
> -// Aliases of packed SSE2 instructions for scalar use. These all
> have names that
> -// start with 'Fs'.
> +// Aliases of packed SSE2 instructions for scalar use. These all
> have names
> +// that start with 'Fs'.
>
> // Alias instructions that map fld0 to pxor for sse.
> let isReMaterializable = 1, isAsCheapAsAMove = 1 in
> @@ -1289,7 +1293,7 @@
> (ins FR64:$src1, f64mem:$src2),
> !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst,
> $src2}"),
> [(set FR64:$dst, (OpNode FR64:$src1, (load addr:
> $src2)))]>;
> -
> +
> // Vector operation, reg+reg.
> def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
> (ins VR128:$src1, VR128:$src2),
> @@ -1353,7 +1357,7 @@
> (ins FR64:$src1, f64mem:$src2),
> !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst,
> $src2}"),
> [(set FR64:$dst, (OpNode FR64:$src1, (load addr:
> $src2)))]>;
> -
> +
> // Vector operation, reg+reg.
> def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
> (ins VR128:$src1, VR128:$src2),
> @@ -1405,7 +1409,7 @@
> defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
> int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
>
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
> // SSE packed FP Instructions
>
> // Move Instructions
> @@ -1445,13 +1449,13 @@
> def MOVLPDrm : PDI<0x12, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, f64mem:
> $src2),
> "movlpd\t{$src2, $dst|$dst, $src2}",
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (v2f64 (movlp VR128:$src1,
> (scalar_to_vector (loadf64 addr:
> $src2)))))]>;
> def MOVHPDrm : PDI<0x16, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, f64mem:
> $src2),
> "movhpd\t{$src2, $dst|$dst, $src2}",
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (v2f64 (movhp VR128:$src1,
> (scalar_to_vector (loadf64 addr:
> $src2)))))]>;
> } // AddedComplexity
> @@ -1567,7 +1571,7 @@
> [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
> VR128:$src1,
> VR128:$src2))]>;
> def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
> - (outs VR128:$dst), (ins VR128:$src1, f64mem:
> $src2),
> + (outs VR128:$dst), (ins VR128:$src1, f64mem:
> $src2),
> "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
> VR128:$src1,
> (load addr:$src2)))]>;
> @@ -1615,7 +1619,7 @@
> def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
> !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
> [(set FR64:$dst, (OpNode (load addr:$src)))]>;
> -
> +
> // Vector operation, reg.
> def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
> !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
> @@ -1715,12 +1719,12 @@
> }
>
> let Constraints = "$src1 = $dst" in {
> - def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
> + def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1, VR128:$src,
> SSECC:$cc),
> "cmp${cc}pd\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (int_x86_sse2_cmp_pd
> VR128:$src1,
> VR128:$src,
> imm:$cc))]>;
> - def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
> + def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, f128mem:$src,
> SSECC:$cc),
> "cmp${cc}pd\t{$src, $dst|$dst, $src}",
> [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
> @@ -1733,12 +1737,12 @@
>
> // Shuffle and unpack instructions
> let Constraints = "$src1 = $dst" in {
> - def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
> + def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1, VR128:$src2,
> i8imm:$src3),
> "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
> [(set VR128:$dst,
> (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
> - def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
> + def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1,
> f128mem:$src2, i8imm:$src3),
> "shufpd\t{$src3, $src2, $dst|$dst, $src2,
> $src3}",
> @@ -1747,24 +1751,24 @@
> VR128:$src1, (memopv2f64 addr:
> $src2))))]>;
>
> let AddedComplexity = 10 in {
> - def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
> + def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "unpckhpd\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v2f64 (unpckh VR128:$src1,
> VR128:$src2)))]>;
> - def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
> + def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1,
> f128mem:$src2),
> "unpckhpd\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v2f64 (unpckh VR128:$src1,
> (memopv2f64 addr:$src2))))]
> >;
>
> - def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
> + def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "unpcklpd\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v2f64 (unpckl VR128:$src1,
> VR128:$src2)))]>;
> - def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
> + def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1,
> f128mem:$src2),
> "unpcklpd\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> @@ -1773,7 +1777,7 @@
> } // Constraints = "$src1 = $dst"
>
>
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
> // SSE integer instructions
>
> // Move Instructions
> @@ -1828,14 +1832,17 @@
> multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format
> ImmForm,
> string OpcodeStr,
> Intrinsic IntId, Intrinsic IntId2> {
> - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
> + VR128:$src2),
> !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
> - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
> i128mem:$src2),
> + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
> + i128mem:$src2),
> !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> [(set VR128:$dst, (IntId VR128:$src1,
> - (bitconvert (memopv2i64
> addr:$src2))))]>;
> - def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins
> VR128:$src1, i32i8imm:$src2),
> + (bitconvert (memopv2i64 addr:
> $src2))))]>;
> + def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1,
> + i32i8imm:$src2),
> !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:
> $src2)))]>;
> }
> @@ -1843,15 +1850,17 @@
> /// PDI_binop_rm - Simple SSE2 binary operator.
> multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
> ValueType OpVT, bit Commutable = 0> {
> - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
> + VR128:$src2),
> !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
> VR128:$src2)))]> {
> let isCommutable = Commutable;
> }
> - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
> i128mem:$src2),
> + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
> + i128mem:$src2),
> !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
> - (bitconvert (memopv2i64 addr:
> $src2)))))]>;
> + (bitconvert (memopv2i64 addr:
> $src2)))))]>;
> }
>
> /// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is
> v2i64.
> @@ -1861,14 +1870,17 @@
> ///
> multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode
> OpNode,
> bit Commutable = 0> {
> - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
> + (ins VR128:$src1, VR128:$src2),
> !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
> VR128:$src2)))]> {
> let isCommutable = Commutable;
> }
> - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
> i128mem:$src2),
> + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
> + (ins VR128:$src1, i128mem:$src2),
> !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
> - [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64
> addr:$src2)))]>;
> + [(set VR128:$dst, (OpNode VR128:$src1,
> + (memopv2i64 addr:$src2)))]>;
> }
>
> } // Constraints = "$src1 = $dst"
> @@ -2032,8 +2044,8 @@
> (outs VR128:$dst), (ins i128mem:$src1, i8imm:
> $src2),
> "pshufd\t{$src2, $src1, $dst|$dst, $src1,
> $src2}",
> [(set VR128:$dst, (v4i32 (pshufd:$src2
> - (bc_v4i32(memopv2i64
> addr:$src1)),
> - (undef))))]>;
> + (bc_v4i32(memopv2i64
> addr:$src1)),
> + (undef))))]>;
>
> // SSE2 with ImmT == Imm8 and XS prefix.
> def PSHUFHWri : Ii8<0x70, MRMSrcReg,
> @@ -2046,8 +2058,8 @@
> (outs VR128:$dst), (ins i128mem:$src1, i8imm:
> $src2),
> "pshufhw\t{$src2, $src1, $dst|$dst, $src1,
> $src2}",
> [(set VR128:$dst, (v8i16 (pshufhw:$src2
> - (bc_v8i16 (memopv2i64
> addr:$src1)),
> - (undef))))]>,
> + (bc_v8i16 (memopv2i64
> addr:$src1)),
> + (undef))))]>,
> XS, Requires<[HasSSE2]>;
>
> // SSE2 with ImmT == Imm8 and XD prefix.
> @@ -2067,90 +2079,90 @@
>
>
> let Constraints = "$src1 = $dst" in {
> - def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
> + def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "punpcklbw\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v16i8 (unpckl VR128:$src1, VR128:$src2)))]
> >;
> - def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
> + def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, i128mem:
> $src2),
> "punpcklbw\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (unpckl VR128:$src1,
> (bc_v16i8 (memopv2i64 addr:
> $src2))))]>;
> - def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
> + def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "punpcklwd\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v8i16 (unpckl VR128:$src1, VR128:$src2)))]
> >;
> - def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
> + def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, i128mem:
> $src2),
> "punpcklwd\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (unpckl VR128:$src1,
> (bc_v8i16 (memopv2i64 addr:
> $src2))))]>;
> - def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
> + def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "punpckldq\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v4i32 (unpckl VR128:$src1, VR128:$src2)))]
> >;
> - def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
> + def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, i128mem:
> $src2),
> "punpckldq\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (unpckl VR128:$src1,
> (bc_v4i32 (memopv2i64 addr:
> $src2))))]>;
> - def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
> + def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "punpcklqdq\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v2i64 (unpckl VR128:$src1, VR128:$src2)))]
> >;
> - def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
> + def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1,
> i128mem:$src2),
> "punpcklqdq\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v2i64 (unpckl VR128:$src1,
> (memopv2i64 addr:$src2))))]>;
> -
> - def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
> +
> + def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "punpckhbw\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v16i8 (unpckh VR128:$src1, VR128:$src2)))]
> >;
> - def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
> + def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, i128mem:
> $src2),
> "punpckhbw\t{$src2, $dst|$dst, $src2}",
> - [(set VR128:$dst,
> - (unpckh VR128:$src1,
> + [(set VR128:$dst,
> + (unpckh VR128:$src1,
> (bc_v16i8 (memopv2i64 addr:
> $src2))))]>;
> - def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
> + def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "punpckhwd\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v8i16 (unpckh VR128:$src1, VR128:$src2)))]
> >;
> - def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
> + def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, i128mem:
> $src2),
> "punpckhwd\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (unpckh VR128:$src1,
> (bc_v8i16 (memopv2i64 addr:
> $src2))))]>;
> - def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
> + def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "punpckhdq\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v4i32 (unpckh VR128:$src1, VR128:$src2)))]
> >;
> - def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
> + def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, i128mem:
> $src2),
> "punpckhdq\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (unpckh VR128:$src1,
> (bc_v4i32 (memopv2i64 addr:
> $src2))))]>;
> - def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
> + def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1,
> VR128:$src2),
> "punpckhqdq\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> (v2i64 (unpckh VR128:$src1, VR128:$src2)))]
> >;
> - def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
> + def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
> (outs VR128:$dst), (ins VR128:$src1, i128mem:
> $src2),
> "punpckhqdq\t{$src2, $dst|$dst, $src2}",
> [(set VR128:$dst,
> @@ -2175,7 +2187,7 @@
> (outs VR128:$dst), (ins VR128:$src1,
> i16mem:$src2, i32i8imm:$src3),
> "pinsrw\t{$src3, $src2, $dst|$dst, $src2,
> $src3}",
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (X86pinsrw VR128:$src1, (extloadi16 addr:
> $src2),
> imm:$src3))]>;
> }
> @@ -2205,7 +2217,7 @@
> [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
> def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst,
> GR32:$src),
> "movnti\t{$src, $dst|$dst, $src}",
> - [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
> + [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
> TB, Requires<[HasSSE2]>;
>
> // Flush cache
> @@ -2220,11 +2232,11 @@
> "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<
> [HasSSE2]>;
>
> //TODO: custom lower this so as to never even generate the noop
> -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8
> imm:$ss),
> +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8
> imm:$ss),
> (i8 0)), (NOOP)>;
> def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)),
> (SFENCE)>;
> def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)),
> (LFENCE)>;
> -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8
> imm:$ss),
> +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8
> imm:$ss),
> (i8 1)), (MFENCE)>;
>
> // Alias instructions that map zero vector to pxor / xorp* for sse.
> @@ -2243,7 +2255,7 @@
> (v2f64 (scalar_to_vector FR64:$src)))]>;
> def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:
> $src),
> "movsd\t{$src, $dst|$dst, $src}",
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (v2f64 (scalar_to_vector (loadf64 addr:
> $src))))]>;
>
> def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins
> GR32:$src),
> @@ -2402,9 +2414,9 @@
> (MOVZPQILo2PQIrm addr:$src)>;
> }
>
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
> // SSE3 Instructions
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
>
> // Move Instructions
> def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins
> VR128:$src),
> @@ -2528,9 +2540,9 @@
> def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)),
> (undef))),
> (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
>
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
> // SSSE3 Instructions
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
>
> /// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is
> v*i8.
> multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
> @@ -2804,12 +2816,13 @@
> def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
> (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
>
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
> // Non-Instruction Patterns
> -//
> =
> =
> =
> ----------------------------------------------------------------------=
> ==//
> +//
> =
> =
> =
> ---------------------------------------------------------------------
> ===//
>
> -// extload f32 -> f64. This matches load+fextend because we have a
> hack in
> -// the isel (PreprocessForFPConvert) that can introduce loads after
> dag combine.
> +// extload f32 -> f64. This matches load+fextend because we have a
> hack in
> +// the isel (PreprocessForFPConvert) that can introduce loads after
> dag
> +// combine.
> // Since these loads aren't folded into the fextend, we have to
> match it
> // explicitly here.
> let Predicates = [HasSSE2] in
> @@ -2887,12 +2900,12 @@
> Requires<[HasSSE2]>;
> // Special unary SHUFPDrri case.
> def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
> - (SHUFPDrri VR128:$src1, VR128:$src1,
> + (SHUFPDrri VR128:$src1, VR128:$src1,
> (SHUFFLE_get_shuf_imm VR128:$src3))>,
> Requires<[HasSSE2]>;
> // Special unary SHUFPDrri case.
> def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
> - (SHUFPDrri VR128:$src1, VR128:$src1,
> + (SHUFPDrri VR128:$src1, VR128:$src1,
> (SHUFFLE_get_shuf_imm VR128:$src3))>,
> Requires<[HasSSE2]>;
> // Unary v4f32 shuffle with PSHUF* in order to fold a load.
> @@ -2902,16 +2915,16 @@
>
> // Special binary v4i32 shuffle cases with SHUFPS.
> def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
> - (SHUFPSrri VR128:$src1, VR128:$src2,
> + (SHUFPSrri VR128:$src1, VR128:$src2,
> (SHUFFLE_get_shuf_imm VR128:$src3))>,
> Requires<[HasSSE2]>;
> def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64
> addr:$src2)))),
> - (SHUFPSrmi VR128:$src1, addr:$src2,
> + (SHUFPSrmi VR128:$src1, addr:$src2,
> (SHUFFLE_get_shuf_imm VR128:$src3))>,
> Requires<[HasSSE2]>;
> // Special binary v2i64 shuffle cases using SHUFPDrri.
> def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
> - (SHUFPDrri VR128:$src1, VR128:$src2,
> + (SHUFPDrri VR128:$src1, VR128:$src2,
> (SHUFFLE_get_shuf_imm VR128:$src3))>,
> Requires<[HasSSE2]>;
>
> @@ -3033,7 +3046,7 @@
> // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer
> movsd, but
> // fall back to this for SSE1)
> def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
> - (SHUFPSrri VR128:$src2, VR128:$src1,
> + (SHUFPSrri VR128:$src2, VR128:$src1,
> (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<
> [HasSSE1]>;
>
> // Set lowest element and zero upper elements.
> @@ -3100,7 +3113,7 @@
> (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
> def : Pat<(store (v16i8 VR128:$src), addr:$dst),
> (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
> -
> +
> //===----------------------------------------------------------------------===//
> // SSE4.1 Instructions
> //===----------------------------------------------------------------------===//
> @@ -3111,7 +3124,7 @@
> Intrinsic V2F64Int> {
> // Intrinsic operation, reg.
> // Vector intrinsic operation, reg
> - def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
> + def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
> (outs VR128:$dst), (ins VR128:$src1, i32i8imm:
> $src2),
> !strconcat(OpcodeStr,
> "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> @@ -3152,41 +3165,41 @@
> Intrinsic F64Int> {
> // Intrinsic operation, reg.
> def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
> - (outs VR128:$dst),
> + (outs VR128:$dst),
> (ins VR128:$src1, VR128:$src2,
> i32i8imm:$src3),
> !strconcat(OpcodeStr,
> "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (F32Int VR128:$src1, VR128:$src2, imm:
> $src3))]>,
> OpSize;
>
> // Intrinsic operation, mem.
> - def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
> - (outs VR128:$dst),
> + def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
> + (outs VR128:$dst),
> (ins VR128:$src1, ssmem:$src2,
> i32i8imm:$src3),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (F32Int VR128:$src1, sse_load_f32:$src2,
> imm:$src3))]>,
> OpSize;
>
> // Intrinsic operation, reg.
> def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
> - (outs VR128:$dst),
> + (outs VR128:$dst),
> (ins VR128:$src1, VR128:$src2, i32i8imm:
> $src3),
> !strconcat(OpcodeStr,
> "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (F64Int VR128:$src1, VR128:$src2, imm:
> $src3))]>,
> OpSize;
>
> // Intrinsic operation, mem.
> def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
> - (outs VR128:$dst),
> + (outs VR128:$dst),
> (ins VR128:$src1, sdmem:$src2, i32i8imm:
> $src3),
> !strconcat(OpcodeStr,
> "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (F64Int VR128:$src1, sse_load_f64:$src2, imm:
> $src3))]>,
> OpSize;
> }
> @@ -3305,9 +3318,9 @@
> Intrinsic IntId128, bit Commutable
> = 0> {
> def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
> (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (IntId128 VR128:$src1, VR128:$src2, imm:
> $src3))]>,
> OpSize {
> let isCommutable = Commutable;
> @@ -3342,7 +3355,7 @@
> multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr,
> Intrinsic IntId> {
> def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
> (ins VR128:$src1, VR128:$src2),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
> [(set VR128:$dst, (IntId VR128:$src1,
> VR128:$src2, XMM0))]>,
> OpSize;
> @@ -3474,13 +3487,13 @@
> multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
> def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
> (ins VR128:$src1, i32i8imm:$src2),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1),
> imm:$src2))]>,
> OpSize;
> def mr : SS4AIi8<opc, MRMDestMem, (outs),
> (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> []>, OpSize;
> // FIXME:
> @@ -3495,7 +3508,7 @@
> multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
> def mr : SS4AIi8<opc, MRMDestMem, (outs),
> (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> []>, OpSize;
> // FIXME:
> @@ -3510,13 +3523,13 @@
> multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
> def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
> (ins VR128:$src1, i32i8imm:$src2),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> [(set GR32:$dst,
> (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
> OpSize;
> def mr : SS4AIi8<opc, MRMDestMem, (outs),
> (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
> addr:$dst)]>, OpSize;
> @@ -3530,14 +3543,14 @@
> multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
> def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
> (ins VR128:$src1, i32i8imm:$src2),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> [(set GR32:$dst,
> (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:
> $src2))]>,
> OpSize;
> - def mr : SS4AIi8<opc, MRMDestMem, (outs),
> + def mr : SS4AIi8<opc, MRMDestMem, (outs),
> (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
> imm:$src2),
> addr:$dst)]>, OpSize;
> @@ -3556,15 +3569,15 @@
> multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
> def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
> (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]
> >, OpSize;
> def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
> (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
> !strconcat(OpcodeStr,
> "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
> imm:$src3))]>, OpSize;
> }
> @@ -3576,16 +3589,16 @@
> multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
> def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
> (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:
> $src3)))]>,
> OpSize;
> def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
> (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
> !strconcat(OpcodeStr,
> "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (v4i32 (insertelt VR128:$src1, (loadi32 addr:
> $src2),
> imm:$src3)))]>, OpSize;
> }
> @@ -3601,15 +3614,16 @@
> multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
> def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
> (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
> - !strconcat(OpcodeStr,
> + !strconcat(OpcodeStr,
> "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> - (X86insrtps VR128:$src1, VR128:$src2, imm:
> $src3))]>, OpSize;
> + [(set VR128:$dst,
> + (X86insrtps VR128:$src1, VR128:$src2, imm:
> $src3))]>,
> + OpSize;
> def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
> (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
> !strconcat(OpcodeStr,
> "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> - [(set VR128:$dst,
> + [(set VR128:$dst,
> (X86insrtps VR128:$src1,
> (v4f32 (scalar_to_vector (loadf32
> addr:$src2))),
> imm:$src3))]>, OpSize;
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list