[llvm] r336971 - [X86] Remove isel patterns that turns packed add/sub/mul/div+movss/sd into scalar intrinsic instructions.
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 17 11:48:57 PDT 2018
Hi Craig,
One of our internal tests discovered a regression in the generated code that is caused by this change. I have filed the details as PR38197; could you please take a look?
Douglas Yung
> -----Original Message-----
> From: llvm-commits [mailto:llvm-commits-bounces at lists.llvm.org] On
> Behalf Of Craig Topper via llvm-commits
> Sent: Thursday, July 12, 2018 21:51
> To: llvm-commits at lists.llvm.org
> Subject: [llvm] r336971 - [X86] Remove isel patterns that turns packed
> add/sub/mul/div+movss/sd into scalar intrinsic instructions.
>
> Author: ctopper
> Date: Thu Jul 12 21:50:39 2018
> New Revision: 336971
>
> URL: http://llvm.org/viewvc/llvm-project?rev=336971&view=rev
> Log:
> [X86] Remove isel patterns that turns packed add/sub/mul/div+movss/sd
> into scalar intrinsic instructions.
>
> This is not an optimization we should be doing in isel. This is more
> suitable for a DAG combine.
>
> My main concern is a future time when we support more FPENV. Changing a
> packed op to a scalar op could cause us to miss some exceptions that
> should have occurred if we had done a packed op. A DAG combine would be
> better able to manage this.
>
> Modified:
> llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> llvm/trunk/lib/Target/X86/X86InstrSSE.td
> llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=336971&r1=33697
> 0&r2=336971&view=diff
> =======================================================================
> =======
> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Jul 12 21:50:39
> 2018
> @@ -11481,37 +11481,37 @@ multiclass AVX512_scalar_math_fp_pattern
> X86VectorVTInfo _, PatLeaf
> ZeroFP> {
> let Predicates = [HasAVX512] in {
> // extracted scalar math op with insert via movss
> - def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst), (_.VT
> (scalar_to_vector
> - (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
> - _.FRC:$src))))),
> - (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
> - (COPY_TO_REGCLASS _.FRC:$src, VR128X))>;
> -
> - // vector math op with insert via movss
> - def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst),
> - (Op (_.VT VR128X:$dst), (_.VT VR128X:$src)))),
> - (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
> _.VT:$src)>;
> + def : Pat<(MoveNode
> + (_.VT VR128X:$dst),
> + (_.VT (scalar_to_vector
> + (Op (_.EltVT (extractelt (_.VT VR128X:$dst),
> (iPTR 0))),
> + _.FRC:$src)))),
> + (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
> + (COPY_TO_REGCLASS _.FRC:$src, VR128X))>;
>
> // extracted masked scalar math op with insert via movss
> def : Pat<(MoveNode (_.VT VR128X:$src1),
> (scalar_to_vector
> (X86selects VK1WM:$mask,
> - (Op (_.EltVT (extractelt (_.VT
> VR128X:$src1), (iPTR 0))),
> + (Op (_.EltVT
> + (extractelt (_.VT VR128X:$src1),
> (iPTR 0))),
> _.FRC:$src2),
> _.FRC:$src0))),
> - (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk) (COPY_TO_REGCLASS
> _.FRC:$src0, VR128X),
> - VK1WM:$mask, _.VT:$src1,
> - (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
> + (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
> + (COPY_TO_REGCLASS _.FRC:$src0, VR128X),
> + VK1WM:$mask, _.VT:$src1,
> + (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
>
> // extracted masked scalar math op with insert via movss
> def : Pat<(MoveNode (_.VT VR128X:$src1),
> (scalar_to_vector
> (X86selects VK1WM:$mask,
> - (Op (_.EltVT (extractelt (_.VT
> VR128X:$src1), (iPTR 0))),
> + (Op (_.EltVT
> + (extractelt (_.VT VR128X:$src1),
> (iPTR 0))),
> _.FRC:$src2), (_.EltVT ZeroFP)))),
> - (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
> - VK1WM:$mask, _.VT:$src1,
> - (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
> + (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
> + VK1WM:$mask, _.VT:$src1,
> + (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
> }
> }
>
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=336971&r1=336970&r
> 2=336971&view=diff
> =======================================================================
> =======
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Jul 12 21:50:39 2018
> @@ -2642,34 +2642,26 @@ let isCodeGenOnly = 1 in {
> multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode
> Move,
> ValueType VT, ValueType EltTy,
> RegisterClass RC, Predicate
> BasePredicate> {
> - let Predicates = [BasePredicate] in {
> + let Predicates = [BasePredicate] in {
> // extracted scalar math op with insert via movss/movsd
> - def : Pat<(VT (Move (VT VR128:$dst), (VT (scalar_to_vector
> - (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
> - RC:$src))))),
> - (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
> - (COPY_TO_REGCLASS RC:$src, VR128))>;
> -
> - // vector math op with insert via movss/movsd
> def : Pat<(VT (Move (VT VR128:$dst),
> - (Op (VT VR128:$dst), (VT VR128:$src)))),
> - (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst, VT:$src)>;
> - }
> + (VT (scalar_to_vector
> + (Op (EltTy (extractelt (VT VR128:$dst),
> (iPTR 0))),
> + RC:$src))))),
> + (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
> + (COPY_TO_REGCLASS RC:$src, VR128))>;
> + }
>
> - // Repeat for AVX versions of the instructions.
> - let Predicates = [UseAVX] in {
> + // Repeat for AVX versions of the instructions.
> + let Predicates = [UseAVX] in {
> // extracted scalar math op with insert via movss/movsd
> - def : Pat<(VT (Move (VT VR128:$dst), (VT (scalar_to_vector
> - (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
> - RC:$src))))),
> - (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
> - (COPY_TO_REGCLASS RC:$src, VR128))>;
> -
> - // vector math op with insert via movss/movsd
> def : Pat<(VT (Move (VT VR128:$dst),
> - (Op (VT VR128:$dst), (VT VR128:$src)))),
> - (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst, VT:$src)>;
> - }
> + (VT (scalar_to_vector
> + (Op (EltTy (extractelt (VT VR128:$dst),
> (iPTR 0))),
> + RC:$src))))),
> + (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
> + (COPY_TO_REGCLASS RC:$src, VR128))>;
> + }
> }
>
> defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32,
> UseSSE1>;
>
> Modified: llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/CodeGen/X86/sse-scalar-fp-
> arith.ll?rev=336971&r1=336970&r2=336971&view=diff
> =======================================================================
> =======
> --- llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll Thu Jul 12
> 21:50:39 2018
> @@ -655,496 +655,992 @@ define <2 x double> @blend_div_sd(<2 x d
> ; from a packed fp instruction plus a vector insert.
>
> define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b)
> {
> -; SSE-LABEL: insert_test_add_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: addss %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test_add_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test_add_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: addps %xmm0, %xmm1
> +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test_add_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: addps %xmm0, %xmm1
> +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test_add_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test_add_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fadd <4 x float> %a, %b
> %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0,
> i32 5, i32 6, i32 7>
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b)
> {
> -; SSE-LABEL: insert_test_sub_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: subss %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test_sub_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test_sub_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movaps %xmm0, %xmm2
> +; SSE2-NEXT: subps %xmm1, %xmm2
> +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test_sub_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movaps %xmm0, %xmm2
> +; SSE41-NEXT: subps %xmm1, %xmm2
> +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
> +; SSE41-NEXT: movaps %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test_sub_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test_sub_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fsub <4 x float> %a, %b
> %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0,
> i32 5, i32 6, i32 7>
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b)
> {
> -; SSE-LABEL: insert_test_mul_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: mulss %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test_mul_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test_mul_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: mulps %xmm0, %xmm1
> +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test_mul_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: mulps %xmm0, %xmm1
> +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test_mul_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test_mul_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fmul <4 x float> %a, %b
> %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0,
> i32 5, i32 6, i32 7>
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b)
> {
> -; SSE-LABEL: insert_test_div_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: divss %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test_div_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test_div_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movaps %xmm0, %xmm2
> +; SSE2-NEXT: divps %xmm1, %xmm2
> +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test_div_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movaps %xmm0, %xmm2
> +; SSE41-NEXT: divps %xmm1, %xmm2
> +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
> +; SSE41-NEXT: movaps %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test_div_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test_div_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fdiv <4 x float> %a, %b
> %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0,
> i32 5, i32 6, i32 7>
> ret <4 x float> %2
> }
>
> define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test_add_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: addsd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test_add_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test_add_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: addpd %xmm0, %xmm1
> +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test_add_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: addpd %xmm0, %xmm1
> +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test_add_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test_add_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fadd <2 x double> %a, %b
> %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32
> 0, i32 3>
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test_sub_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: subsd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test_sub_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test_sub_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movapd %xmm0, %xmm2
> +; SSE2-NEXT: subpd %xmm1, %xmm2
> +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test_sub_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movapd %xmm0, %xmm2
> +; SSE41-NEXT: subpd %xmm1, %xmm2
> +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
> +; SSE41-NEXT: movapd %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test_sub_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test_sub_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fsub <2 x double> %a, %b
> %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32
> 0, i32 3>
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test_mul_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: mulsd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test_mul_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test_mul_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: mulpd %xmm0, %xmm1
> +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test_mul_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: mulpd %xmm0, %xmm1
> +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test_mul_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test_mul_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fmul <2 x double> %a, %b
> %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32
> 0, i32 3>
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test_div_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: divsd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test_div_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test_div_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movapd %xmm0, %xmm2
> +; SSE2-NEXT: divpd %xmm1, %xmm2
> +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test_div_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movapd %xmm0, %xmm2
> +; SSE41-NEXT: divpd %xmm1, %xmm2
> +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
> +; SSE41-NEXT: movapd %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test_div_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test_div_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fdiv <2 x double> %a, %b
> %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32
> 0, i32 3>
> ret <2 x double> %2
> }
>
> define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test2_add_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: addss %xmm0, %xmm1
> -; SSE-NEXT: movaps %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test2_add_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test2_add_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: addps %xmm1, %xmm0
> +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
> +; SSE2-NEXT: movaps %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test2_add_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: addps %xmm1, %xmm0
> +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test2_add_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test2_add_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fadd <4 x float> %b, %a
> %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0,
> i32 5, i32 6, i32 7>
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test2_sub_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: subss %xmm0, %xmm1
> -; SSE-NEXT: movaps %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test2_sub_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test2_sub_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movaps %xmm1, %xmm2
> +; SSE2-NEXT: subps %xmm0, %xmm2
> +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
> +; SSE2-NEXT: movaps %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test2_sub_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movaps %xmm1, %xmm2
> +; SSE41-NEXT: subps %xmm0, %xmm2
> +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
> +; SSE41-NEXT: movaps %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test2_sub_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test2_sub_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vsubps %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fsub <4 x float> %b, %a
> %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0,
> i32 5, i32 6, i32 7>
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test2_mul_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: mulss %xmm0, %xmm1
> -; SSE-NEXT: movaps %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test2_mul_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test2_mul_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: mulps %xmm1, %xmm0
> +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
> +; SSE2-NEXT: movaps %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test2_mul_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: mulps %xmm1, %xmm0
> +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test2_mul_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vmulps %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test2_mul_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fmul <4 x float> %b, %a
> %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0,
> i32 5, i32 6, i32 7>
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test2_div_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: divss %xmm0, %xmm1
> -; SSE-NEXT: movaps %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test2_div_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test2_div_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movaps %xmm1, %xmm2
> +; SSE2-NEXT: divps %xmm0, %xmm2
> +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
> +; SSE2-NEXT: movaps %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test2_div_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movaps %xmm1, %xmm2
> +; SSE41-NEXT: divps %xmm0, %xmm2
> +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
> +; SSE41-NEXT: movaps %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test2_div_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vdivps %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test2_div_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fdiv <4 x float> %b, %a
> %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0,
> i32 5, i32 6, i32 7>
> ret <4 x float> %2
> }
>
> define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test2_add_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: addsd %xmm0, %xmm1
> -; SSE-NEXT: movapd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test2_add_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test2_add_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: addpd %xmm1, %xmm0
> +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
> +; SSE2-NEXT: movapd %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test2_add_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: addpd %xmm1, %xmm0
> +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test2_add_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test2_add_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vaddpd %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fadd <2 x double> %b, %a
> %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32
> 0, i32 3>
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test2_sub_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: subsd %xmm0, %xmm1
> -; SSE-NEXT: movapd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test2_sub_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test2_sub_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movapd %xmm1, %xmm2
> +; SSE2-NEXT: subpd %xmm0, %xmm2
> +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
> +; SSE2-NEXT: movapd %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test2_sub_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movapd %xmm1, %xmm2
> +; SSE41-NEXT: subpd %xmm0, %xmm2
> +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
> +; SSE41-NEXT: movapd %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test2_sub_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vsubpd %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test2_sub_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vsubpd %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fsub <2 x double> %b, %a
> %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32
> 0, i32 3>
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test2_mul_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: mulsd %xmm0, %xmm1
> -; SSE-NEXT: movapd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test2_mul_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test2_mul_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: mulpd %xmm1, %xmm0
> +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
> +; SSE2-NEXT: movapd %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test2_mul_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: mulpd %xmm1, %xmm0
> +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test2_mul_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vmulpd %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test2_mul_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vmulpd %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fmul <2 x double> %b, %a
> %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32
> 0, i32 3>
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test2_div_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: divsd %xmm0, %xmm1
> -; SSE-NEXT: movapd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test2_div_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test2_div_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movapd %xmm1, %xmm2
> +; SSE2-NEXT: divpd %xmm0, %xmm2
> +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
> +; SSE2-NEXT: movapd %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test2_div_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movapd %xmm1, %xmm2
> +; SSE41-NEXT: divpd %xmm0, %xmm2
> +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
> +; SSE41-NEXT: movapd %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test2_div_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vdivpd %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test2_div_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fdiv <2 x double> %b, %a
> %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32
> 0, i32 3>
> ret <2 x double> %2
> }
>
> define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test3_add_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: addss %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test3_add_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test3_add_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: addps %xmm0, %xmm1
> +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test3_add_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: addps %xmm0, %xmm1
> +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test3_add_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test3_add_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fadd <4 x float> %a, %b
> %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x
> float> %a, <4 x float> %1
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test3_sub_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: subss %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test3_sub_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test3_sub_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movaps %xmm0, %xmm2
> +; SSE2-NEXT: subps %xmm1, %xmm2
> +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test3_sub_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movaps %xmm0, %xmm2
> +; SSE41-NEXT: subps %xmm1, %xmm2
> +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
> +; SSE41-NEXT: movaps %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test3_sub_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test3_sub_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fsub <4 x float> %a, %b
> %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x
> float> %a, <4 x float> %1
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test3_mul_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: mulss %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test3_mul_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test3_mul_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: mulps %xmm0, %xmm1
> +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test3_mul_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: mulps %xmm0, %xmm1
> +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test3_mul_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test3_mul_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fmul <4 x float> %a, %b
> %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x
> float> %a, <4 x float> %1
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test3_div_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: divss %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test3_div_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test3_div_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movaps %xmm0, %xmm2
> +; SSE2-NEXT: divps %xmm1, %xmm2
> +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test3_div_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movaps %xmm0, %xmm2
> +; SSE41-NEXT: divps %xmm1, %xmm2
> +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
> +; SSE41-NEXT: movaps %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test3_div_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test3_div_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fdiv <4 x float> %a, %b
> %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x
> float> %a, <4 x float> %1
> ret <4 x float> %2
> }
>
> define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test3_add_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: addsd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test3_add_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test3_add_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: addpd %xmm0, %xmm1
> +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test3_add_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: addpd %xmm0, %xmm1
> +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test3_add_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test3_add_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fadd <2 x double> %a, %b
> %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x
> double> %1
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test3_sub_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: subsd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test3_sub_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test3_sub_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movapd %xmm0, %xmm2
> +; SSE2-NEXT: subpd %xmm1, %xmm2
> +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test3_sub_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movapd %xmm0, %xmm2
> +; SSE41-NEXT: subpd %xmm1, %xmm2
> +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
> +; SSE41-NEXT: movapd %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test3_sub_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test3_sub_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fsub <2 x double> %a, %b
> %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x
> double> %1
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test3_mul_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: mulsd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test3_mul_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test3_mul_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: mulpd %xmm0, %xmm1
> +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test3_mul_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: mulpd %xmm0, %xmm1
> +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test3_mul_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test3_mul_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fmul <2 x double> %a, %b
> %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x
> double> %1
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test3_div_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: divsd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test3_div_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test3_div_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movapd %xmm0, %xmm2
> +; SSE2-NEXT: divpd %xmm1, %xmm2
> +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test3_div_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movapd %xmm0, %xmm2
> +; SSE41-NEXT: divpd %xmm1, %xmm2
> +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
> +; SSE41-NEXT: movapd %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test3_div_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm1
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test3_div_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm1
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fdiv <2 x double> %a, %b
> %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x
> double> %1
> ret <2 x double> %2
> }
>
> define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test4_add_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: addss %xmm0, %xmm1
> -; SSE-NEXT: movaps %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test4_add_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test4_add_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: addps %xmm1, %xmm0
> +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
> +; SSE2-NEXT: movaps %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test4_add_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: addps %xmm1, %xmm0
> +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test4_add_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test4_add_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fadd <4 x float> %b, %a
> %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x
> float> %b, <4 x float> %1
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test4_sub_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: subss %xmm0, %xmm1
> -; SSE-NEXT: movaps %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test4_sub_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test4_sub_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movaps %xmm1, %xmm2
> +; SSE2-NEXT: subps %xmm0, %xmm2
> +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
> +; SSE2-NEXT: movaps %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test4_sub_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movaps %xmm1, %xmm2
> +; SSE41-NEXT: subps %xmm0, %xmm2
> +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
> +; SSE41-NEXT: movaps %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test4_sub_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test4_sub_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vsubps %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fsub <4 x float> %b, %a
> %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x
> float> %b, <4 x float> %1
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test4_mul_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: mulss %xmm0, %xmm1
> -; SSE-NEXT: movaps %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test4_mul_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test4_mul_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: mulps %xmm1, %xmm0
> +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
> +; SSE2-NEXT: movaps %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test4_mul_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: mulps %xmm1, %xmm0
> +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test4_mul_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vmulps %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test4_mul_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fmul <4 x float> %b, %a
> %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x
> float> %b, <4 x float> %1
> ret <4 x float> %2
> }
>
> define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float>
> %b) {
> -; SSE-LABEL: insert_test4_div_ss:
> -; SSE: # %bb.0:
> -; SSE-NEXT: divss %xmm0, %xmm1
> -; SSE-NEXT: movaps %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test4_div_ss:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test4_div_ss:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movaps %xmm1, %xmm2
> +; SSE2-NEXT: divps %xmm0, %xmm2
> +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
> +; SSE2-NEXT: movaps %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test4_div_ss:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movaps %xmm1, %xmm2
> +; SSE41-NEXT: divps %xmm0, %xmm2
> +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
> +; SSE41-NEXT: movaps %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test4_div_ss:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vdivps %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test4_div_ss:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fdiv <4 x float> %b, %a
> %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x
> float> %b, <4 x float> %1
> ret <4 x float> %2
> }
>
> define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test4_add_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: addsd %xmm0, %xmm1
> -; SSE-NEXT: movapd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test4_add_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test4_add_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: addpd %xmm1, %xmm0
> +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
> +; SSE2-NEXT: movapd %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test4_add_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: addpd %xmm1, %xmm0
> +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test4_add_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test4_add_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vaddpd %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fadd <2 x double> %b, %a
> %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x
> double> %1
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test4_sub_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: subsd %xmm0, %xmm1
> -; SSE-NEXT: movapd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test4_sub_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test4_sub_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movapd %xmm1, %xmm2
> +; SSE2-NEXT: subpd %xmm0, %xmm2
> +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
> +; SSE2-NEXT: movapd %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test4_sub_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movapd %xmm1, %xmm2
> +; SSE41-NEXT: subpd %xmm0, %xmm2
> +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
> +; SSE41-NEXT: movapd %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test4_sub_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vsubpd %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test4_sub_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vsubpd %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fsub <2 x double> %b, %a
> %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x
> double> %1
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test4_mul_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: mulsd %xmm0, %xmm1
> -; SSE-NEXT: movapd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test4_mul_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test4_mul_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: mulpd %xmm1, %xmm0
> +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
> +; SSE2-NEXT: movapd %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test4_mul_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: mulpd %xmm1, %xmm0
> +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test4_mul_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vmulpd %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test4_mul_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vmulpd %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fmul <2 x double> %b, %a
> %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x
> double> %1
> ret <2 x double> %2
> }
>
> define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double>
> %b) {
> -; SSE-LABEL: insert_test4_div_sd:
> -; SSE: # %bb.0:
> -; SSE-NEXT: divsd %xmm0, %xmm1
> -; SSE-NEXT: movapd %xmm1, %xmm0
> -; SSE-NEXT: ret{{[l|q]}}
> -;
> -; AVX-LABEL: insert_test4_div_sd:
> -; AVX: # %bb.0:
> -; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
> -; AVX-NEXT: ret{{[l|q]}}
> +; SSE2-LABEL: insert_test4_div_sd:
> +; SSE2: # %bb.0:
> +; SSE2-NEXT: movapd %xmm1, %xmm2
> +; SSE2-NEXT: divpd %xmm0, %xmm2
> +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
> +; SSE2-NEXT: movapd %xmm1, %xmm0
> +; SSE2-NEXT: ret{{[l|q]}}
> +;
> +; SSE41-LABEL: insert_test4_div_sd:
> +; SSE41: # %bb.0:
> +; SSE41-NEXT: movapd %xmm1, %xmm2
> +; SSE41-NEXT: divpd %xmm0, %xmm2
> +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
> +; SSE41-NEXT: movapd %xmm2, %xmm0
> +; SSE41-NEXT: ret{{[l|q]}}
> +;
> +; AVX1-LABEL: insert_test4_div_sd:
> +; AVX1: # %bb.0:
> +; AVX1-NEXT: vdivpd %xmm0, %xmm1, %xmm0
> +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX1-NEXT: ret{{[l|q]}}
> +;
> +; AVX512-LABEL: insert_test4_div_sd:
> +; AVX512: # %bb.0:
> +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0
> +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
> +; AVX512-NEXT: ret{{[l|q]}}
> %1 = fdiv <2 x double> %b, %a
> %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x
> double> %1
> ret <2 x double> %2
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list