[llvm] r337419 - [X86][SSE] Canonicalize scalar fp arithmetic shuffle patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 18 12:55:19 PDT 2018
Author: rksimon
Date: Wed Jul 18 12:55:19 2018
New Revision: 337419
URL: http://llvm.org/viewvc/llvm-project?rev=337419&view=rev
Log:
[X86][SSE] Canonicalize scalar fp arithmetic shuffle patterns
As discussed on PR38197, this canonicalizes MOVS*(N0, OP(N0, N1)) --> MOVS*(N0, SCALAR_TO_VECTOR(OP(N0[0], N1[0])))
This restores the scalar-fp codegen lost by rL336971.
Additionally, it handles the OP(N1, N0) case for commutable (FADD/FMUL) ops.
Differential Revision: https://reviews.llvm.org/D49474
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=337419&r1=337418&r2=337419&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jul 18 12:55:19 2018
@@ -30698,8 +30698,37 @@ static SDValue combineTargetShuffle(SDVa
}
case X86ISD::MOVSD:
case X86ISD::MOVSS: {
- SDValue V0 = peekThroughBitcasts(N->getOperand(0));
- SDValue V1 = peekThroughBitcasts(N->getOperand(1));
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+
+ // Canonicalize scalar FPOps:
+ // MOVS*(N0, OP(N0, N1)) --> MOVS*(N0, SCALAR_TO_VECTOR(OP(N0[0], N1[0])))
+ // If commutable, allow OP(N1[0], N0[0]).
+ unsigned Opcode1 = N1.getOpcode();
+ if (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL || Opcode1 == ISD::FSUB ||
+ Opcode1 == ISD::FDIV) {
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+ if (N10 == N0 ||
+ (N11 == N0 && (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL))) {
+ if (N10 != N0)
+ std::swap(N10, N11);
+ MVT SVT = VT.getVectorElementType();
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
+ N10 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N10, ZeroIdx);
+ N11 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N11, ZeroIdx);
+ SDValue Scl = DAG.getNode(Opcode1, DL, SVT, N10, N11);
+ SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
+ DCI.AddToWorklist(N10.getNode());
+ DCI.AddToWorklist(N11.getNode());
+ DCI.AddToWorklist(Scl.getNode());
+ DCI.AddToWorklist(SclVec.getNode());
+ return DAG.getNode(Opcode, DL, VT, N0, SclVec);
+ }
+ }
+
+ SDValue V0 = peekThroughBitcasts(N0);
+ SDValue V1 = peekThroughBitcasts(N1);
bool isZero0 = ISD::isBuildVectorAllZeros(V0.getNode());
bool isZero1 = ISD::isBuildVectorAllZeros(V1.getNode());
if (isZero0 && isZero1)
Modified: llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll?rev=337419&r1=337418&r2=337419&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll Wed Jul 18 12:55:19 2018
@@ -655,22 +655,14 @@ define <2 x double> @blend_div_sd(<2 x d
; from a packed fp instruction plus a vector insert.
define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test_add_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addps %xmm0, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test_add_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addps %xmm0, %xmm1
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test_add_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_add_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <4 x float> %a, %b
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -678,25 +670,14 @@ define <4 x float> @insert_test_add_ss(<
}
define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test_sub_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: subps %xmm1, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test_sub_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: subps %xmm1, %xmm2
-; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
-; SSE41-NEXT: movaps %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test_sub_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: subss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_sub_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fsub <4 x float> %a, %b
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -704,22 +685,14 @@ define <4 x float> @insert_test_sub_ss(<
}
define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test_mul_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulps %xmm0, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test_mul_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulps %xmm0, %xmm1
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test_mul_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: mulss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_mul_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <4 x float> %a, %b
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -727,25 +700,14 @@ define <4 x float> @insert_test_mul_ss(<
}
define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test_div_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: divps %xmm1, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test_div_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: divps %xmm1, %xmm2
-; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
-; SSE41-NEXT: movaps %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test_div_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: divss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_div_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fdiv <4 x float> %a, %b
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -753,22 +715,14 @@ define <4 x float> @insert_test_div_ss(<
}
define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test_add_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addpd %xmm0, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test_add_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addpd %xmm0, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test_add_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: addsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_add_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <2 x double> %a, %b
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
@@ -776,25 +730,14 @@ define <2 x double> @insert_test_add_sd(
}
define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test_sub_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm0, %xmm2
-; SSE2-NEXT: subpd %xmm1, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test_sub_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movapd %xmm0, %xmm2
-; SSE41-NEXT: subpd %xmm1, %xmm2
-; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
-; SSE41-NEXT: movapd %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test_sub_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: subsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_sub_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fsub <2 x double> %a, %b
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
@@ -802,22 +745,14 @@ define <2 x double> @insert_test_sub_sd(
}
define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test_mul_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulpd %xmm0, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test_mul_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulpd %xmm0, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test_mul_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: mulsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_mul_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <2 x double> %a, %b
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
@@ -825,25 +760,14 @@ define <2 x double> @insert_test_mul_sd(
}
define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test_div_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm0, %xmm2
-; SSE2-NEXT: divpd %xmm1, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test_div_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movapd %xmm0, %xmm2
-; SSE41-NEXT: divpd %xmm1, %xmm2
-; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
-; SSE41-NEXT: movapd %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test_div_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: divsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_div_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fdiv <2 x double> %a, %b
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
@@ -851,23 +775,15 @@ define <2 x double> @insert_test_div_sd(
}
define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test2_add_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addps %xmm1, %xmm0
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test2_add_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addps %xmm1, %xmm0
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test2_add_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: addss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_add_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -875,26 +791,15 @@ define <4 x float> @insert_test2_add_ss(
}
define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test2_sub_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: subps %xmm0, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test2_sub_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: subps %xmm0, %xmm2
-; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
-; SSE41-NEXT: movaps %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test2_sub_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: subss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fsub <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -902,23 +807,15 @@ define <4 x float> @insert_test2_sub_ss(
}
define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test2_mul_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulps %xmm1, %xmm0
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test2_mul_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulps %xmm1, %xmm0
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test2_mul_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: mulss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -926,26 +823,15 @@ define <4 x float> @insert_test2_mul_ss(
}
define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test2_div_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: divps %xmm0, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test2_div_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: divps %xmm0, %xmm2
-; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
-; SSE41-NEXT: movaps %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test2_div_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: divss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_div_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fdiv <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -953,23 +839,15 @@ define <4 x float> @insert_test2_div_ss(
}
define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test2_add_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addpd %xmm1, %xmm0
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test2_add_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addpd %xmm1, %xmm0
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test2_add_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: addsd %xmm0, %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_add_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vaddpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
@@ -977,26 +855,15 @@ define <2 x double> @insert_test2_add_sd
}
define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test2_sub_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm1, %xmm2
-; SSE2-NEXT: subpd %xmm0, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test2_sub_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movapd %xmm1, %xmm2
-; SSE41-NEXT: subpd %xmm0, %xmm2
-; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; SSE41-NEXT: movapd %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test2_sub_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: subsd %xmm0, %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fsub <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
@@ -1004,23 +871,15 @@ define <2 x double> @insert_test2_sub_sd
}
define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test2_mul_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulpd %xmm1, %xmm0
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test2_mul_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulpd %xmm1, %xmm0
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test2_mul_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: mulsd %xmm0, %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vmulpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
@@ -1028,26 +887,15 @@ define <2 x double> @insert_test2_mul_sd
}
define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test2_div_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm1, %xmm2
-; SSE2-NEXT: divpd %xmm0, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test2_div_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movapd %xmm1, %xmm2
-; SSE41-NEXT: divpd %xmm0, %xmm2
-; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; SSE41-NEXT: movapd %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test2_div_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: divsd %xmm0, %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_div_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fdiv <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
@@ -1055,22 +903,14 @@ define <2 x double> @insert_test2_div_sd
}
define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test3_add_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addps %xmm0, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test3_add_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addps %xmm0, %xmm1
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test3_add_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_add_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <4 x float> %a, %b
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
@@ -1078,25 +918,14 @@ define <4 x float> @insert_test3_add_ss(
}
define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test3_sub_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: subps %xmm1, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test3_sub_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: subps %xmm1, %xmm2
-; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
-; SSE41-NEXT: movaps %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test3_sub_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: subss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fsub <4 x float> %a, %b
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
@@ -1104,22 +933,14 @@ define <4 x float> @insert_test3_sub_ss(
}
define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test3_mul_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulps %xmm0, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test3_mul_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulps %xmm0, %xmm1
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test3_mul_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: mulss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <4 x float> %a, %b
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
@@ -1127,25 +948,14 @@ define <4 x float> @insert_test3_mul_ss(
}
define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test3_div_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: divps %xmm1, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test3_div_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: divps %xmm1, %xmm2
-; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
-; SSE41-NEXT: movaps %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test3_div_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: divss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_div_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fdiv <4 x float> %a, %b
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
@@ -1153,22 +963,14 @@ define <4 x float> @insert_test3_div_ss(
}
define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test3_add_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addpd %xmm0, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test3_add_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addpd %xmm0, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test3_add_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: addsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_add_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <2 x double> %a, %b
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
@@ -1176,25 +978,14 @@ define <2 x double> @insert_test3_add_sd
}
define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test3_sub_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm0, %xmm2
-; SSE2-NEXT: subpd %xmm1, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test3_sub_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movapd %xmm0, %xmm2
-; SSE41-NEXT: subpd %xmm1, %xmm2
-; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
-; SSE41-NEXT: movapd %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test3_sub_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: subsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fsub <2 x double> %a, %b
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
@@ -1202,22 +993,14 @@ define <2 x double> @insert_test3_sub_sd
}
define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test3_mul_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulpd %xmm0, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test3_mul_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulpd %xmm0, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test3_mul_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: mulsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <2 x double> %a, %b
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
@@ -1225,25 +1008,14 @@ define <2 x double> @insert_test3_mul_sd
}
define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test3_div_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm0, %xmm2
-; SSE2-NEXT: divpd %xmm1, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test3_div_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movapd %xmm0, %xmm2
-; SSE41-NEXT: divpd %xmm1, %xmm2
-; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
-; SSE41-NEXT: movapd %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test3_div_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: divsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_div_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fdiv <2 x double> %a, %b
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
@@ -1251,23 +1023,15 @@ define <2 x double> @insert_test3_div_sd
}
define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test4_add_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addps %xmm1, %xmm0
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test4_add_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addps %xmm1, %xmm0
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test4_add_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: addss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_add_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <4 x float> %b, %a
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
@@ -1275,26 +1039,15 @@ define <4 x float> @insert_test4_add_ss(
}
define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test4_sub_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: subps %xmm0, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test4_sub_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: subps %xmm0, %xmm2
-; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
-; SSE41-NEXT: movaps %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test4_sub_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: subss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fsub <4 x float> %b, %a
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
@@ -1302,23 +1055,15 @@ define <4 x float> @insert_test4_sub_ss(
}
define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test4_mul_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulps %xmm1, %xmm0
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test4_mul_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulps %xmm1, %xmm0
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test4_mul_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: mulss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <4 x float> %b, %a
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
@@ -1326,26 +1071,15 @@ define <4 x float> @insert_test4_mul_ss(
}
define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test4_div_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: divps %xmm0, %xmm2
-; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test4_div_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: divps %xmm0, %xmm2
-; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
-; SSE41-NEXT: movaps %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test4_div_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: divss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_div_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fdiv <4 x float> %b, %a
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
@@ -1353,23 +1087,15 @@ define <4 x float> @insert_test4_div_ss(
}
define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test4_add_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addpd %xmm1, %xmm0
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test4_add_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addpd %xmm1, %xmm0
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test4_add_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: addsd %xmm0, %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_add_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vaddpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
@@ -1377,26 +1103,15 @@ define <2 x double> @insert_test4_add_sd
}
define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test4_sub_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm1, %xmm2
-; SSE2-NEXT: subpd %xmm0, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test4_sub_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movapd %xmm1, %xmm2
-; SSE41-NEXT: subpd %xmm0, %xmm2
-; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; SSE41-NEXT: movapd %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test4_sub_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: subsd %xmm0, %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fsub <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
@@ -1404,23 +1119,15 @@ define <2 x double> @insert_test4_sub_sd
}
define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test4_mul_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulpd %xmm1, %xmm0
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test4_mul_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulpd %xmm1, %xmm0
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test4_mul_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: mulsd %xmm0, %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vmulpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
@@ -1428,26 +1135,15 @@ define <2 x double> @insert_test4_mul_sd
}
define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test4_div_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm1, %xmm2
-; SSE2-NEXT: divpd %xmm0, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test4_div_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movapd %xmm1, %xmm2
-; SSE41-NEXT: divpd %xmm0, %xmm2
-; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; SSE41-NEXT: movapd %xmm2, %xmm0
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test4_div_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: divsd %xmm0, %xmm1
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_div_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fdiv <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
@@ -1455,22 +1151,14 @@ define <2 x double> @insert_test4_div_sd
}
define <4 x float> @insert_test5_add_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test5_add_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addps %xmm0, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test5_add_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addps %xmm0, %xmm1
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test5_add_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_add_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -1501,22 +1189,14 @@ define <4 x float> @insert_test5_sub_ss(
}
define <4 x float> @insert_test5_mul_ss(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: insert_test5_mul_ss:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulps %xmm0, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test5_mul_ss:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulps %xmm0, %xmm1
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test5_mul_ss:
+; SSE: # %bb.0:
+; SSE-NEXT: mulss %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_mul_ss:
; AVX: # %bb.0:
-; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -1547,22 +1227,14 @@ define <4 x float> @insert_test5_div_ss(
}
define <2 x double> @insert_test5_add_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test5_add_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: addpd %xmm0, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test5_add_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: addpd %xmm0, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test5_add_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: addsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_add_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vaddpd %xmm0, %xmm1, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fadd <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
@@ -1593,22 +1265,14 @@ define <2 x double> @insert_test5_sub_sd
}
define <2 x double> @insert_test5_mul_sd(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: insert_test5_mul_sd:
-; SSE2: # %bb.0:
-; SSE2-NEXT: mulpd %xmm0, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSE41-LABEL: insert_test5_mul_sd:
-; SSE41: # %bb.0:
-; SSE41-NEXT: mulpd %xmm0, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE41-NEXT: ret{{[l|q]}}
+; SSE-LABEL: insert_test5_mul_sd:
+; SSE: # %bb.0:
+; SSE-NEXT: mulsd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_mul_sd:
; AVX: # %bb.0:
-; AVX-NEXT: vmulpd %xmm0, %xmm1, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%1 = fmul <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
More information about the llvm-commits mailing list