[llvm] r336971 - [X86] Remove isel patterns that turn packed add/sub/mul/div+movss/sd into scalar intrinsic instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 12 21:50:39 PDT 2018
Author: ctopper
Date: Thu Jul 12 21:50:39 2018
New Revision: 336971
URL: http://llvm.org/viewvc/llvm-project?rev=336971&view=rev
Log:
[X86] Remove isel patterns that turn packed add/sub/mul/div+movss/sd into scalar intrinsic instructions.
This is not an optimization we should be doing in isel. This is more suitable for a DAG combine.
My main concern is a future time when we support more of the FP environment (FPENV). Changing a packed op to a scalar op could cause us to miss some exceptions that should have occurred if we had done a packed op. A DAG combine would be better able to manage this.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=336971&r1=336970&r2=336971&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Jul 12 21:50:39 2018
@@ -11481,37 +11481,37 @@ multiclass AVX512_scalar_math_fp_pattern
X86VectorVTInfo _, PatLeaf ZeroFP> {
let Predicates = [HasAVX512] in {
// extracted scalar math op with insert via movss
- def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst), (_.VT (scalar_to_vector
- (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
- _.FRC:$src))))),
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
- (COPY_TO_REGCLASS _.FRC:$src, VR128X))>;
-
- // vector math op with insert via movss
- def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst),
- (Op (_.VT VR128X:$dst), (_.VT VR128X:$src)))),
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst, _.VT:$src)>;
+ def : Pat<(MoveNode
+ (_.VT VR128X:$dst),
+ (_.VT (scalar_to_vector
+ (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
+ _.FRC:$src)))),
+ (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
+ (COPY_TO_REGCLASS _.FRC:$src, VR128X))>;
// extracted masked scalar math op with insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
(X86selects VK1WM:$mask,
- (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (Op (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src2),
_.FRC:$src0))),
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk) (COPY_TO_REGCLASS _.FRC:$src0, VR128X),
- VK1WM:$mask, _.VT:$src1,
- (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
+ (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
+ (COPY_TO_REGCLASS _.FRC:$src0, VR128X),
+ VK1WM:$mask, _.VT:$src1,
+ (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
// extracted masked scalar math op with insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
(X86selects VK1WM:$mask,
- (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (Op (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src2), (_.EltVT ZeroFP)))),
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
- VK1WM:$mask, _.VT:$src1,
- (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
+ (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
+ VK1WM:$mask, _.VT:$src1,
+ (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
}
}
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=336971&r1=336970&r2=336971&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Jul 12 21:50:39 2018
@@ -2642,34 +2642,26 @@ let isCodeGenOnly = 1 in {
multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
ValueType VT, ValueType EltTy,
RegisterClass RC, Predicate BasePredicate> {
- let Predicates = [BasePredicate] in {
+ let Predicates = [BasePredicate] in {
// extracted scalar math op with insert via movss/movsd
- def : Pat<(VT (Move (VT VR128:$dst), (VT (scalar_to_vector
- (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
- RC:$src))))),
- (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
- (COPY_TO_REGCLASS RC:$src, VR128))>;
-
- // vector math op with insert via movss/movsd
def : Pat<(VT (Move (VT VR128:$dst),
- (Op (VT VR128:$dst), (VT VR128:$src)))),
- (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst, VT:$src)>;
- }
+ (VT (scalar_to_vector
+ (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
+ RC:$src))))),
+ (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
+ (COPY_TO_REGCLASS RC:$src, VR128))>;
+ }
- // Repeat for AVX versions of the instructions.
- let Predicates = [UseAVX] in {
+ // Repeat for AVX versions of the instructions.
+ let Predicates = [UseAVX] in {
// extracted scalar math op with insert via movss/movsd
- def : Pat<(VT (Move (VT VR128:$dst), (VT (scalar_to_vector
- (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
- RC:$src))))),
- (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
- (COPY_TO_REGCLASS RC:$src, VR128))>;
-
- // vector math op with insert via movss/movsd
def : Pat<(VT (Move (VT VR128:$dst),
- (Op (VT VR128:$dst), (VT VR128:$src)))),
- (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst, VT:$src)>;
- }
+ (VT (scalar_to_vector
+ (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
+ RC:$src))))),
+ (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
+ (COPY_TO_REGCLASS RC:$src, VR128))>;
+ }
}
defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, UseSSE1>;
Modified: llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll?rev=336971&r1=336970&r2=336971&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll Thu Jul 12 21:50:39 2018
@@ -655,496 +655,992 @@ define <2 x double> @blend_div_sd(<2 x d
; from a packed fp instruction plus a vector insert.
define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test_add_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: addss %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test_add_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test_add_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: addps %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test_add_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: addps %xmm0, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test_add_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test_add_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fadd <4 x float> %a, %b
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %2
}
define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test_sub_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: subss %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test_sub_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test_sub_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: subps %xmm1, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test_sub_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movaps %xmm0, %xmm2
+; SSE41-NEXT: subps %xmm1, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test_sub_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test_sub_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fsub <4 x float> %a, %b
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %2
}
define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test_mul_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: mulss %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test_mul_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test_mul_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: mulps %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test_mul_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: mulps %xmm0, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test_mul_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test_mul_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fmul <4 x float> %a, %b
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %2
}
define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test_div_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: divss %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test_div_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test_div_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: divps %xmm1, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test_div_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movaps %xmm0, %xmm2
+; SSE41-NEXT: divps %xmm1, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test_div_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test_div_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fdiv <4 x float> %a, %b
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %2
}
define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test_add_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test_add_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test_add_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: addpd %xmm0, %xmm1
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test_add_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: addpd %xmm0, %xmm1
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test_add_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test_add_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fadd <2 x double> %a, %b
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2
}
define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test_sub_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: subsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test_sub_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test_sub_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: subpd %xmm1, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test_sub_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movapd %xmm0, %xmm2
+; SSE41-NEXT: subpd %xmm1, %xmm2
+; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test_sub_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test_sub_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fsub <2 x double> %a, %b
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2
}
define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test_mul_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test_mul_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test_mul_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: mulpd %xmm0, %xmm1
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test_mul_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: mulpd %xmm0, %xmm1
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test_mul_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test_mul_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fmul <2 x double> %a, %b
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2
}
define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test_div_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: divsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test_div_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test_div_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: divpd %xmm1, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test_div_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movapd %xmm0, %xmm2
+; SSE41-NEXT: divpd %xmm1, %xmm2
+; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test_div_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test_div_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fdiv <2 x double> %a, %b
%2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2
}
define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test2_add_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: addss %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test2_add_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test2_add_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: addps %xmm1, %xmm0
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test2_add_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: addps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test2_add_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test2_add_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fadd <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %2
}
define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test2_sub_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: subss %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test2_sub_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test2_sub_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm1, %xmm2
+; SSE2-NEXT: subps %xmm0, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test2_sub_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movaps %xmm1, %xmm2
+; SSE41-NEXT: subps %xmm0, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test2_sub_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test2_sub_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fsub <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %2
}
define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test2_mul_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: mulss %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test2_mul_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test2_mul_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: mulps %xmm1, %xmm0
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test2_mul_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: mulps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test2_mul_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test2_mul_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fmul <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %2
}
define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test2_div_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: divss %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test2_div_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test2_div_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm1, %xmm2
+; SSE2-NEXT: divps %xmm0, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test2_div_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movaps %xmm1, %xmm2
+; SSE41-NEXT: divps %xmm0, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test2_div_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test2_div_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fdiv <4 x float> %b, %a
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %2
}
define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test2_add_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test2_add_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test2_add_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: addpd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test2_add_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: addpd %xmm1, %xmm0
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test2_add_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test2_add_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fadd <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2
}
define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test2_sub_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test2_sub_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test2_sub_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movapd %xmm1, %xmm2
+; SSE2-NEXT: subpd %xmm0, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test2_sub_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movapd %xmm1, %xmm2
+; SSE41-NEXT: subpd %xmm0, %xmm2
+; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test2_sub_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test2_sub_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fsub <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2
}
define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test2_mul_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test2_mul_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test2_mul_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: mulpd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test2_mul_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: mulpd %xmm1, %xmm0
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test2_mul_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmulpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test2_mul_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmulpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fmul <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2
}
define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test2_div_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test2_div_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test2_div_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movapd %xmm1, %xmm2
+; SSE2-NEXT: divpd %xmm0, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test2_div_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movapd %xmm1, %xmm2
+; SSE41-NEXT: divpd %xmm0, %xmm2
+; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test2_div_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vdivpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test2_div_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fdiv <2 x double> %b, %a
%2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %2
}
define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test3_add_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: addss %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test3_add_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test3_add_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: addps %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test3_add_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: addps %xmm0, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test3_add_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test3_add_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fadd <4 x float> %a, %b
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
ret <4 x float> %2
}
define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test3_sub_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: subss %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test3_sub_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test3_sub_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: subps %xmm1, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test3_sub_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movaps %xmm0, %xmm2
+; SSE41-NEXT: subps %xmm1, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test3_sub_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test3_sub_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fsub <4 x float> %a, %b
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
ret <4 x float> %2
}
define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test3_mul_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: mulss %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test3_mul_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test3_mul_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: mulps %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test3_mul_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: mulps %xmm0, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test3_mul_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test3_mul_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fmul <4 x float> %a, %b
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
ret <4 x float> %2
}
define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test3_div_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: divss %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test3_div_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test3_div_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: divps %xmm1, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test3_div_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movaps %xmm0, %xmm2
+; SSE41-NEXT: divps %xmm1, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test3_div_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test3_div_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fdiv <4 x float> %a, %b
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
ret <4 x float> %2
}
define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test3_add_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test3_add_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test3_add_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: addpd %xmm0, %xmm1
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test3_add_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: addpd %xmm0, %xmm1
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test3_add_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test3_add_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fadd <2 x double> %a, %b
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
ret <2 x double> %2
}
define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test3_sub_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: subsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test3_sub_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test3_sub_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: subpd %xmm1, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test3_sub_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movapd %xmm0, %xmm2
+; SSE41-NEXT: subpd %xmm1, %xmm2
+; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test3_sub_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test3_sub_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fsub <2 x double> %a, %b
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
ret <2 x double> %2
}
define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test3_mul_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: mulsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test3_mul_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test3_mul_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: mulpd %xmm0, %xmm1
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test3_mul_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: mulpd %xmm0, %xmm1
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test3_mul_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test3_mul_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fmul <2 x double> %a, %b
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
ret <2 x double> %2
}
define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test3_div_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: divsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test3_div_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test3_div_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: divpd %xmm1, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test3_div_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movapd %xmm0, %xmm2
+; SSE41-NEXT: divpd %xmm1, %xmm2
+; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1]
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test3_div_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test3_div_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fdiv <2 x double> %a, %b
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
ret <2 x double> %2
}
define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test4_add_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: addss %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test4_add_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test4_add_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: addps %xmm1, %xmm0
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test4_add_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: addps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test4_add_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test4_add_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fadd <4 x float> %b, %a
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
ret <4 x float> %2
}
define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test4_sub_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: subss %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test4_sub_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test4_sub_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm1, %xmm2
+; SSE2-NEXT: subps %xmm0, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test4_sub_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movaps %xmm1, %xmm2
+; SSE41-NEXT: subps %xmm0, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test4_sub_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test4_sub_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fsub <4 x float> %b, %a
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
ret <4 x float> %2
}
define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test4_mul_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: mulss %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test4_mul_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test4_mul_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: mulps %xmm1, %xmm0
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test4_mul_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: mulps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test4_mul_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test4_mul_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fmul <4 x float> %b, %a
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
ret <4 x float> %2
}
define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: insert_test4_div_ss:
-; SSE: # %bb.0:
-; SSE-NEXT: divss %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test4_div_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test4_div_ss:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps %xmm1, %xmm2
+; SSE2-NEXT: divps %xmm0, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test4_div_ss:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movaps %xmm1, %xmm2
+; SSE41-NEXT: divps %xmm0, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test4_div_ss:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test4_div_ss:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fdiv <4 x float> %b, %a
%2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
ret <4 x float> %2
}
define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test4_add_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test4_add_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test4_add_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: addpd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test4_add_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: addpd %xmm1, %xmm0
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test4_add_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test4_add_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fadd <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
ret <2 x double> %2
}
define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test4_sub_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test4_sub_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test4_sub_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movapd %xmm1, %xmm2
+; SSE2-NEXT: subpd %xmm0, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test4_sub_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movapd %xmm1, %xmm2
+; SSE41-NEXT: subpd %xmm0, %xmm2
+; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test4_sub_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test4_sub_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fsub <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
ret <2 x double> %2
}
define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test4_mul_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test4_mul_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test4_mul_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: mulpd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test4_mul_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: mulpd %xmm1, %xmm0
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test4_mul_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmulpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test4_mul_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmulpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fmul <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
ret <2 x double> %2
}
define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: insert_test4_div_sd:
-; SSE: # %bb.0:
-; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
-;
-; AVX-LABEL: insert_test4_div_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: insert_test4_div_sd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movapd %xmm1, %xmm2
+; SSE2-NEXT: divpd %xmm0, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE41-LABEL: insert_test4_div_sd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movapd %xmm1, %xmm2
+; SSE41-NEXT: divpd %xmm0, %xmm2
+; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX1-LABEL: insert_test4_div_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vdivpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}}
+;
+; AVX512-LABEL: insert_test4_div_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}}
%1 = fdiv <2 x double> %b, %a
%2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
ret <2 x double> %2
More information about the llvm-commits
mailing list