[llvm] r362397 - [SelectionDAG] Add [us]itofp(undef) --> 0 constant fold (PR39205)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 06:02:07 PDT 2019
Author: rksimon
Date: Mon Jun 3 06:02:07 2019
New Revision: 362397
URL: http://llvm.org/viewvc/llvm-project?rev=362397&view=rev
Log:
[SelectionDAG] Add [us]itofp(undef) --> 0 constant fold (PR39205)
We were missing this fold in the DAG, which I've copied directly from llvm::ConstantFoldCastInstruction
Differential Revision: https://reviews.llvm.org/D62807
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll
llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll
llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=362397&r1=362396&r2=362397&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Jun 3 06:02:07 2019
@@ -12440,6 +12440,10 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDN
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (sint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
@@ -12497,6 +12501,10 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDN
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (uint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=362397&r1=362396&r2=362397&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Jun 3 06:02:07 2019
@@ -4440,6 +4440,12 @@ SDValue SelectionDAG::getNode(unsigned O
if (Operand.isUndef())
return getUNDEF(VT);
break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (Operand.isUndef())
+ return getConstantFP(0.0, DL, VT);
+ break;
case ISD::SIGN_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll?rev=362397&r1=362396&r2=362397&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll Mon Jun 3 06:02:07 2019
@@ -120,22 +120,30 @@ define <2 x double> @slto2f64(<2 x i64>
}
define <2 x float> @sltof2f32(<2 x i64> %a) {
-; NODQ-LABEL: sltof2f32:
-; NODQ: # %bb.0:
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; NODQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
-; NODQ-NEXT: retq
+; NOVLDQ-LABEL: sltof2f32:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: vpextrq $1, %xmm0, %rax
+; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; NOVLDQ-NEXT: vmovq %xmm0, %rax
+; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; NOVLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sltof2f32:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
+; VLNODQ-LABEL: sltof2f32:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; VLNODQ-NEXT: retq
+;
; DQNOVL-LABEL: sltof2f32:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=362397&r1=362396&r2=362397&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Mon Jun 3 06:02:07 2019
@@ -120,22 +120,30 @@ define <2 x double> @slto2f64(<2 x i64>
}
define <2 x float> @sltof2f32(<2 x i64> %a) {
-; NODQ-LABEL: sltof2f32:
-; NODQ: # %bb.0:
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; NODQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
-; NODQ-NEXT: retq
+; NOVLDQ-LABEL: sltof2f32:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: vpextrq $1, %xmm0, %rax
+; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; NOVLDQ-NEXT: vmovq %xmm0, %rax
+; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; NOVLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sltof2f32:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
+; VLNODQ-LABEL: sltof2f32:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; VLNODQ-NEXT: retq
+;
; DQNOVL-LABEL: sltof2f32:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll?rev=362397&r1=362396&r2=362397&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp-widen.ll Mon Jun 3 06:02:07 2019
@@ -1186,9 +1186,7 @@ define <4 x float> @sitofp_2i64_to_4f32(
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32:
@@ -1197,9 +1195,7 @@ define <4 x float> @sitofp_2i64_to_4f32(
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32:
@@ -1208,9 +1204,8 @@ define <4 x float> @sitofp_2i64_to_4f32(
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
@@ -1309,11 +1304,8 @@ define <4 x float> @sitofp_4i64_to_4f32_
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1323,10 +1315,7 @@ define <4 x float> @sitofp_4i64_to_4f32_
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2ss %rax, %xmm1
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1335,9 +1324,7 @@ define <4 x float> @sitofp_4i64_to_4f32_
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1346,9 +1333,7 @@ define <4 x float> @sitofp_4i64_to_4f32_
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1357,9 +1342,8 @@ define <4 x float> @sitofp_4i64_to_4f32_
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1918,7 +1902,8 @@ define <4 x float> @uitofp_2i64_to_4f32(
; VEX-NEXT: js .LBB39_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: jmp .LBB39_6
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT: retq
; VEX-NEXT: .LBB39_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
@@ -1926,15 +1911,7 @@ define <4 x float> @uitofp_2i64_to_4f32(
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT: .LBB39_6:
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT: js .LBB39_8
-; VEX-NEXT: # %bb.7:
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB39_8:
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i64_to_4f32:
@@ -1943,9 +1920,7 @@ define <4 x float> @uitofp_2i64_to_4f32(
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_4f32:
@@ -1954,9 +1929,8 @@ define <4 x float> @uitofp_2i64_to_4f32(
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
@@ -2126,49 +2100,39 @@ define <4 x float> @uitofp_2i64_to_2f32(
define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_1
; SSE2-NEXT: # %bb.2:
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB41_3
; SSE2-NEXT: .LBB41_1:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: addss %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB41_3:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movq %xmm1, %rax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_4
; SSE2-NEXT: # %bb.5:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: jmp .LBB41_6
; SSE2-NEXT: .LBB41_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: addss %xmm1, %xmm1
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB41_6:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: js .LBB41_8
-; SSE2-NEXT: # %bb.7:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: .LBB41_8:
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2193,7 +2157,8 @@ define <4 x float> @uitofp_4i64_to_4f32_
; SSE41-NEXT: # %bb.5:
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
-; SSE41-NEXT: jmp .LBB41_6
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; SSE41-NEXT: retq
; SSE41-NEXT: .LBB41_4:
; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shrq %rcx
@@ -2202,16 +2167,7 @@ define <4 x float> @uitofp_4i64_to_4f32_
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: addss %xmm0, %xmm0
-; SSE41-NEXT: .LBB41_6:
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT: testq %rax, %rax
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: js .LBB41_8
-; SSE41-NEXT: # %bb.7:
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2ss %rax, %xmm1
-; SSE41-NEXT: .LBB41_8:
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2235,7 +2191,8 @@ define <4 x float> @uitofp_4i64_to_4f32_
; VEX-NEXT: js .LBB41_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: jmp .LBB41_6
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT: retq
; VEX-NEXT: .LBB41_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
@@ -2243,15 +2200,7 @@ define <4 x float> @uitofp_4i64_to_4f32_
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT: .LBB41_6:
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT: js .LBB41_8
-; VEX-NEXT: # %bb.7:
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB41_8:
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2260,9 +2209,7 @@ define <4 x float> @uitofp_4i64_to_4f32_
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2271,9 +2218,8 @@ define <4 x float> @uitofp_4i64_to_4f32_
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=362397&r1=362396&r2=362397&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Mon Jun 3 06:02:07 2019
@@ -1186,9 +1186,7 @@ define <4 x float> @sitofp_2i64_to_4f32(
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32:
@@ -1197,9 +1195,7 @@ define <4 x float> @sitofp_2i64_to_4f32(
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32:
@@ -1208,9 +1204,8 @@ define <4 x float> @sitofp_2i64_to_4f32(
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
@@ -1309,11 +1304,8 @@ define <4 x float> @sitofp_4i64_to_4f32_
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1323,10 +1315,7 @@ define <4 x float> @sitofp_4i64_to_4f32_
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2ss %rax, %xmm1
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1335,9 +1324,7 @@ define <4 x float> @sitofp_4i64_to_4f32_
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1346,9 +1333,7 @@ define <4 x float> @sitofp_4i64_to_4f32_
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1357,9 +1342,8 @@ define <4 x float> @sitofp_4i64_to_4f32_
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1918,7 +1902,8 @@ define <4 x float> @uitofp_2i64_to_4f32(
; VEX-NEXT: js .LBB39_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: jmp .LBB39_6
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT: retq
; VEX-NEXT: .LBB39_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
@@ -1926,15 +1911,7 @@ define <4 x float> @uitofp_2i64_to_4f32(
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT: .LBB39_6:
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT: js .LBB39_8
-; VEX-NEXT: # %bb.7:
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB39_8:
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i64_to_4f32:
@@ -1943,9 +1920,7 @@ define <4 x float> @uitofp_2i64_to_4f32(
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_4f32:
@@ -1954,9 +1929,8 @@ define <4 x float> @uitofp_2i64_to_4f32(
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
@@ -2126,49 +2100,39 @@ define <4 x float> @uitofp_2i64_to_2f32(
define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_1
; SSE2-NEXT: # %bb.2:
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB41_3
; SSE2-NEXT: .LBB41_1:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: addss %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB41_3:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movq %xmm1, %rax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_4
; SSE2-NEXT: # %bb.5:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: jmp .LBB41_6
; SSE2-NEXT: .LBB41_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: addss %xmm1, %xmm1
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB41_6:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: js .LBB41_8
-; SSE2-NEXT: # %bb.7:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: .LBB41_8:
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2193,7 +2157,8 @@ define <4 x float> @uitofp_4i64_to_4f32_
; SSE41-NEXT: # %bb.5:
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
-; SSE41-NEXT: jmp .LBB41_6
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; SSE41-NEXT: retq
; SSE41-NEXT: .LBB41_4:
; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shrq %rcx
@@ -2202,16 +2167,7 @@ define <4 x float> @uitofp_4i64_to_4f32_
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: addss %xmm0, %xmm0
-; SSE41-NEXT: .LBB41_6:
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT: testq %rax, %rax
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: js .LBB41_8
-; SSE41-NEXT: # %bb.7:
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2ss %rax, %xmm1
-; SSE41-NEXT: .LBB41_8:
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2235,7 +2191,8 @@ define <4 x float> @uitofp_4i64_to_4f32_
; VEX-NEXT: js .LBB41_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: jmp .LBB41_6
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT: retq
; VEX-NEXT: .LBB41_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
@@ -2243,15 +2200,7 @@ define <4 x float> @uitofp_4i64_to_4f32_
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT: .LBB41_6:
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT: js .LBB41_8
-; VEX-NEXT: # %bb.7:
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB41_8:
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2260,9 +2209,7 @@ define <4 x float> @uitofp_4i64_to_4f32_
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2271,9 +2218,8 @@ define <4 x float> @uitofp_4i64_to_4f32_
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:
More information about the llvm-commits
mailing list