[llvm] r347105 - [X86] Add custom promotion of narrow fp_to_uint/fp_to_sint operations under -x86-experimental-vector-widening-legalization.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 16 14:53:00 PST 2018
Author: ctopper
Date: Fri Nov 16 14:53:00 2018
New Revision: 347105
URL: http://llvm.org/viewvc/llvm-project?rev=347105&view=rev
Log:
[X86] Add custom promotion of narrow fp_to_uint/fp_to_sint operations under -x86-experimental-vector-widening-legalization.
This tries to force the result type to vXi32 followed by a truncate. This can help avoid scalarization that would otherwise occur.
There are some annoying examples of an avx512 truncate instruction followed by a packus where we should really be able to just use one truncate. But overall this is still a net improvement.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll
llvm/trunk/test/CodeGen/X86/vec_cast2.ll
llvm/trunk/test/CodeGen/X86/vec_cast3.ll
llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=347105&r1=347104&r2=347105&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Nov 16 14:53:00 2018
@@ -899,10 +899,18 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
- // Custom legalize these to avoid over promotion.
+
+ // Custom legalize these to avoid over promotion or custom promotion.
setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@@ -26287,7 +26295,7 @@ void X86TargetLowering::ReplaceNodeResul
// Promote these manually to avoid over promotion to v2i64. Type
// legalization will revisit the v2i32 operation for more cleanup.
if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
- getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
+ getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
// AVX512DQ provides instructions that produce a v2i64 result.
if (Subtarget.hasDQI())
return;
@@ -26302,6 +26310,43 @@ void X86TargetLowering::ReplaceNodeResul
return;
}
+ if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+
+ // Try to create a 128 bit vector, but don't exceed a 32 bit element.
+ unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
+ MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth),
+ VT.getVectorNumElements());
+ unsigned Opc = N->getOpcode();
+ if (PromoteVT == MVT::v2i32 || PromoteVT == MVT::v4i32)
+ Opc = ISD::FP_TO_SINT;
+
+ SDValue Res = DAG.getNode(Opc, dl, PromoteVT, Src);
+
+ // Preserve what we know about the size of the original result. Except
+ // when the result is v2i32 since we can't widen the assert.
+ if (PromoteVT != MVT::v2i32)
+ Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, PromoteVT, Res,
+ DAG.getValueType(VT.getVectorElementType()));
+
+ // Truncate back to the original width.
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+
+ // Now widen to 128 bits.
+ unsigned NumConcats = 128 / VT.getSizeInBits();
+ MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
+ VT.getVectorNumElements() * NumConcats);
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
+ Results.push_back(Res);
+ return;
+ }
+
+
if (VT == MVT::v2i32) {
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll?rev=347105&r1=347104&r2=347105&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll Fri Nov 16 14:53:00 2018
@@ -502,33 +502,21 @@ define <8 x i16> @f64to8us(<8 x double>
}
define <8 x i8> @f64to8uc(<8 x double> %f) {
-; ALL-LABEL: f64to8uc:
-; ALL: # %bb.0:
-; ALL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; ALL-NEXT: vcvttsd2si %xmm1, %eax
-; ALL-NEXT: vcvttsd2si %xmm0, %ecx
-; ALL-NEXT: vmovd %ecx, %xmm1
-; ALL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; ALL-NEXT: vextractf128 $1, %ymm0, %xmm2
-; ALL-NEXT: vcvttsd2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; ALL-NEXT: vcvttsd2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; ALL-NEXT: vcvttsd2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; ALL-NEXT: vcvttsd2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; ALL-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; ALL-NEXT: vcvttsd2si %xmm0, %eax
-; ALL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; ALL-NEXT: vcvttsd2si %xmm0, %eax
-; ALL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
-; ALL-NEXT: vzeroupper
-; ALL-NEXT: retq
+; NOVL-LABEL: f64to8uc:
+; NOVL: # %bb.0:
+; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0
+; NOVL-NEXT: vpmovdw %zmm0, %ymm0
+; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; NOVL-NEXT: vzeroupper
+; NOVL-NEXT: retq
+;
+; VL-LABEL: f64to8uc:
+; VL: # %bb.0:
+; VL-NEXT: vcvttpd2dq %zmm0, %ymm0
+; VL-NEXT: vpmovdw %ymm0, %xmm0
+; VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; VL-NEXT: vzeroupper
+; VL-NEXT: retq
%res = fptoui <8 x double> %f to <8 x i8>
ret <8 x i8> %res
}
Modified: llvm/trunk/test/CodeGen/X86/vec_cast2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_cast2.ll?rev=347105&r1=347104&r2=347105&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_cast2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_cast2.ll Fri Nov 16 14:53:00 2018
@@ -172,29 +172,10 @@ define <8 x i8> @cvt_v8f32_v8i8(<8 x flo
;
; CHECK-WIDE-LABEL: cvt_v8f32_v8i8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; CHECK-WIDE-NEXT: vzeroupper
; CHECK-WIDE-NEXT: retl
%res = fptosi <8 x float> %src to <8 x i8>
@@ -229,17 +210,8 @@ define <4 x i8> @cvt_v4f32_v4i8(<4 x flo
;
; CHECK-WIDE-LABEL: cvt_v4f32_v4i8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-WIDE-NEXT: retl
%res = fptosi <4 x float> %src to <4 x i8>
ret <4 x i8> %res
@@ -253,11 +225,8 @@ define <4 x i16> @cvt_v4f32_v4i16(<4 x f
;
; CHECK-WIDE-LABEL: cvt_v4f32_v4i16:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vzeroupper
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; CHECK-WIDE-NEXT: retl
%res = fptosi <4 x float> %src to <4 x i16>
ret <4 x i16> %res
@@ -274,29 +243,10 @@ define <8 x i8> @cvt_v8f32_v8u8(<8 x flo
;
; CHECK-WIDE-LABEL: cvt_v8f32_v8u8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; CHECK-WIDE-NEXT: vzeroupper
; CHECK-WIDE-NEXT: retl
%res = fptoui <8 x float> %src to <8 x i8>
@@ -331,17 +281,8 @@ define <4 x i8> @cvt_v4f32_v4u8(<4 x flo
;
; CHECK-WIDE-LABEL: cvt_v4f32_v4u8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-WIDE-NEXT: retl
%res = fptoui <4 x float> %src to <4 x i8>
ret <4 x i8> %res
@@ -355,11 +296,8 @@ define <4 x i16> @cvt_v4f32_v4u16(<4 x f
;
; CHECK-WIDE-LABEL: cvt_v4f32_v4u16:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vzeroupper
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; CHECK-WIDE-NEXT: retl
%res = fptoui <4 x float> %src to <4 x i16>
ret <4 x i16> %res
Modified: llvm/trunk/test/CodeGen/X86/vec_cast3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_cast3.ll?rev=347105&r1=347104&r2=347105&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_cast3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_cast3.ll Fri Nov 16 14:53:00 2018
@@ -117,11 +117,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x flo
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm0
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-WIDE-NEXT: retl
%res = fptosi <2 x float> %src to <2 x i8>
ret <2 x i8> %res
@@ -136,11 +133,8 @@ define <2 x i16> @cvt_v2f32_v2i16(<2 x f
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vzeroupper
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-WIDE-NEXT: retl
%res = fptosi <2 x float> %src to <2 x i16>
ret <2 x i16> %res
@@ -170,11 +164,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x flo
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm0
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-WIDE-NEXT: retl
%res = fptoui <2 x float> %src to <2 x i8>
ret <2 x i8> %res
@@ -189,11 +180,8 @@ define <2 x i16> @cvt_v2f32_v2u16(<2 x f
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vzeroupper
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-WIDE-NEXT: retl
%res = fptoui <2 x float> %src to <2 x i16>
ret <2 x i16> %res
Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll?rev=347105&r1=347104&r2=347105&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int-widen.ll Fri Nov 16 14:53:00 2018
@@ -2310,31 +2310,17 @@ define <4 x i32> @fptosi_2f128_to_4i32(<
define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i8:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %eax
-; SSE-NEXT: movzbl %al, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: cvttss2si %xmm0, %ecx
-; SSE-NEXT: shll $8, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: movd %ecx, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
-; VEX-LABEL: fptosi_2f32_to_2i8:
-; VEX: # %bb.0:
-; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; VEX-NEXT: vcvttss2si %xmm1, %eax
-; VEX-NEXT: vcvttss2si %xmm0, %ecx
-; VEX-NEXT: vmovd %ecx, %xmm0
-; VEX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: fptosi_2f32_to_2i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: fptosi_2f32_to_2i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i8>
ret <2 x i8> %cvt
}
@@ -2342,64 +2328,15 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x
define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i16:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: cvttss2si %xmm0, %ecx
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; AVX1-LABEL: fptosi_2f32_to_2i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: fptosi_2f32_to_2i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: fptosi_2f32_to_2i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptosi_2f32_to_2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptosi_2f32_to_2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptosi_2f32_to_2i16:
-; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VLDQ-NEXT: vzeroupper
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptosi_2f32_to_2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
}
@@ -2407,31 +2344,17 @@ define <2 x i16> @fptosi_2f32_to_2i16(<2
define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i8:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %eax
-; SSE-NEXT: movzbl %al, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: cvttss2si %xmm0, %ecx
-; SSE-NEXT: shll $8, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: movd %ecx, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
-; VEX-LABEL: fptoui_2f32_to_2i8:
-; VEX: # %bb.0:
-; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; VEX-NEXT: vcvttss2si %xmm1, %eax
-; VEX-NEXT: vcvttss2si %xmm0, %ecx
-; VEX-NEXT: vmovd %ecx, %xmm0
-; VEX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: fptoui_2f32_to_2i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: fptoui_2f32_to_2i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i8>
ret <2 x i8> %cvt
}
@@ -2439,64 +2362,15 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x
define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i16:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: cvttss2si %xmm0, %ecx
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; AVX1-LABEL: fptoui_2f32_to_2i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: fptoui_2f32_to_2i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: fptoui_2f32_to_2i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptoui_2f32_to_2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptoui_2f32_to_2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptoui_2f32_to_2i16:
-; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VLDQ-NEXT: vzeroupper
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptoui_2f32_to_2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
}
@@ -2504,22 +2378,16 @@ define <2 x i16> @fptoui_2f32_to_2i16(<2
define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i8:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movzbl %al, %eax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %ecx
-; SSE-NEXT: shll $8, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: movd %ecx, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i8:
; AVX: # %bb.0:
-; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vcvttsd2si %xmm1, %eax
-; AVX-NEXT: vcvttsd2si %xmm0, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
-; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i8>
ret <2 x i8> %cvt
@@ -2528,55 +2396,15 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x
define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i16:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %ecx
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; VEX-LABEL: fptosi_2f64_to_2i16:
-; VEX: # %bb.0:
-; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; VEX-NEXT: vcvttsd2si %xmm1, %eax
-; VEX-NEXT: vcvttsd2si %xmm0, %ecx
-; VEX-NEXT: vmovd %ecx, %xmm0
-; VEX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512F-LABEL: fptosi_2f64_to_2i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptosi_2f64_to_2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptosi_2f64_to_2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptosi_2f64_to_2i16:
-; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VLDQ-NEXT: vzeroupper
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptosi_2f64_to_2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt
}
@@ -2584,22 +2412,16 @@ define <2 x i16> @fptosi_2f64_to_2i16(<2
define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i8:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movzbl %al, %eax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %ecx
-; SSE-NEXT: shll $8, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: movd %ecx, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i8:
; AVX: # %bb.0:
-; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vcvttsd2si %xmm1, %eax
-; AVX-NEXT: vcvttsd2si %xmm0, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
-; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i8>
ret <2 x i8> %cvt
@@ -2608,55 +2430,15 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x
define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i16:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %ecx
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; VEX-LABEL: fptoui_2f64_to_2i16:
-; VEX: # %bb.0:
-; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; VEX-NEXT: vcvttsd2si %xmm1, %eax
-; VEX-NEXT: vcvttsd2si %xmm0, %ecx
-; VEX-NEXT: vmovd %ecx, %xmm0
-; VEX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512F-LABEL: fptoui_2f64_to_2i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptoui_2f64_to_2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptoui_2f64_to_2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptoui_2f64_to_2i16:
-; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VLDQ-NEXT: vzeroupper
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptoui_2f64_to_2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt
}
More information about the llvm-commits
mailing list