[llvm] r346259 - [X86] Add custom promotion of v2i8/v2i16 fp_to_sint to avoid over promotion to v2i64 which would force scalarization.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 6 11:24:22 PST 2018
Author: ctopper
Date: Tue Nov 6 11:24:21 2018
New Revision: 346259
URL: http://llvm.org/viewvc/llvm-project?rev=346259&view=rev
Log:
[X86] Add custom promotion of v2i8/v2i16 fp_to_sint to avoid over promotion to v2i64 which would force scalarization.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_cast3.ll
llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=346259&r1=346258&r2=346259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 6 11:24:21 2018
@@ -882,6 +882,11 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
+ // Custom legalize these to avoid over promotion.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@@ -26025,6 +26030,24 @@ void X86TargetLowering::ReplaceNodeResul
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
+ // Promote these manually to avoid over promotion to v2i64. Type
+ // legalization will revisit the v2i32 operation for more cleanup.
+ if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
+ getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
+ // AVX512DQ provides instructions that produce a v2i64 result.
+ if (Subtarget.hasDQI())
+ return;
+
+ SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
+ Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, MVT::v2i32, Res,
+ DAG.getValueType(VT.getVectorElementType()));
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ Results.push_back(Res);
+ return;
+ }
+
if (VT == MVT::v2i32) {
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
@@ -26051,7 +26074,7 @@ void X86TargetLowering::ReplaceNodeResul
return;
}
if (SrcVT == MVT::v2f32 &&
- getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector) {
+ getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
Modified: llvm/trunk/test/CodeGen/X86/vec_cast3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_cast3.ll?rev=346259&r1=346258&r2=346259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_cast3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_cast3.ll Tue Nov 6 11:24:21 2018
@@ -111,19 +111,8 @@ define <2 x float> @cvt_v2u32_v2f32(<2 x
define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i8:
; CHECK: ## %bb.0:
-; CHECK-NEXT: subl $68, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 72
-; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-NEXT: fisttpll (%esp)
-; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
-; CHECK-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; CHECK-NEXT: addl $68, %esp
+; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
; CHECK-NEXT: retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
@@ -141,19 +130,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x flo
define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i16:
; CHECK: ## %bb.0:
-; CHECK-NEXT: subl $68, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 72
-; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-NEXT: fisttpll (%esp)
-; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
-; CHECK-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; CHECK-NEXT: addl $68, %esp
+; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
; CHECK-NEXT: retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
@@ -186,37 +164,8 @@ define <2 x i32> @cvt_v2f32_v2i32(<2 x f
define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u8:
; CHECK: ## %bb.0:
-; CHECK-NEXT: subl $68, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 72
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
-; CHECK-NEXT: vsubss %xmm2, %xmm1, %xmm4
-; CHECK-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3
-; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
-; CHECK-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
-; CHECK-NEXT: vsubss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3
-; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-NEXT: fisttpll (%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: vucomiss %xmm2, %xmm1
-; CHECK-NEXT: setae %al
-; CHECK-NEXT: shll $31, %eax
-; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: vucomiss %xmm2, %xmm0
-; CHECK-NEXT: setae %cl
-; CHECK-NEXT: shll $31, %ecx
-; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
-; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: addl $68, %esp
+; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT: retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
@@ -234,37 +183,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x flo
define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u16:
; CHECK: ## %bb.0:
-; CHECK-NEXT: subl $68, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 72
-; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
-; CHECK-NEXT: vsubss %xmm2, %xmm1, %xmm4
-; CHECK-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3
-; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
-; CHECK-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
-; CHECK-NEXT: vsubss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3
-; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-NEXT: fisttpll (%esp)
-; CHECK-NEXT: flds {{[0-9]+}}(%esp)
-; CHECK-NEXT: fisttpll {{[0-9]+}}(%esp)
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: vucomiss %xmm2, %xmm1
-; CHECK-NEXT: setae %al
-; CHECK-NEXT: shll $31, %eax
-; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: vucomiss %xmm2, %xmm0
-; CHECK-NEXT: setae %cl
-; CHECK-NEXT: shll $31, %ecx
-; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; CHECK-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
-; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: addl $68, %esp
+; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT: retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:
Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll?rev=346259&r1=346258&r2=346259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll Tue Nov 6 11:24:21 2018
@@ -2866,3 +2866,445 @@ define <4 x i32> @fptosi_2f128_to_4i32(<
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %ext
}
+
+define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
+; SSE-LABEL: fptosi_2f32_to_2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; VEX-LABEL: fptosi_2f32_to_2i8:
+; VEX: # %bb.0:
+; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
+; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: fptosi_2f32_to_2i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptosi_2f32_to_2i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
+;
+; WIDEN-LABEL: fptosi_2f32_to_2i8:
+; WIDEN: # %bb.0:
+; WIDEN-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN-NEXT: vcvttps2dq %zmm0, %zmm0
+; WIDEN-NEXT: vpmovdb %zmm0, %xmm0
+; WIDEN-NEXT: vzeroupper
+; WIDEN-NEXT: retq
+ %cvt = fptosi <2 x float> %a to <2 x i8>
+ ret <2 x i8> %cvt
+}
+
+define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
+; SSE-LABEL: fptosi_2f32_to_2i16:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; VEX-LABEL: fptosi_2f32_to_2i16:
+; VEX: # %bb.0:
+; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
+; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: fptosi_2f32_to_2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptosi_2f32_to_2i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptosi_2f32_to_2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptosi_2f32_to_2i16:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
+;
+; WIDEN_SKX-LABEL: fptosi_2f32_to_2i16:
+; WIDEN_SKX: # %bb.0:
+; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; WIDEN_SKX-NEXT: vcvttps2dq %ymm0, %ymm0
+; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
+; WIDEN_SKX-NEXT: vzeroupper
+; WIDEN_SKX-NEXT: retq
+;
+; WIDEN_KNL-LABEL: fptosi_2f32_to_2i16:
+; WIDEN_KNL: # %bb.0:
+; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; WIDEN_KNL-NEXT: vcvttps2dq %ymm0, %ymm0
+; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
+; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; WIDEN_KNL-NEXT: vzeroupper
+; WIDEN_KNL-NEXT: retq
+ %cvt = fptosi <2 x float> %a to <2 x i16>
+ ret <2 x i16> %cvt
+}
+
+define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
+; SSE-LABEL: fptoui_2f32_to_2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; VEX-LABEL: fptoui_2f32_to_2i8:
+; VEX: # %bb.0:
+; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
+; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: fptoui_2f32_to_2i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptoui_2f32_to_2i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
+;
+; WIDEN-LABEL: fptoui_2f32_to_2i8:
+; WIDEN: # %bb.0:
+; WIDEN-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN-NEXT: vcvttps2dq %zmm0, %zmm0
+; WIDEN-NEXT: vpmovdb %zmm0, %xmm0
+; WIDEN-NEXT: vzeroupper
+; WIDEN-NEXT: retq
+ %cvt = fptoui <2 x float> %a to <2 x i8>
+ ret <2 x i8> %cvt
+}
+
+define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
+; SSE-LABEL: fptoui_2f32_to_2i16:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; VEX-LABEL: fptoui_2f32_to_2i16:
+; VEX: # %bb.0:
+; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
+; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: fptoui_2f32_to_2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptoui_2f32_to_2i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptoui_2f32_to_2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f32_to_2i16:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
+;
+; WIDEN_SKX-LABEL: fptoui_2f32_to_2i16:
+; WIDEN_SKX: # %bb.0:
+; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; WIDEN_SKX-NEXT: vcvttps2dq %ymm0, %ymm0
+; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
+; WIDEN_SKX-NEXT: vzeroupper
+; WIDEN_SKX-NEXT: retq
+;
+; WIDEN_KNL-LABEL: fptoui_2f32_to_2i16:
+; WIDEN_KNL: # %bb.0:
+; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; WIDEN_KNL-NEXT: vcvttps2dq %ymm0, %ymm0
+; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
+; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; WIDEN_KNL-NEXT: vzeroupper
+; WIDEN_KNL-NEXT: retq
+ %cvt = fptoui <2 x float> %a to <2 x i16>
+ ret <2 x i16> %cvt
+}
+
+define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
+; SSE-LABEL: fptosi_2f64_to_2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; VEX-LABEL: fptosi_2f64_to_2i8:
+; VEX: # %bb.0:
+; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: fptosi_2f64_to_2i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptosi_2f64_to_2i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
+;
+; WIDEN-LABEL: fptosi_2f64_to_2i8:
+; WIDEN: # %bb.0:
+; WIDEN-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; WIDEN-NEXT: vcvttsd2si %xmm1, %eax
+; WIDEN-NEXT: vcvttsd2si %xmm0, %ecx
+; WIDEN-NEXT: vmovd %ecx, %xmm0
+; WIDEN-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; WIDEN-NEXT: retq
+ %cvt = fptosi <2 x double> %a to <2 x i8>
+ ret <2 x i8> %cvt
+}
+
+define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
+; SSE-LABEL: fptosi_2f64_to_2i16:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; VEX-LABEL: fptosi_2f64_to_2i16:
+; VEX: # %bb.0:
+; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; VEX-NEXT: vpmovsxdq %xmm0, %xmm0
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: fptosi_2f64_to_2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptosi_2f64_to_2i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptosi_2f64_to_2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptosi_2f64_to_2i16:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
+;
+; WIDEN_SKX-LABEL: fptosi_2f64_to_2i16:
+; WIDEN_SKX: # %bb.0:
+; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN_SKX-NEXT: vcvttpd2dq %zmm0, %ymm0
+; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
+; WIDEN_SKX-NEXT: vzeroupper
+; WIDEN_SKX-NEXT: retq
+;
+; WIDEN_KNL-LABEL: fptosi_2f64_to_2i16:
+; WIDEN_KNL: # %bb.0:
+; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN_KNL-NEXT: vcvttpd2dq %zmm0, %ymm0
+; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
+; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; WIDEN_KNL-NEXT: vzeroupper
+; WIDEN_KNL-NEXT: retq
+ %cvt = fptosi <2 x double> %a to <2 x i16>
+ ret <2 x i16> %cvt
+}
+
+define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
+; SSE-LABEL: fptoui_2f64_to_2i8:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: xorpd %xmm1, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; VEX-LABEL: fptoui_2f64_to_2i8:
+; VEX: # %bb.0:
+; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: fptoui_2f64_to_2i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptoui_2f64_to_2i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
+;
+; WIDEN-LABEL: fptoui_2f64_to_2i8:
+; WIDEN: # %bb.0:
+; WIDEN-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; WIDEN-NEXT: vcvttsd2si %xmm1, %eax
+; WIDEN-NEXT: vcvttsd2si %xmm0, %ecx
+; WIDEN-NEXT: vmovd %ecx, %xmm0
+; WIDEN-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; WIDEN-NEXT: retq
+ %cvt = fptoui <2 x double> %a to <2 x i8>
+ ret <2 x i8> %cvt
+}
+
+define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
+; SSE-LABEL: fptoui_2f64_to_2i16:
+; SSE: # %bb.0:
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: xorpd %xmm1, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; VEX-LABEL: fptoui_2f64_to_2i16:
+; VEX: # %bb.0:
+; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: fptoui_2f64_to_2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptoui_2f64_to_2i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptoui_2f64_to_2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f64_to_2i16:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
+;
+; WIDEN_SKX-LABEL: fptoui_2f64_to_2i16:
+; WIDEN_SKX: # %bb.0:
+; WIDEN_SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN_SKX-NEXT: vcvttpd2dq %zmm0, %ymm0
+; WIDEN_SKX-NEXT: vpmovdw %ymm0, %xmm0
+; WIDEN_SKX-NEXT: vzeroupper
+; WIDEN_SKX-NEXT: retq
+;
+; WIDEN_KNL-LABEL: fptoui_2f64_to_2i16:
+; WIDEN_KNL: # %bb.0:
+; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN_KNL-NEXT: vcvttpd2dq %zmm0, %ymm0
+; WIDEN_KNL-NEXT: vpmovdw %zmm0, %ymm0
+; WIDEN_KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; WIDEN_KNL-NEXT: vzeroupper
+; WIDEN_KNL-NEXT: retq
+ %cvt = fptoui <2 x double> %a to <2 x i16>
+ ret <2 x i16> %cvt
+}
More information about the llvm-commits mailing list