[llvm] r346259 - [X86] Add custom promotion of v2i8/v2i16 fp_to_sint/fp_to_uint to avoid over-promotion to v2i64, which would force scalarization.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 6 11:24:22 PST 2018


Author: ctopper
Date: Tue Nov  6 11:24:21 2018
New Revision: 346259

URL: http://llvm.org/viewvc/llvm-project?rev=346259&view=rev
Log:
[X86] Add custom promotion of v2i8/v2i16 fp_to_sint/fp_to_uint to avoid over-promotion to v2i64, which would force scalarization.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vec_cast3.ll
    llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=346259&r1=346258&r2=346259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov  6 11:24:21 2018
@@ -882,6 +882,11 @@ X86TargetLowering::X86TargetLowering(con
 
     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
     setOperationAction(ISD::FP_TO_SINT,         MVT::v2i32, Custom);
+    setOperationAction(ISD::FP_TO_SINT,         MVT::v2i16, Custom);
+    // Custom legalize these to avoid over promotion.
+    setOperationAction(ISD::FP_TO_SINT,         MVT::v2i8,  Custom);
+    setOperationAction(ISD::FP_TO_UINT,         MVT::v2i16, Custom);
+    setOperationAction(ISD::FP_TO_UINT,         MVT::v2i8,  Custom);
 
     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
     setOperationAction(ISD::SINT_TO_FP,         MVT::v2i32, Custom);
@@ -26025,6 +26030,24 @@ void X86TargetLowering::ReplaceNodeResul
     SDValue Src = N->getOperand(0);
     EVT SrcVT = Src.getValueType();
 
+    // Promote these manually to avoid over promotion to v2i64. Type
+    // legalization will revisit the v2i32 operation for more cleanup.
+    if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
+        getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
+      // AVX512DQ provides instructions that produce a v2i64 result.
+      if (Subtarget.hasDQI())
+        return;
+
+      SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
+      Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+                                                          : ISD::AssertSext,
+                        dl, MVT::v2i32, Res,
+                        DAG.getValueType(VT.getVectorElementType()));
+      Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+      Results.push_back(Res);
+      return;
+    }
+
     if (VT == MVT::v2i32) {
       assert((IsSigned || Subtarget.hasAVX512()) &&
              "Can only handle signed conversion without AVX512");
@@ -26051,7 +26074,7 @@ void X86TargetLowering::ReplaceNodeResul
         return;
       }
       if (SrcVT == MVT::v2f32 &&
-          getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector) {
+          getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
         SDValue Idx = DAG.getIntPtrConstant(0, dl);
         SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                                   DAG.getUNDEF(MVT::v2f32));

Modified: llvm/trunk/test/CodeGen/X86/vec_cast3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_cast3.ll?rev=346259&r1=346258&r2=346259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_cast3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_cast3.ll Tue Nov  6 11:24:21 2018
@@ -111,19 +111,8 @@ define <2 x float> @cvt_v2u32_v2f32(<2 x
 define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
 ; CHECK-LABEL: cvt_v2f32_v2i8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    subl $68, %esp
-; CHECK-NEXT:    .cfi_def_cfa_offset 72
-; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
-; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
-; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
-; CHECK-NEXT:    fisttpll (%esp)
-; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; CHECK-NEXT:    addl $68, %esp
+; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0
 ; CHECK-NEXT:    retl
 ;
 ; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
@@ -141,19 +130,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x flo
 define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
 ; CHECK-LABEL: cvt_v2f32_v2i16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    subl $68, %esp
-; CHECK-NEXT:    .cfi_def_cfa_offset 72
-; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
-; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
-; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
-; CHECK-NEXT:    fisttpll (%esp)
-; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; CHECK-NEXT:    addl $68, %esp
+; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0
 ; CHECK-NEXT:    retl
 ;
 ; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
@@ -186,37 +164,8 @@ define <2 x i32> @cvt_v2f32_v2i32(<2 x f
 define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
 ; CHECK-LABEL: cvt_v2f32_v2u8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    subl $68, %esp
-; CHECK-NEXT:    .cfi_def_cfa_offset 72
-; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vcmpltss %xmm2, %xmm1, %xmm3
-; CHECK-NEXT:    vsubss %xmm2, %xmm1, %xmm4
-; CHECK-NEXT:    vblendvps %xmm3, %xmm1, %xmm4, %xmm3
-; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    vcmpltss %xmm2, %xmm0, %xmm3
-; CHECK-NEXT:    vsubss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm3
-; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
-; CHECK-NEXT:    fisttpll (%esp)
-; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
-; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    vucomiss %xmm2, %xmm1
-; CHECK-NEXT:    setae %al
-; CHECK-NEXT:    shll $31, %eax
-; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    vucomiss %xmm2, %xmm0
-; CHECK-NEXT:    setae %cl
-; CHECK-NEXT:    shll $31, %ecx
-; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    addl $68, %esp
+; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; CHECK-NEXT:    retl
 ;
 ; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
@@ -234,37 +183,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x flo
 define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
 ; CHECK-LABEL: cvt_v2f32_v2u16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    subl $68, %esp
-; CHECK-NEXT:    .cfi_def_cfa_offset 72
-; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vcmpltss %xmm2, %xmm1, %xmm3
-; CHECK-NEXT:    vsubss %xmm2, %xmm1, %xmm4
-; CHECK-NEXT:    vblendvps %xmm3, %xmm1, %xmm4, %xmm3
-; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    vcmpltss %xmm2, %xmm0, %xmm3
-; CHECK-NEXT:    vsubss %xmm2, %xmm0, %xmm4
-; CHECK-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm3
-; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
-; CHECK-NEXT:    fisttpll (%esp)
-; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
-; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    vucomiss %xmm2, %xmm1
-; CHECK-NEXT:    setae %al
-; CHECK-NEXT:    shll $31, %eax
-; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    vucomiss %xmm2, %xmm0
-; CHECK-NEXT:    setae %cl
-; CHECK-NEXT:    shll $31, %ecx
-; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    addl $68, %esp
+; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; CHECK-NEXT:    retl
 ;
 ; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:

Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll?rev=346259&r1=346258&r2=346259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll Tue Nov  6 11:24:21 2018
@@ -2866,3 +2866,445 @@ define <4 x i32> @fptosi_2f128_to_4i32(<
   %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %ext
 }
+
+define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
+; SSE-LABEL: fptosi_2f32_to_2i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; VEX-LABEL: fptosi_2f32_to_2i8:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VEX-NEXT:    vpmovsxdq %xmm0, %xmm0
+; VEX-NEXT:    retq
+;
+; AVX512F-LABEL: fptosi_2f32_to_2i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: fptosi_2f32_to_2i8:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
+; AVX512VLDQ:       # %bb.0:
+; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
+;
+; WIDEN-LABEL: fptosi_2f32_to_2i8:
+; WIDEN:       # %bb.0:
+; WIDEN-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN-NEXT:    vcvttps2dq %zmm0, %zmm0
+; WIDEN-NEXT:    vpmovdb %zmm0, %xmm0
+; WIDEN-NEXT:    vzeroupper
+; WIDEN-NEXT:    retq
+  %cvt = fptosi <2 x float> %a to <2 x i8>
+  ret <2 x i8> %cvt
+}
+
+define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
+; SSE-LABEL: fptosi_2f32_to_2i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; VEX-LABEL: fptosi_2f32_to_2i16:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VEX-NEXT:    vpmovsxdq %xmm0, %xmm0
+; VEX-NEXT:    retq
+;
+; AVX512F-LABEL: fptosi_2f32_to_2i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: fptosi_2f32_to_2i16:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQ-LABEL: fptosi_2f32_to_2i16:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: fptosi_2f32_to_2i16:
+; AVX512VLDQ:       # %bb.0:
+; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
+;
+; WIDEN_SKX-LABEL: fptosi_2f32_to_2i16:
+; WIDEN_SKX:       # %bb.0:
+; WIDEN_SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; WIDEN_SKX-NEXT:    vcvttps2dq %ymm0, %ymm0
+; WIDEN_SKX-NEXT:    vpmovdw %ymm0, %xmm0
+; WIDEN_SKX-NEXT:    vzeroupper
+; WIDEN_SKX-NEXT:    retq
+;
+; WIDEN_KNL-LABEL: fptosi_2f32_to_2i16:
+; WIDEN_KNL:       # %bb.0:
+; WIDEN_KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; WIDEN_KNL-NEXT:    vcvttps2dq %ymm0, %ymm0
+; WIDEN_KNL-NEXT:    vpmovdw %zmm0, %ymm0
+; WIDEN_KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; WIDEN_KNL-NEXT:    vzeroupper
+; WIDEN_KNL-NEXT:    retq
+  %cvt = fptosi <2 x float> %a to <2 x i16>
+  ret <2 x i16> %cvt
+}
+
+define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
+; SSE-LABEL: fptoui_2f32_to_2i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; VEX-LABEL: fptoui_2f32_to_2i8:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VEX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; VEX-NEXT:    retq
+;
+; AVX512F-LABEL: fptoui_2f32_to_2i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: fptoui_2f32_to_2i8:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
+; AVX512VLDQ:       # %bb.0:
+; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
+;
+; WIDEN-LABEL: fptoui_2f32_to_2i8:
+; WIDEN:       # %bb.0:
+; WIDEN-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN-NEXT:    vcvttps2dq %zmm0, %zmm0
+; WIDEN-NEXT:    vpmovdb %zmm0, %xmm0
+; WIDEN-NEXT:    vzeroupper
+; WIDEN-NEXT:    retq
+  %cvt = fptoui <2 x float> %a to <2 x i8>
+  ret <2 x i8> %cvt
+}
+
+define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
+; SSE-LABEL: fptoui_2f32_to_2i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; VEX-LABEL: fptoui_2f32_to_2i16:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; VEX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; VEX-NEXT:    retq
+;
+; AVX512F-LABEL: fptoui_2f32_to_2i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: fptoui_2f32_to_2i16:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQ-LABEL: fptoui_2f32_to_2i16:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f32_to_2i16:
+; AVX512VLDQ:       # %bb.0:
+; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
+;
+; WIDEN_SKX-LABEL: fptoui_2f32_to_2i16:
+; WIDEN_SKX:       # %bb.0:
+; WIDEN_SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; WIDEN_SKX-NEXT:    vcvttps2dq %ymm0, %ymm0
+; WIDEN_SKX-NEXT:    vpmovdw %ymm0, %xmm0
+; WIDEN_SKX-NEXT:    vzeroupper
+; WIDEN_SKX-NEXT:    retq
+;
+; WIDEN_KNL-LABEL: fptoui_2f32_to_2i16:
+; WIDEN_KNL:       # %bb.0:
+; WIDEN_KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; WIDEN_KNL-NEXT:    vcvttps2dq %ymm0, %ymm0
+; WIDEN_KNL-NEXT:    vpmovdw %zmm0, %ymm0
+; WIDEN_KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; WIDEN_KNL-NEXT:    vzeroupper
+; WIDEN_KNL-NEXT:    retq
+  %cvt = fptoui <2 x float> %a to <2 x i16>
+  ret <2 x i16> %cvt
+}
+
+define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
+; SSE-LABEL: fptosi_2f64_to_2i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT:    movapd %xmm0, %xmm1
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; VEX-LABEL: fptosi_2f64_to_2i8:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; VEX-NEXT:    vpmovsxdq %xmm0, %xmm0
+; VEX-NEXT:    retq
+;
+; AVX512F-LABEL: fptosi_2f64_to_2i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: fptosi_2f64_to_2i8:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
+; AVX512VLDQ:       # %bb.0:
+; AVX512VLDQ-NEXT:    vcvttpd2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
+;
+; WIDEN-LABEL: fptosi_2f64_to_2i8:
+; WIDEN:       # %bb.0:
+; WIDEN-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; WIDEN-NEXT:    vcvttsd2si %xmm1, %eax
+; WIDEN-NEXT:    vcvttsd2si %xmm0, %ecx
+; WIDEN-NEXT:    vmovd %ecx, %xmm0
+; WIDEN-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; WIDEN-NEXT:    retq
+  %cvt = fptosi <2 x double> %a to <2 x i8>
+  ret <2 x i8> %cvt
+}
+
+define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
+; SSE-LABEL: fptosi_2f64_to_2i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT:    movapd %xmm0, %xmm1
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; VEX-LABEL: fptosi_2f64_to_2i16:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; VEX-NEXT:    vpmovsxdq %xmm0, %xmm0
+; VEX-NEXT:    retq
+;
+; AVX512F-LABEL: fptosi_2f64_to_2i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: fptosi_2f64_to_2i16:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQ-LABEL: fptosi_2f64_to_2i16:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: fptosi_2f64_to_2i16:
+; AVX512VLDQ:       # %bb.0:
+; AVX512VLDQ-NEXT:    vcvttpd2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
+;
+; WIDEN_SKX-LABEL: fptosi_2f64_to_2i16:
+; WIDEN_SKX:       # %bb.0:
+; WIDEN_SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN_SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; WIDEN_SKX-NEXT:    vpmovdw %ymm0, %xmm0
+; WIDEN_SKX-NEXT:    vzeroupper
+; WIDEN_SKX-NEXT:    retq
+;
+; WIDEN_KNL-LABEL: fptosi_2f64_to_2i16:
+; WIDEN_KNL:       # %bb.0:
+; WIDEN_KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN_KNL-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; WIDEN_KNL-NEXT:    vpmovdw %zmm0, %ymm0
+; WIDEN_KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; WIDEN_KNL-NEXT:    vzeroupper
+; WIDEN_KNL-NEXT:    retq
+  %cvt = fptosi <2 x double> %a to <2 x i16>
+  ret <2 x i16> %cvt
+}
+
+define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
+; SSE-LABEL: fptoui_2f64_to_2i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT:    xorpd %xmm1, %xmm1
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; VEX-LABEL: fptoui_2f64_to_2i8:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; VEX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; VEX-NEXT:    retq
+;
+; AVX512F-LABEL: fptoui_2f64_to_2i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: fptoui_2f64_to_2i8:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
+; AVX512VLDQ:       # %bb.0:
+; AVX512VLDQ-NEXT:    vcvttpd2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
+;
+; WIDEN-LABEL: fptoui_2f64_to_2i8:
+; WIDEN:       # %bb.0:
+; WIDEN-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; WIDEN-NEXT:    vcvttsd2si %xmm1, %eax
+; WIDEN-NEXT:    vcvttsd2si %xmm0, %ecx
+; WIDEN-NEXT:    vmovd %ecx, %xmm0
+; WIDEN-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; WIDEN-NEXT:    retq
+  %cvt = fptoui <2 x double> %a to <2 x i8>
+  ret <2 x i8> %cvt
+}
+
+define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
+; SSE-LABEL: fptoui_2f64_to_2i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT:    xorpd %xmm1, %xmm1
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; VEX-LABEL: fptoui_2f64_to_2i16:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; VEX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; VEX-NEXT:    retq
+;
+; AVX512F-LABEL: fptoui_2f64_to_2i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: fptoui_2f64_to_2i16:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT:    retq
+;
+; AVX512DQ-LABEL: fptoui_2f64_to_2i16:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f64_to_2i16:
+; AVX512VLDQ:       # %bb.0:
+; AVX512VLDQ-NEXT:    vcvttpd2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
+;
+; WIDEN_SKX-LABEL: fptoui_2f64_to_2i16:
+; WIDEN_SKX:       # %bb.0:
+; WIDEN_SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN_SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; WIDEN_SKX-NEXT:    vpmovdw %ymm0, %xmm0
+; WIDEN_SKX-NEXT:    vzeroupper
+; WIDEN_SKX-NEXT:    retq
+;
+; WIDEN_KNL-LABEL: fptoui_2f64_to_2i16:
+; WIDEN_KNL:       # %bb.0:
+; WIDEN_KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; WIDEN_KNL-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; WIDEN_KNL-NEXT:    vpmovdw %zmm0, %ymm0
+; WIDEN_KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; WIDEN_KNL-NEXT:    vzeroupper
+; WIDEN_KNL-NEXT:    retq
+  %cvt = fptoui <2 x double> %a to <2 x i16>
+  ret <2 x i16> %cvt
+}




More information about the llvm-commits mailing list