[llvm] 50fb395 - [X86] Custom widen strict v2f32->v2i32 by padding with zeroes.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 26 21:45:27 PST 2019


Author: Craig Topper
Date: 2019-12-26T21:45:18-08:00
New Revision: 50fb3957c1906d2e971a192e5bd69b40bc5a3919

URL: https://github.com/llvm/llvm-project/commit/50fb3957c1906d2e971a192e5bd69b40bc5a3919
DIFF: https://github.com/llvm/llvm-project/commit/50fb3957c1906d2e971a192e5bd69b40bc5a3919.diff

LOG: [X86] Custom widen strict v2f32->v2i32 by padding with zeroes.

For non-strict, generic type legalization will take care of this,
but that doesn't happen currently for strict nodes.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
    llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 508bbfb17f92..e1236714a331 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28858,6 +28858,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
         return;
       }
 
+      // Custom widen strict v2f32->v2i32 by padding with zeros.
+      // FIXME: Should generic type legalizer do this?
+      if (Src.getValueType() == MVT::v2f32 && IsStrict) {
+        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
+                          DAG.getConstantFP(0.0, dl, MVT::v2f32));
+        SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4i32, MVT::Other},
+                                  {N->getOperand(0), Src});
+        Results.push_back(Res);
+        Results.push_back(Res.getValue(1));
+        return;
+      }
+
       // The FP_TO_INTHelper below only handles f32/f64/f80 scalar inputs,
       // so early out here.
       return;

diff  --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
index 82585d440878..b8a863643c84 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
@@ -1179,69 +1179,44 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
 define <2 x i32> @strict_vector_fptosi_v2f32_to_v2i32(<2 x float> %a) #0 {
 ; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
 ; SSE-32:       # %bb.0:
-; SSE-32-NEXT:    cvttss2si %xmm0, %eax
-; SSE-32-NEXT:    movd %eax, %xmm1
-; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT:    cvttss2si %xmm0, %eax
-; SSE-32-NEXT:    movd %eax, %xmm0
-; SSE-32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-32-NEXT:    movdqa %xmm1, %xmm0
+; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
 ; SSE-32-NEXT:    retl
 ;
 ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
 ; SSE-64:       # %bb.0:
-; SSE-64-NEXT:    cvttss2si %xmm0, %eax
-; SSE-64-NEXT:    movd %eax, %xmm1
-; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT:    cvttss2si %xmm0, %eax
-; SSE-64-NEXT:    movd %eax, %xmm0
-; SSE-64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-64-NEXT:    movdqa %xmm1, %xmm0
+; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
 ; SSE-64-NEXT:    retq
 ;
 ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT:    vcvttss2si %xmm1, %eax
-; AVX-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
 ; AVX-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512F-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512F-NEXT:    vmovd %ecx, %xmm0
-; AVX512F-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
 ; AVX512F-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VL-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VL-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VL-NEXT:    vmovd %ecx, %xmm0
-; AVX512VL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
 ; AVX512VL-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512DQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512DQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512DQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512DQ-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
 ; AVX512DQ-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VLDQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512VLDQ-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
 ; AVX512VLDQ-NEXT:    ret{{[l|q]}}
   %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float> %a,
                                               metadata !"fpexcept.strict")
@@ -1328,38 +1303,30 @@ define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
 ;
 ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT:    vcvttss2usi %xmm1, %eax
-; AVX512F-NEXT:    vcvttss2usi %xmm0, %ecx
-; AVX512F-NEXT:    vmovd %ecx, %xmm0
-; AVX512F-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VL-NEXT:    vcvttss2usi %xmm1, %eax
-; AVX512VL-NEXT:    vcvttss2usi %xmm0, %ecx
-; AVX512VL-NEXT:    vmovd %ecx, %xmm0
-; AVX512VL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
 ; AVX512VL-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512DQ-NEXT:    vcvttss2usi %xmm1, %eax
-; AVX512DQ-NEXT:    vcvttss2usi %xmm0, %ecx
-; AVX512DQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512DQ-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VLDQ-NEXT:    vcvttss2usi %xmm1, %eax
-; AVX512VLDQ-NEXT:    vcvttss2usi %xmm0, %ecx
-; AVX512VLDQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512VLDQ-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
 ; AVX512VLDQ-NEXT:    ret{{[l|q]}}
   %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float> %a,
                                               metadata !"fpexcept.strict")
@@ -1463,65 +1430,51 @@ define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 {
 define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
 ; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
 ; SSE-32:       # %bb.0:
-; SSE-32-NEXT:    cvttss2si %xmm0, %eax
-; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT:    cvttss2si %xmm0, %ecx
-; SSE-32-NEXT:    movd %eax, %xmm0
-; SSE-32-NEXT:    pinsrw $1, %ecx, %xmm0
+; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE-32-NEXT:    retl
 ;
 ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
 ; SSE-64:       # %bb.0:
-; SSE-64-NEXT:    cvttss2si %xmm0, %eax
-; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT:    cvttss2si %xmm0, %ecx
-; SSE-64-NEXT:    movd %eax, %xmm0
-; SSE-64-NEXT:    pinsrw $1, %ecx, %xmm0
+; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE-64-NEXT:    retq
 ;
 ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT:    vcvttss2si %xmm1, %eax
-; AVX-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512F-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512F-NEXT:    vmovd %ecx, %xmm0
-; AVX512F-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512F-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VL-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VL-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VL-NEXT:    vmovd %ecx, %xmm0
-; AVX512VL-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512DQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512DQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512DQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512DQ-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512DQ-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VLDQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512VLDQ-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512VLDQ-NEXT:    ret{{[l|q]}}
   %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
                                               metadata !"fpexcept.strict")
@@ -1531,65 +1484,51 @@ define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
 define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 {
 ; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
 ; SSE-32:       # %bb.0:
-; SSE-32-NEXT:    cvttss2si %xmm0, %eax
-; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT:    cvttss2si %xmm0, %ecx
-; SSE-32-NEXT:    movd %eax, %xmm0
-; SSE-32-NEXT:    pinsrw $1, %ecx, %xmm0
+; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE-32-NEXT:    retl
 ;
 ; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
 ; SSE-64:       # %bb.0:
-; SSE-64-NEXT:    cvttss2si %xmm0, %eax
-; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT:    cvttss2si %xmm0, %ecx
-; SSE-64-NEXT:    movd %eax, %xmm0
-; SSE-64-NEXT:    pinsrw $1, %ecx, %xmm0
+; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE-64-NEXT:    retq
 ;
 ; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT:    vcvttss2si %xmm1, %eax
-; AVX-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512F-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512F-NEXT:    vmovd %ecx, %xmm0
-; AVX512F-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512F-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VL-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VL-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VL-NEXT:    vmovd %ecx, %xmm0
-; AVX512VL-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512DQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512DQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512DQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512DQ-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512DQ-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VLDQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512VLDQ-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512VLDQ-NEXT:    ret{{[l|q]}}
   %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
                                               metadata !"fpexcept.strict")
@@ -1701,69 +1640,55 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
 define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
 ; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
 ; SSE-32:       # %bb.0:
-; SSE-32-NEXT:    cvttss2si %xmm0, %eax
-; SSE-32-NEXT:    movzbl %al, %eax
-; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT:    cvttss2si %xmm0, %ecx
-; SSE-32-NEXT:    shll $8, %ecx
-; SSE-32-NEXT:    orl %eax, %ecx
-; SSE-32-NEXT:    movd %ecx, %xmm0
+; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-32-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; SSE-32-NEXT:    packuswb %xmm0, %xmm0
+; SSE-32-NEXT:    packuswb %xmm0, %xmm0
 ; SSE-32-NEXT:    retl
 ;
 ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
 ; SSE-64:       # %bb.0:
-; SSE-64-NEXT:    cvttss2si %xmm0, %eax
-; SSE-64-NEXT:    movzbl %al, %eax
-; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT:    cvttss2si %xmm0, %ecx
-; SSE-64-NEXT:    shll $8, %ecx
-; SSE-64-NEXT:    orl %eax, %ecx
-; SSE-64-NEXT:    movd %ecx, %xmm0
+; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-64-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE-64-NEXT:    packuswb %xmm0, %xmm0
+; SSE-64-NEXT:    packuswb %xmm0, %xmm0
 ; SSE-64-NEXT:    retq
 ;
 ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT:    vcvttss2si %xmm1, %eax
-; AVX-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512F-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512F-NEXT:    vmovd %ecx, %xmm0
-; AVX512F-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512F-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VL-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VL-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VL-NEXT:    vmovd %ecx, %xmm0
-; AVX512VL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512VL-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512DQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512DQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512DQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512DQ-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512DQ-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VLDQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512VLDQ-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512VLDQ-NEXT:    ret{{[l|q]}}
   %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float> %a,
                                               metadata !"fpexcept.strict")
@@ -1773,69 +1698,55 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
 define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
 ; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
 ; SSE-32:       # %bb.0:
-; SSE-32-NEXT:    cvttss2si %xmm0, %eax
-; SSE-32-NEXT:    movzbl %al, %eax
-; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT:    cvttss2si %xmm0, %ecx
-; SSE-32-NEXT:    shll $8, %ecx
-; SSE-32-NEXT:    orl %eax, %ecx
-; SSE-32-NEXT:    movd %ecx, %xmm0
+; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-32-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; SSE-32-NEXT:    packuswb %xmm0, %xmm0
+; SSE-32-NEXT:    packuswb %xmm0, %xmm0
 ; SSE-32-NEXT:    retl
 ;
 ; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
 ; SSE-64:       # %bb.0:
-; SSE-64-NEXT:    cvttss2si %xmm0, %eax
-; SSE-64-NEXT:    movzbl %al, %eax
-; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT:    cvttss2si %xmm0, %ecx
-; SSE-64-NEXT:    shll $8, %ecx
-; SSE-64-NEXT:    orl %eax, %ecx
-; SSE-64-NEXT:    movd %ecx, %xmm0
+; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-64-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE-64-NEXT:    packuswb %xmm0, %xmm0
+; SSE-64-NEXT:    packuswb %xmm0, %xmm0
 ; SSE-64-NEXT:    retq
 ;
 ; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT:    vcvttss2si %xmm1, %eax
-; AVX-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX-NEXT:    vmovd %ecx, %xmm0
-; AVX-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512F-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512F-NEXT:    vmovd %ecx, %xmm0
-; AVX512F-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512F-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VL-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VL-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VL-NEXT:    vmovd %ecx, %xmm0
-; AVX512VL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512VL-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512DQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512DQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512DQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512DQ-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512DQ-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
 ; AVX512VLDQ:       # %bb.0:
-; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm1, %eax
-; AVX512VLDQ-NEXT:    vcvttss2si %xmm0, %ecx
-; AVX512VLDQ-NEXT:    vmovd %ecx, %xmm0
-; AVX512VLDQ-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512VLDQ-NEXT:    ret{{[l|q]}}
   %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float> %a,
                                               metadata !"fpexcept.strict")

diff  --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index e131f613fff4..478594fcdbc5 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -3868,19 +3868,12 @@ entry:
 define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32() #0 {
 ; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
-; CHECK-NEXT:    movd %eax, %xmm1
-; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
-; CHECK-NEXT:    movd %eax, %xmm0
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    cvttps2dq {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f32:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
-; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
-; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvttps2dq {{.*}}(%rip), %xmm0
 ; AVX-NEXT:    retq
 entry:
   %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(
@@ -4363,10 +4356,10 @@ define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() #0 {
 ;
 ; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f32:
 ; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %eax
-; AVX512-NEXT:    vmovd %eax, %xmm0
-; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %eax
-; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,0.0E+0,0.0E+0]
+; AVX512-NEXT:    vcvttps2udq %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 entry:
   %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(


        


More information about the llvm-commits mailing list