[llvm] 9dbeac1 - [X86] ReplaceNodeResults - fp_to_sint/uint - manually widen v2i32 results to let us add AssertSext/AssertZext
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 9 04:07:54 PDT 2021
Author: Simon Pilgrim
Date: 2021-07-09T12:07:33+01:00
New Revision: 9dbeac16ba9b7a5121c0ce5ba8b7eb5c46ce2b15
URL: https://github.com/llvm/llvm-project/commit/9dbeac16ba9b7a5121c0ce5ba8b7eb5c46ce2b15
DIFF: https://github.com/llvm/llvm-project/commit/9dbeac16ba9b7a5121c0ce5ba8b7eb5c46ce2b15.diff
LOG: [X86] ReplaceNodeResults - fp_to_sint/uint - manually widen v2i32 results to let us add AssertSext/AssertZext
It's proving tricky to move this to the generic legalizer code, so manually insert the v2i32 subvector into v4i32, insert the AssertSext/AssertZext node, then extract the subvector again.
This avoids masks in the truncation/pack code, which means we avoid a PSHUFB in the fp_to_sint/uint code for sub-128-bit types (specific targets can still combine the packs to a pshufb if they have fast variable per-lane shuffles).
This was noticed when I was trying to improve fp_to_sint/uint costs with D103695 (some targets had very high fp_to_sint costs due to the PSHUFB); with this change in place, we can then update the fp_to_uint codegen from D89697.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
llvm/test/CodeGen/X86/vec_cast3.ll
llvm/test/CodeGen/X86/vec_fp_to_int.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 35b4f62abb34..0410a6923310 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30749,12 +30749,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
- // Preserve what we know about the size of the original result. Except
- // when the result is v2i32 since we can't widen the assert.
- if (PromoteVT != MVT::v2i32)
- Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext,
- dl, PromoteVT, Res,
- DAG.getValueType(VT.getVectorElementType()));
+ // Preserve what we know about the size of the original result. If the
+ // result is v2i32, we have to manually widen the assert.
+ if (PromoteVT == MVT::v2i32)
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res,
+ DAG.getUNDEF(MVT::v2i32));
+
+ Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, dl,
+ Res.getValueType(), Res,
+ DAG.getValueType(VT.getVectorElementType()));
+
+ if (PromoteVT == MVT::v2i32)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
// Truncate back to the original width.
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
index 8e790e972884..7fae417a1699 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
@@ -1829,43 +1829,43 @@ define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; SSE-32: # %bb.0:
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-32-NEXT: packssdw %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; SSE-64: # %bb.0:
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-64-NEXT: packssdw %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict") #0
@@ -1888,31 +1888,31 @@ define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict") #0
@@ -1924,49 +1924,49 @@ define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
; SSE-32: # %bb.0:
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-32-NEXT: packssdw %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-64-NEXT: packssdw %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict") #0
@@ -1992,35 +1992,35 @@ define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 {
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict") #0
@@ -2031,29 +2031,29 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; SSE-32: # %bb.0:
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; SSE-32-NEXT: packuswb %xmm0, %xmm0
-; SSE-32-NEXT: packuswb %xmm0, %xmm0
+; SSE-32-NEXT: packssdw %xmm0, %xmm0
+; SSE-32-NEXT: packsswb %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; SSE-64: # %bb.0:
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-64-NEXT: packuswb %xmm0, %xmm0
-; SSE-64-NEXT: packuswb %xmm0, %xmm0
+; SSE-64-NEXT: packssdw %xmm0, %xmm0
+; SSE-64-NEXT: packsswb %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
@@ -2065,7 +2065,8 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
@@ -2082,7 +2083,6 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; SSE-32: # %bb.0:
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: retl
@@ -2090,7 +2090,6 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; SSE-64: # %bb.0:
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: retq
@@ -2098,13 +2097,15 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
@@ -2116,7 +2117,8 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
@@ -2134,32 +2136,32 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
; SSE-32: # %bb.0:
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; SSE-32-NEXT: packuswb %xmm0, %xmm0
-; SSE-32-NEXT: packuswb %xmm0, %xmm0
+; SSE-32-NEXT: packssdw %xmm0, %xmm0
+; SSE-32-NEXT: packsswb %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-64-NEXT: packuswb %xmm0, %xmm0
-; SSE-64-NEXT: packuswb %xmm0, %xmm0
+; SSE-64-NEXT: packssdw %xmm0, %xmm0
+; SSE-64-NEXT: packsswb %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
@@ -2173,7 +2175,8 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
@@ -2192,7 +2195,6 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; SSE-32: # %bb.0:
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: retl
@@ -2201,7 +2203,6 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: retq
@@ -2210,14 +2211,16 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
@@ -2231,7 +2234,8 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
diff --git a/llvm/test/CodeGen/X86/vec_cast3.ll b/llvm/test/CodeGen/X86/vec_cast3.ll
index 749188c5542a..57911e1eb967 100644
--- a/llvm/test/CodeGen/X86/vec_cast3.ll
+++ b/llvm/test/CodeGen/X86/vec_cast3.ll
@@ -67,7 +67,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i8:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
-; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = fptosi <2 x float> %src to <2 x i8>
ret <2 x i8> %res
@@ -77,7 +78,7 @@ define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
-; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = fptosi <2 x float> %src to <2 x i16>
ret <2 x i16> %res
@@ -96,7 +97,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u8:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
-; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = fptoui <2 x float> %src to <2 x i8>
ret <2 x i8> %res
@@ -106,7 +108,7 @@ define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
-; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = fptoui <2 x float> %src to <2 x i16>
ret <2 x i16> %res
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
index dfbebfd6a8ca..ff8e59c04c62 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -2297,21 +2297,22 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f32_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
-; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_2f32_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_2f32_to_2i8:
@@ -2323,7 +2324,8 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
@@ -2339,13 +2341,13 @@ define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f32_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
@@ -2355,7 +2357,6 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
@@ -2363,13 +2364,15 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; VEX-LABEL: fptoui_2f32_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
-; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f32_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f32_to_2i8:
@@ -2381,7 +2384,8 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
@@ -2403,7 +2407,7 @@ define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
; AVX-LABEL: fptoui_2f32_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
@@ -2413,21 +2417,22 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f64_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_2f64_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_2f64_to_2i8:
@@ -2439,7 +2444,8 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
@@ -2455,13 +2461,13 @@ define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt
@@ -2471,7 +2477,6 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
@@ -2479,13 +2484,15 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; VEX-LABEL: fptoui_2f64_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f64_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f64_to_2i8:
@@ -2497,7 +2504,8 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
@@ -2519,7 +2527,7 @@ define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
; AVX-LABEL: fptoui_2f64_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt
More information about the llvm-commits
mailing list