[llvm] f471f6f - [X86] combineTruncateWithSat - relax minimum truncation size for PACKSS/PACKUS
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 03:33:48 PDT 2023
Author: Simon Pilgrim
Date: 2023-11-01T10:33:35Z
New Revision: f471f6ff2f02a5081f89e0daf18c2ee9f3dc103d
URL: https://github.com/llvm/llvm-project/commit/f471f6ff2f02a5081f89e0daf18c2ee9f3dc103d
DIFF: https://github.com/llvm/llvm-project/commit/f471f6ff2f02a5081f89e0daf18c2ee9f3dc103d.diff
LOG: [X86] combineTruncateWithSat - relax minimum truncation size for PACKSS/PACKUS
truncateVectorWithPACK's handling of sub-128-bit result types was improved some time ago, so the old 64-bit minimum result size check in combineTruncateWithSat is no longer needed; remove it.
Fixes #68466
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/fpclamptosat_vec.ll
llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
llvm/test/CodeGen/X86/vector-trunc-packus.ll
llvm/test/CodeGen/X86/vector-trunc-ssat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 18f6a695e4502e9..9a3e1e9bd3233c5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49604,14 +49604,12 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
(Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
!(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
- if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 &&
- VT.getSizeInBits() >= 64 &&
+ if (!PreferAVX512 && VT.getVectorNumElements() > 1 &&
+ isPowerOf2_32(VT.getVectorNumElements()) &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
// vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).
- // Only do this when the result is at least 64 bits or we'll leaving
- // dangling PACKSSDW nodes.
if (SVT == MVT::i8 && InSVT == MVT::i32) {
EVT MidVT = VT.changeVectorElementType(MVT::i16);
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
index 017fe14366bd679..78ccc983d1637a5 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -1092,38 +1092,14 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) nounwind {
; SSE-LABEL: stest_f64i16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <32767,32767,u,u>
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = <4294934528,4294934528,u,u>
-; SSE-NEXT: movdqa %xmm2, %xmm1
-; SSE-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE-NEXT: pand %xmm1, %xmm2
-; SSE-NEXT: pandn %xmm0, %xmm1
-; SSE-NEXT: por %xmm2, %xmm1
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: stest_f64i16:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: stest_f64i16:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdw %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: stest_f64i16:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
%0 = icmp slt <2 x i32> %conv, <i32 32767, i32 32767>
@@ -1198,24 +1174,11 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) nounwind {
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; AVX2-LABEL: ustest_f64i16:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ustest_f64i16:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdw %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ustest_f64i16:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
%0 = icmp slt <2 x i32> %conv, <i32 65535, i32 65535>
@@ -1652,40 +1615,16 @@ define <2 x i8> @stest_f64i8(<2 x double> %x) nounwind {
; SSE-LABEL: stest_f64i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <127,127,u,u>
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <4294967168,4294967168,u,u>
-; SSE-NEXT: movdqa %xmm2, %xmm0
-; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pand %xmm0, %xmm2
-; SSE-NEXT: pandn %xmm1, %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: stest_f64i8:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: stest_f64i8:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdb %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: stest_f64i8:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
%0 = icmp slt <2 x i32> %conv, <i32 127, i32 127>
@@ -1748,39 +1687,16 @@ define <2 x i8> @ustest_f64i8(<2 x double> %x) nounwind {
; SSE-LABEL: ustest_f64i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <255,255,u,u>
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: movdqa %xmm2, %xmm0
-; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: ustest_f64i8:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ustest_f64i8:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdb %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ustest_f64i8:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
%0 = icmp slt <2 x i32> %conv, <i32 255, i32 255>
@@ -1795,37 +1711,16 @@ define <4 x i8> @stest_f32i8(<4 x float> %x) nounwind {
; SSE-LABEL: stest_f32i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; SSE-NEXT: movdqa %xmm2, %xmm0
-; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pand %xmm0, %xmm2
-; SSE-NEXT: pandn %xmm1, %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: stest_f32i8:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: stest_f32i8:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512-NEXT: vpmovsdb %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: stest_f32i8:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
%0 = icmp slt <4 x i32> %conv, <i32 127, i32 127, i32 127, i32 127>
@@ -1888,37 +1783,16 @@ define <4 x i8> @ustest_f32i8(<4 x float> %x) nounwind {
; SSE-LABEL: ustest_f32i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: movdqa %xmm2, %xmm0
-; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: ustest_f32i8:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ustest_f32i8:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovusdb %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ustest_f32i8:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
%0 = icmp slt <4 x i32> %conv, <i32 255, i32 255, i32 255, i32 255>
@@ -3863,38 +3737,14 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) nounwind {
; SSE-LABEL: stest_f64i16_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <32767,32767,u,u>
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = <4294934528,4294934528,u,u>
-; SSE-NEXT: movdqa %xmm2, %xmm1
-; SSE-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE-NEXT: pand %xmm1, %xmm2
-; SSE-NEXT: pandn %xmm0, %xmm1
-; SSE-NEXT: por %xmm2, %xmm1
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: stest_f64i16_mm:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: stest_f64i16_mm:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdw %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: stest_f64i16_mm:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
%spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>)
@@ -3966,24 +3816,11 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) nounwind {
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; AVX2-LABEL: ustest_f64i16_mm:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ustest_f64i16_mm:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdw %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ustest_f64i16_mm:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
%spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
index 38abaf8ff11c6c4..bd1e6d320b69e12 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
@@ -5166,25 +5166,13 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, ptr %p, <4 x i32> %mask) {
; SSE2-LABEL: truncstore_v4i32_v4i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [127,127,127,127]
-; SSE2-NEXT: movdqa %xmm3, %xmm4
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
-; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: pandn %xmm3, %xmm4
-; SSE2-NEXT: por %xmm0, %xmm4
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4294967168,4294967168,4294967168,4294967168]
-; SSE2-NEXT: movdqa %xmm4, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
-; SSE2-NEXT: pand %xmm3, %xmm4
-; SSE2-NEXT: pandn %xmm0, %xmm3
-; SSE2-NEXT: por %xmm4, %xmm3
-; SSE2-NEXT: packssdw %xmm3, %xmm3
-; SSE2-NEXT: packsswb %xmm3, %xmm3
+; SSE2-NEXT: packssdw %xmm0, %xmm0
+; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-NEXT: movmskps %xmm2, %ecx
; SSE2-NEXT: xorl $15, %ecx
; SSE2-NEXT: testb $1, %cl
-; SSE2-NEXT: movd %xmm3, %eax
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: jne .LBB14_1
; SSE2-NEXT: # %bb.2: # %else
; SSE2-NEXT: testb $2, %cl
@@ -5219,8 +5207,6 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, ptr %p, <4 x i32> %mask) {
; SSE4-LABEL: truncstore_v4i32_v4i8:
; SSE4: # %bb.0:
; SSE4-NEXT: pxor %xmm2, %xmm2
-; SSE4-NEXT: pminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE4-NEXT: pmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT: packssdw %xmm0, %xmm0
; SSE4-NEXT: packsswb %xmm0, %xmm0
; SSE4-NEXT: pcmpeqd %xmm1, %xmm2
@@ -5255,92 +5241,49 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, ptr %p, <4 x i32> %mask) {
; SSE4-NEXT: pextrb $3, %xmm0, 3(%rdi)
; SSE4-NEXT: retq
;
-; AVX1-LABEL: truncstore_v4i32_v4i8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vmovmskps %xmm1, %eax
-; AVX1-NEXT: xorl $15, %eax
-; AVX1-NEXT: testb $1, %al
-; AVX1-NEXT: jne .LBB14_1
-; AVX1-NEXT: # %bb.2: # %else
-; AVX1-NEXT: testb $2, %al
-; AVX1-NEXT: jne .LBB14_3
-; AVX1-NEXT: .LBB14_4: # %else2
-; AVX1-NEXT: testb $4, %al
-; AVX1-NEXT: jne .LBB14_5
-; AVX1-NEXT: .LBB14_6: # %else4
-; AVX1-NEXT: testb $8, %al
-; AVX1-NEXT: jne .LBB14_7
-; AVX1-NEXT: .LBB14_8: # %else6
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB14_1: # %cond.store
-; AVX1-NEXT: vpextrb $0, %xmm0, (%rdi)
-; AVX1-NEXT: testb $2, %al
-; AVX1-NEXT: je .LBB14_4
-; AVX1-NEXT: .LBB14_3: # %cond.store1
-; AVX1-NEXT: vpextrb $1, %xmm0, 1(%rdi)
-; AVX1-NEXT: testb $4, %al
-; AVX1-NEXT: je .LBB14_6
-; AVX1-NEXT: .LBB14_5: # %cond.store3
-; AVX1-NEXT: vpextrb $2, %xmm0, 2(%rdi)
-; AVX1-NEXT: testb $8, %al
-; AVX1-NEXT: je .LBB14_8
-; AVX1-NEXT: .LBB14_7: # %cond.store5
-; AVX1-NEXT: vpextrb $3, %xmm0, 3(%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: truncstore_v4i32_v4i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [127,127,127,127]
-; AVX2-NEXT: vpminsd %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [4294967168,4294967168,4294967168,4294967168]
-; AVX2-NEXT: vpmaxsd %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vmovmskps %xmm1, %eax
-; AVX2-NEXT: xorl $15, %eax
-; AVX2-NEXT: testb $1, %al
-; AVX2-NEXT: jne .LBB14_1
-; AVX2-NEXT: # %bb.2: # %else
-; AVX2-NEXT: testb $2, %al
-; AVX2-NEXT: jne .LBB14_3
-; AVX2-NEXT: .LBB14_4: # %else2
-; AVX2-NEXT: testb $4, %al
-; AVX2-NEXT: jne .LBB14_5
-; AVX2-NEXT: .LBB14_6: # %else4
-; AVX2-NEXT: testb $8, %al
-; AVX2-NEXT: jne .LBB14_7
-; AVX2-NEXT: .LBB14_8: # %else6
-; AVX2-NEXT: retq
-; AVX2-NEXT: .LBB14_1: # %cond.store
-; AVX2-NEXT: vpextrb $0, %xmm0, (%rdi)
-; AVX2-NEXT: testb $2, %al
-; AVX2-NEXT: je .LBB14_4
-; AVX2-NEXT: .LBB14_3: # %cond.store1
-; AVX2-NEXT: vpextrb $1, %xmm0, 1(%rdi)
-; AVX2-NEXT: testb $4, %al
-; AVX2-NEXT: je .LBB14_6
-; AVX2-NEXT: .LBB14_5: # %cond.store3
-; AVX2-NEXT: vpextrb $2, %xmm0, 2(%rdi)
-; AVX2-NEXT: testb $8, %al
-; AVX2-NEXT: je .LBB14_8
-; AVX2-NEXT: .LBB14_7: # %cond.store5
-; AVX2-NEXT: vpextrb $3, %xmm0, 3(%rdi)
-; AVX2-NEXT: retq
+; AVX-LABEL: truncstore_v4i32_v4i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vmovmskps %xmm1, %eax
+; AVX-NEXT: xorl $15, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: jne .LBB14_1
+; AVX-NEXT: # %bb.2: # %else
+; AVX-NEXT: testb $2, %al
+; AVX-NEXT: jne .LBB14_3
+; AVX-NEXT: .LBB14_4: # %else2
+; AVX-NEXT: testb $4, %al
+; AVX-NEXT: jne .LBB14_5
+; AVX-NEXT: .LBB14_6: # %else4
+; AVX-NEXT: testb $8, %al
+; AVX-NEXT: jne .LBB14_7
+; AVX-NEXT: .LBB14_8: # %else6
+; AVX-NEXT: retq
+; AVX-NEXT: .LBB14_1: # %cond.store
+; AVX-NEXT: vpextrb $0, %xmm0, (%rdi)
+; AVX-NEXT: testb $2, %al
+; AVX-NEXT: je .LBB14_4
+; AVX-NEXT: .LBB14_3: # %cond.store1
+; AVX-NEXT: vpextrb $1, %xmm0, 1(%rdi)
+; AVX-NEXT: testb $4, %al
+; AVX-NEXT: je .LBB14_6
+; AVX-NEXT: .LBB14_5: # %cond.store3
+; AVX-NEXT: vpextrb $2, %xmm0, 2(%rdi)
+; AVX-NEXT: testb $8, %al
+; AVX-NEXT: je .LBB14_8
+; AVX-NEXT: .LBB14_7: # %cond.store5
+; AVX-NEXT: vpextrb $3, %xmm0, 3(%rdi)
+; AVX-NEXT: retq
;
; AVX512F-LABEL: truncstore_v4i32_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB14_1
@@ -5376,11 +5319,11 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, ptr %p, <4 x i32> %mask) {
; AVX512BW-LABEL: truncstore_v4i32_v4i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $60, %k0, %k0
; AVX512BW-NEXT: kshiftrq $60, %k0, %k1
-; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0
+; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
index a94104a002d5ce4..f93f5682df826bf 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
@@ -4042,94 +4042,28 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(ptr %p0) "min-legal-vector-width"="2
}
define <4 x i8> @trunc_packus_v4i32_v4i8(<4 x i32> %a0) "min-legal-vector-width"="256" {
-; SSE2-SSSE3-LABEL: trunc_packus_v4i32_v4i8:
-; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
-; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: retq
-;
-; SSE41-LABEL: trunc_packus_v4i32_v4i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pmaxsd %xmm1, %xmm0
-; SSE41-NEXT: packusdw %xmm0, %xmm0
-; SSE41-NEXT: packuswb %xmm0, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX1-LABEL: trunc_packus_v4i32_v4i8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-SLOW-LABEL: trunc_packus_v4i32_v4i8:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX2-SLOW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-SLOW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: trunc_packus_v4i32_v4i8:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX2-FAST-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-FAST-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
-; AVX2-FAST-NEXT: retq
-;
-; AVX512F-LABEL: trunc_packus_v4i32_v4i8:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc_packus_v4i32_v4i8:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpmovusdb %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; SSE-LABEL: trunc_packus_v4i32_v4i8:
+; SSE: # %bb.0:
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: retq
;
-; AVX512BW-LABEL: trunc_packus_v4i32_v4i8:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
+; AVX-LABEL: trunc_packus_v4i32_v4i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i8:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovusdb %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc_packus_v4i32_v4i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
; SKX-LABEL: trunc_packus_v4i32_v4i8:
; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; SKX-NEXT: vpmovusdb %xmm0, %xmm0
+; SKX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; SKX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <4 x i32> %a0, <i32 255, i32 255, i32 255, i32 255>
%2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 255, i32 255, i32 255, i32 255>
@@ -4140,71 +4074,25 @@ define <4 x i8> @trunc_packus_v4i32_v4i8(<4 x i32> %a0) "min-legal-vector-width"
}
define void @trunc_packus_v4i32_v4i8_store(<4 x i32> %a0, ptr%p1) {
-; SSE2-SSSE3-LABEL: trunc_packus_v4i32_v4i8_store:
-; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
-; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: por %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
-; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm1
-; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm1
-; SSE2-SSSE3-NEXT: movd %xmm1, (%rdi)
-; SSE2-SSSE3-NEXT: retq
-;
-; SSE41-LABEL: trunc_packus_v4i32_v4i8_store:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pmaxsd %xmm0, %xmm1
-; SSE41-NEXT: packusdw %xmm1, %xmm1
-; SSE41-NEXT: packuswb %xmm1, %xmm1
-; SSE41-NEXT: movd %xmm1, (%rdi)
-; SSE41-NEXT: retq
-;
-; AVX1-LABEL: trunc_packus_v4i32_v4i8_store:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovd %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-SLOW-LABEL: trunc_packus_v4i32_v4i8_store:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX2-SLOW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-SLOW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vmovd %xmm0, (%rdi)
-; AVX2-SLOW-NEXT: retq
+; SSE-LABEL: trunc_packus_v4i32_v4i8_store:
+; SSE: # %bb.0:
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: movd %xmm0, (%rdi)
+; SSE-NEXT: retq
;
-; AVX2-FAST-LABEL: trunc_packus_v4i32_v4i8_store:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX2-FAST-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-FAST-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX2-FAST-NEXT: vmovd %xmm0, (%rdi)
-; AVX2-FAST-NEXT: retq
+; AVX-LABEL: trunc_packus_v4i32_v4i8_store:
+; AVX: # %bb.0:
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovd %xmm0, (%rdi)
+; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_packus_v4i32_v4i8_store:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
-; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_packus_v4i32_v4i8_store:
@@ -4216,11 +4104,9 @@ define void @trunc_packus_v4i32_v4i8_store(<4 x i32> %a0, ptr%p1) {
;
; AVX512BW-LABEL: trunc_packus_v4i32_v4i8_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
-; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i8_store:
diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
index 2f3fdeb74dc473e..14f724fc3b8c79d 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
@@ -3777,86 +3777,28 @@ define <16 x i8> @trunc_ssat_v16i64_v16i8(ptr %p0) "min-legal-vector-width"="256
}
define <4 x i8> @trunc_ssat_v4i32_v4i8(<4 x i32> %a0) {
-; SSE2-SSSE3-LABEL: trunc_ssat_v4i32_v4i8:
-; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127]
-; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: packssdw %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: retq
-;
-; SSE41-LABEL: trunc_ssat_v4i32_v4i8:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: packssdw %xmm0, %xmm0
-; SSE41-NEXT: packsswb %xmm0, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX1-LABEL: trunc_ssat_v4i32_v4i8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-SLOW-LABEL: trunc_ssat_v4i32_v4i8:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX2-SLOW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX2-SLOW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: retq
-;
-; AVX2-FAST-LABEL: trunc_ssat_v4i32_v4i8:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX2-FAST-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX2-FAST-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
-; AVX2-FAST-NEXT: retq
-;
-; AVX512F-LABEL: trunc_ssat_v4i32_v4i8:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovsdb %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; SSE-LABEL: trunc_ssat_v4i32_v4i8:
+; SSE: # %bb.0:
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
+; SSE-NEXT: retq
;
-; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
+; AVX-LABEL: trunc_ssat_v4i32_v4i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovsdb %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: trunc_ssat_v4i32_v4i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
; SKX-LABEL: trunc_ssat_v4i32_v4i8:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsdb %xmm0, %xmm0
+; SKX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; SKX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127>
%2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127>
@@ -3867,70 +3809,25 @@ define <4 x i8> @trunc_ssat_v4i32_v4i8(<4 x i32> %a0) {
}
define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, ptr%p1) {
-; SSE2-SSSE3-LABEL: trunc_ssat_v4i32_v4i8_store:
-; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127]
-; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: por %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4294967168,4294967168,4294967168,4294967168]
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: pandn %xmm0, %xmm1
-; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
-; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm1
-; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm1
-; SSE2-SSSE3-NEXT: movd %xmm1, (%rdi)
-; SSE2-SSSE3-NEXT: retq
-;
-; SSE41-LABEL: trunc_ssat_v4i32_v4i8_store:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: packssdw %xmm0, %xmm0
-; SSE41-NEXT: packsswb %xmm0, %xmm0
-; SSE41-NEXT: movd %xmm0, (%rdi)
-; SSE41-NEXT: retq
-;
-; AVX1-LABEL: trunc_ssat_v4i32_v4i8_store:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovd %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-SLOW-LABEL: trunc_ssat_v4i32_v4i8_store:
-; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX2-SLOW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX2-SLOW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-SLOW-NEXT: vmovd %xmm0, (%rdi)
-; AVX2-SLOW-NEXT: retq
+; SSE-LABEL: trunc_ssat_v4i32_v4i8_store:
+; SSE: # %bb.0:
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
+; SSE-NEXT: movd %xmm0, (%rdi)
+; SSE-NEXT: retq
;
-; AVX2-FAST-LABEL: trunc_ssat_v4i32_v4i8_store:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX2-FAST-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX2-FAST-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX2-FAST-NEXT: vmovd %xmm0, (%rdi)
-; AVX2-FAST-NEXT: retq
+; AVX-LABEL: trunc_ssat_v4i32_v4i8_store:
+; AVX: # %bb.0:
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovd %xmm0, (%rdi)
+; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
-; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8_store:
@@ -3940,10 +3837,9 @@ define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, ptr%p1) {
;
; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0
+; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
-; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8_store:
More information about the llvm-commits
mailing list