[llvm] ec6a15f - [X86] optimize masked truncated saturating stores (#169827)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 08:04:24 PST 2025
Author: Folkert de Vries
Date: 2025-12-03T16:04:16Z
New Revision: ec6a15f84db135186f5075e15146c7f2ec532d3a
URL: https://github.com/llvm/llvm-project/commit/ec6a15f84db135186f5075e15146c7f2ec532d3a
DIFF: https://github.com/llvm/llvm-project/commit/ec6a15f84db135186f5075e15146c7f2ec532d3a.diff
LOG: [X86] optimize masked truncated saturating stores (#169827)
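Combine the saturating operation into the masked truncating store.
Before this change, a manually written "clamp, then masked truncating store" sequence kept a separate clamp instruction (`vpminuw`) ahead of a plain truncating store (`vpmovwb`), whereas the equivalent intrinsic already produced a single saturating truncating store (`vpmovuswb`), as the comparison below shows (https://godbolt.org/z/n1YfavKP6):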
```asm
_mm256_mask_cvtusepi16_storeu_epi8_manual: # @_mm256_mask_cvtusepi16_storeu_epi8_manual
kmovd k1, esi
vmovdqa ymm0, ymmword ptr [rdx]
vpminuw ymm0, ymm0, ymmword ptr [rip + .LCPI0_0]
vpmovwb xmmword ptr [rdi] {k1}, ymm0
vzeroupper
ret
_mm256_mask_cvtusepi16_storeu_epi8_intrinsic: # @_mm256_mask_cvtusepi16_storeu_epi8_intrinsic
kmovd k1, esi
vmovdqa ymm0, ymmword ptr [rdx]
vpmovuswb xmmword ptr [rdi] {k1}, ymm0
vzeroupper
ret
```
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d46f0f1572f1f..6e16bb148b5df 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53523,18 +53523,48 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
if (Mst->isCompressingStore())
return SDValue();
- EVT VT = Mst->getValue().getValueType();
+ if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
+ return ScalarStore;
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL(N);
- if (Mst->isTruncatingStore())
- return SDValue();
+ SDValue Mask = Mst->getMask();
+ SDValue Value = Mst->getValue();
+ EVT MemVT = Mst->getMemoryVT();
+ EVT VT = Value.getValueType();
- if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
- return ScalarStore;
+ // See if the truncating store can be a saturating truncated store.
+ if (Mst->isTruncatingStore()) {
+ if (VT.isVector() && MemVT.isVector() && VT.getScalarType().isInteger() &&
+ MemVT.getScalarType().isInteger() &&
+ VT.getVectorNumElements() == MemVT.getVectorNumElements() &&
+ Subtarget.hasBWI() && Subtarget.hasVLX()) {
+
+ SDValue SatSrc;
+ unsigned Opc;
+ if (SDValue SVal = detectSSatPattern(Value, MemVT)) {
+ SatSrc = SVal;
+ Opc = X86ISD::VMTRUNCSTORES;
+ } else if (SDValue UVal = detectUSatPattern(Value, MemVT, DAG, DL)) {
+ SatSrc = UVal;
+ Opc = X86ISD::VMTRUNCSTOREUS;
+ } else {
+ return SDValue();
+ }
+
+ SDVTList VTs = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {Mst->getChain(), SatSrc, Mst->getBasePtr(), Mask};
+ MachineMemOperand *MMO = Mst->getMemOperand();
+ return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
+ }
+
+ // Otherwise don't combine if this store already truncates.
+ return SDValue();
+ }
// If the mask value has been legalized to a non-boolean vector, try to
// simplify ops leading up to it. We only demand the MSB of each lane.
- SDValue Mask = Mst->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
@@ -53550,14 +53580,12 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
Mst->getAddressingMode());
}
- SDValue Value = Mst->getValue();
if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
- TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
- Mst->getMemoryVT())) {
- return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
- Mst->getBasePtr(), Mst->getOffset(), Mask,
- Mst->getMemoryVT(), Mst->getMemOperand(),
- Mst->getAddressingMode(), true);
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), MemVT)) {
+ return DAG.getMaskedStore(Mst->getChain(), DL, Value.getOperand(0),
+ Mst->getBasePtr(), Mst->getOffset(), Mask, MemVT,
+ Mst->getMemOperand(), Mst->getAddressingMode(),
+ true);
}
return SDValue();
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
index 18d394e1281b4..57b0577ac7cc9 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
@@ -4,9 +4,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512FVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=AVX512FVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefixes=AVX512BWVL
define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
; SSE2-LABEL: truncstore_v8i64_v8i32:
@@ -350,14 +350,21 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
-; AVX512VL-LABEL: truncstore_v8i64_v8i32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512VL-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512FVL-LABEL: truncstore_v8i64_v8i32:
+; AVX512FVL: # %bb.0:
+; AVX512FVL-NEXT: vptestmd %ymm1, %ymm1, %k1
+; AVX512FVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512FVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512FVL-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
+; AVX512FVL-NEXT: vzeroupper
+; AVX512FVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: truncstore_v8i64_v8i32:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
+; AVX512BWVL-NEXT: vpmovsqd %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
%b = icmp slt <8 x i64> %x, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
%c = select <8 x i1> %b, <8 x i64> %x, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -964,9 +971,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i64_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsqw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
@@ -1572,9 +1577,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i64_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsqb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
@@ -1788,14 +1791,21 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
-; AVX512VL-LABEL: truncstore_v4i64_v4i32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) {%k1}
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512FVL-LABEL: truncstore_v4i64_v4i32:
+; AVX512FVL: # %bb.0:
+; AVX512FVL-NEXT: vptestmd %xmm1, %xmm1, %k1
+; AVX512FVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512FVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512FVL-NEXT: vpmovqd %ymm0, (%rdi) {%k1}
+; AVX512FVL-NEXT: vzeroupper
+; AVX512FVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: truncstore_v4i64_v4i32:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
+; AVX512BWVL-NEXT: vpmovsqd %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
%b = icmp slt <4 x i64> %x, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
%c = select <4 x i1> %b, <4 x i64> %x, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -2141,9 +2151,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v4i64_v4i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovqw %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsqw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
@@ -2495,9 +2503,7 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v4i64_v4i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovqb %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsqb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
@@ -2641,13 +2647,19 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
-; AVX512VL-LABEL: truncstore_v2i64_v2i32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1
-; AVX512VL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512VL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512VL-NEXT: vpmovqd %xmm0, (%rdi) {%k1}
-; AVX512VL-NEXT: retq
+; AVX512FVL-LABEL: truncstore_v2i64_v2i32:
+; AVX512FVL: # %bb.0:
+; AVX512FVL-NEXT: vptestmq %xmm1, %xmm1, %k1
+; AVX512FVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512FVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512FVL-NEXT: vpmovqd %xmm0, (%rdi) {%k1}
+; AVX512FVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: truncstore_v2i64_v2i32:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1
+; AVX512BWVL-NEXT: vpmovsqd %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: retq
%a = icmp ne <2 x i64> %mask, zeroinitializer
%b = icmp slt <2 x i64> %x, <i64 2147483647, i64 2147483647>
%c = select <2 x i1> %b, <2 x i64> %x, <2 x i64> <i64 2147483647, i64 2147483647>
@@ -2832,9 +2844,7 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX512BWVL-LABEL: truncstore_v2i64_v2i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovqw %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsqw %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <2 x i64> %mask, zeroinitializer
%b = icmp slt <2 x i64> %x, <i64 32767, i64 32767>
@@ -3018,9 +3028,7 @@ define void @truncstore_v2i64_v2i8(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX512BWVL-LABEL: truncstore_v2i64_v2i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovqb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsqb %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <2 x i64> %mask, zeroinitializer
%b = icmp slt <2 x i64> %x, <i64 127, i64 127>
@@ -3816,9 +3824,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v16i32_v16i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsdw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
@@ -4594,9 +4600,7 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v16i32_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsdb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
@@ -5034,9 +5038,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i32_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovdw %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsdw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
@@ -5473,9 +5475,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i32_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsdb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
@@ -5686,9 +5686,7 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, ptr %p, <4 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v4i32_v4i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdw %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsdw %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
%b = icmp slt <4 x i32> %x, <i32 32767, i32 32767, i32 32767, i32 32767>
@@ -5904,9 +5902,7 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, ptr %p, <4 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v4i32_v4i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovsdb %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
%b = icmp slt <4 x i32> %x, <i32 127, i32 127, i32 127, i32 127>
@@ -7332,9 +7328,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
; AVX512BWVL-LABEL: truncstore_v32i16_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovswb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <32 x i8> %mask, zeroinitializer
@@ -8083,9 +8077,7 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
; AVX512BWVL-LABEL: truncstore_v16i16_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovswb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i8> %mask, zeroinitializer
@@ -8445,9 +8437,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %p, <8 x i16> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i16_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmw %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovswb %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i16> %mask, zeroinitializer
%b = icmp slt <8 x i16> %x, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
index 4c4b6e78d1f8c..0386d9531723d 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
@@ -4,9 +4,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512FVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=AVX512FVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefixes=AVX512BWVL
define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
; SSE2-LABEL: truncstore_v8i64_v8i32:
@@ -281,13 +281,20 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
-; AVX512VL-LABEL: truncstore_v8i64_v8i32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512VL-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512FVL-LABEL: truncstore_v8i64_v8i32:
+; AVX512FVL: # %bb.0:
+; AVX512FVL-NEXT: vptestmd %ymm1, %ymm1, %k1
+; AVX512FVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512FVL-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
+; AVX512FVL-NEXT: vzeroupper
+; AVX512FVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: truncstore_v8i64_v8i32:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
+; AVX512BWVL-NEXT: vpmovusqd %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
%b = icmp ult <8 x i64> %x, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%c = select <8 x i1> %b, <8 x i64> %x, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -829,8 +836,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i64_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusqw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
@@ -1367,8 +1373,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i64_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusqb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
@@ -1547,13 +1552,20 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
-; AVX512VL-LABEL: truncstore_v4i64_v4i32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) {%k1}
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512FVL-LABEL: truncstore_v4i64_v4i32:
+; AVX512FVL: # %bb.0:
+; AVX512FVL-NEXT: vptestmd %xmm1, %xmm1, %k1
+; AVX512FVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512FVL-NEXT: vpmovqd %ymm0, (%rdi) {%k1}
+; AVX512FVL-NEXT: vzeroupper
+; AVX512FVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: truncstore_v4i64_v4i32:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
+; AVX512BWVL-NEXT: vpmovusqd %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
%b = icmp ult <4 x i64> %x, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%c = select <4 x i1> %b, <4 x i64> %x, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -1868,8 +1880,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v4i64_v4i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovqw %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusqw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
@@ -2188,8 +2199,7 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v4i64_v4i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovqb %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusqb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
@@ -2304,12 +2314,18 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
-; AVX512VL-LABEL: truncstore_v2i64_v2i32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1
-; AVX512VL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512VL-NEXT: vpmovqd %xmm0, (%rdi) {%k1}
-; AVX512VL-NEXT: retq
+; AVX512FVL-LABEL: truncstore_v2i64_v2i32:
+; AVX512FVL: # %bb.0:
+; AVX512FVL-NEXT: vptestmq %xmm1, %xmm1, %k1
+; AVX512FVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512FVL-NEXT: vpmovqd %xmm0, (%rdi) {%k1}
+; AVX512FVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: truncstore_v2i64_v2i32:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1
+; AVX512BWVL-NEXT: vpmovusqd %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: retq
%a = icmp ne <2 x i64> %mask, zeroinitializer
%b = icmp ult <2 x i64> %x, <i64 4294967295, i64 4294967295>
%c = select <2 x i1> %b, <2 x i64> %x, <2 x i64> <i64 4294967295, i64 4294967295>
@@ -2470,8 +2486,7 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX512BWVL-LABEL: truncstore_v2i64_v2i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovqw %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusqw %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <2 x i64> %mask, zeroinitializer
%b = icmp ult <2 x i64> %x, <i64 65535, i64 65535>
@@ -2630,8 +2645,7 @@ define void @truncstore_v2i64_v2i8(<2 x i64> %x, ptr %p, <2 x i64> %mask) {
; AVX512BWVL-LABEL: truncstore_v2i64_v2i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovqb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusqb %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <2 x i64> %mask, zeroinitializer
%b = icmp ult <2 x i64> %x, <i64 255, i64 255>
@@ -3457,8 +3471,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v16i32_v16i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusdw %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
@@ -4273,8 +4286,7 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v16i32_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512BWVL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusdb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i32> %mask, zeroinitializer
@@ -4737,8 +4749,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i32_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovdw %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusdw %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
@@ -5194,8 +5205,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %p, <8 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i32_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusdb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i32> %mask, zeroinitializer
@@ -5455,8 +5465,7 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, ptr %p, <4 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v4i32_v4i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdw %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusdw %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
%b = icmp ult <4 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -5717,8 +5726,7 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, ptr %p, <4 x i32> %mask) {
; AVX512BWVL-LABEL: truncstore_v4i32_v4i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovusdb %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <4 x i32> %mask, zeroinitializer
%b = icmp ult <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
@@ -7171,8 +7179,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
; AVX512BWVL-LABEL: truncstore_v32i16_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %ymm1, %ymm1, %k1
-; AVX512BWVL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovuswb %zmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <32 x i8> %mask, zeroinitializer
@@ -7935,8 +7942,7 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %p, <16 x i8> %mask) {
; AVX512BWVL-LABEL: truncstore_v16i16_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmb %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovuswb %ymm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%a = icmp ne <16 x i8> %mask, zeroinitializer
@@ -8302,8 +8308,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %p, <8 x i16> %mask) {
; AVX512BWVL-LABEL: truncstore_v8i16_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vptestmw %xmm1, %xmm1, %k1
-; AVX512BWVL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1}
+; AVX512BWVL-NEXT: vpmovuswb %xmm0, (%rdi) {%k1}
; AVX512BWVL-NEXT: retq
%a = icmp ne <8 x i16> %mask, zeroinitializer
%b = icmp ult <8 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
More information about the llvm-commits mailing list