[llvm] r332336 - [X86] Improve unsigned saturation downconvert detection.

Mikael Holmén via llvm-commits llvm-commits at lists.llvm.org
Tue May 15 23:41:04 PDT 2018


Hi Artur,

This commit caused a compiler warning, which breaks builds that use -Werror:

/repo/app/clang/3.6/bin/clang++  -march=corei7  -DGTEST_HAS_RTTI=0 
-D_DEBUG -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS 
-D__STDC_LIMIT_MACROS -Ilib/Target/X86 -I../lib/Target/X86 
-I/usr/include/libxml2 -Iinclude -I../include 
-I/repo/app/valgrind/3.11.0/include  -fPIC -fvisibility-inlines-hidden 
-Werror -Werror=date-time -std=c++11 -Wall -W -Wno-unused-parameter 
-Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic 
-Wno-long-long -Wcovered-switch-default -Wnon-virtual-dtor 
-Wdelete-non-virtual-dtor -Wstring-conversion -fcolor-diagnostics 
-ffunction-sections -fdata-sections -O3    -UNDEBUG  -fno-exceptions 
-fno-rtti -MMD -MT 
lib/Target/X86/CMakeFiles/LLVMX86CodeGen.dir/X86ISelLowering.cpp.o -MF 
lib/Target/X86/CMakeFiles/LLVMX86CodeGen.dir/X86ISelLowering.cpp.o.d -o 
lib/Target/X86/CMakeFiles/LLVMX86CodeGen.dir/X86ISelLowering.cpp.o -c 
../lib/Target/X86/X86ISelLowering.cpp
../lib/Target/X86/X86ISelLowering.cpp:34614:17: error: unused variable 
'SMax' [-Werror,-Wunused-variable]
     if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, C1))
                 ^
1 error generated.


I removed the unused SMax variable in r332431.

regards,
Mikael

On 05/15/2018 12:24 PM, Artur Gainullin via llvm-commits wrote:
> Author: arturgainullin
> Date: Tue May 15 03:24:12 2018
> New Revision: 332336
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=332336&view=rev
> Log:
> [X86] Improve unsigned saturation downconvert detection.
> 
> Summary:
> New unsigned saturation downconvert patterns detection was implemented in
> X86 Codegen:
> 
> (truncate (smin (smax (x, C1), C2)) to dest_type),
> where C1 >= 0 and C2 is unsigned max of destination type.
> 
> (truncate (smax (smin (x, C2), C1)) to dest_type)
> where C1 >= 0, C2 is unsigned max of destination type and C1 <= C2.
> These two patterns are equivalent to:
> 
> (truncate (umin (smax(x, C1), unsigned_max_of_dest_type)) to dest_type)
> 
> Reviewers: RKSimon
> 
> Subscribers: llvm-commits, a.elovikov
> 
> Differential Revision: https://reviews.llvm.org/D45315
> 
> Modified:
>      llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>      llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
>      llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=332336&r1=332335&r2=332336&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue May 15 03:24:12 2018
> @@ -34571,24 +34571,57 @@ static bool isSATValidOnAVX512Subtarget(
>     return false;
>   }
>   
> -/// Detect a pattern of truncation with unsigned saturation:
> -/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
> -/// Return the source value to be truncated or SDValue() if the pattern was not
> -/// matched.
> -static SDValue detectUSatPattern(SDValue In, EVT VT) {
> -  if (In.getOpcode() != ISD::UMIN)
> -    return SDValue();
> +/// Detect patterns of truncation with unsigned saturation:
> +///
> +/// 1. (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
> +///   Return the source value x to be truncated or SDValue() if the pattern was
> +///   not matched.
> +///
> +/// 2. (truncate (smin (smax (x, C1), C2)) to dest_type),
> +///   where C1 >= 0 and C2 is unsigned max of destination type.
> +///
> +///    (truncate (smax (smin (x, C2), C1)) to dest_type)
> +///   where C1 >= 0, C2 is unsigned max of destination type and C1 <= C2.
> +///
> +///   These two patterns are equivalent to:
> +///   (truncate (umin (smax(x, C1), unsigned_max_of_dest_type)) to dest_type)
> +///   So return the smax(x, C1) value to be truncated or SDValue() if the
> +///   pattern was not matched.
> +static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG,
> +                                 const SDLoc &DL) {
> +  EVT InVT = In.getValueType();
>   
>     // Saturation with truncation. We truncate from InVT to VT.
> -  assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
> +  assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
>            "Unexpected types for truncate operation");
>   
> -  APInt C;
> -  if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
> -    // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
> +  // Match min/max and return limit value as a parameter.
> +  auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {
> +    if (V.getOpcode() == Opcode &&
> +        ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
> +      return V.getOperand(0);
> +    return SDValue();
> +  };
> +
> +  APInt C1, C2;
> +  if (SDValue UMin = MatchMinMax(In, ISD::UMIN, C2))
> +    // C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
>       // the element size of the destination type.
> -    return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) : SDValue();
> -  }
> +    if (C2.isMask(VT.getScalarSizeInBits()))
> +      return UMin;
> +
> +  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, C2))
> +    if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, C1))
> +      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))
> +        return SMin;
> +
> +  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, C1))
> +    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, C2))
> +      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&
> +          C2.uge(C1)) {
> +        return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
> +      }
> +
>     return SDValue();
>   }
>   
> @@ -34654,14 +34687,15 @@ static SDValue detectAVX512SSatPattern(S
>   /// The types should allow to use VPMOVUS* instruction on AVX512.
>   /// Return the source value to be truncated or SDValue() if the pattern was not
>   /// matched.
> -static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
> +static SDValue detectAVX512USatPattern(SDValue In, EVT VT, SelectionDAG &DAG,
> +                                       const SDLoc &DL,
>                                          const X86Subtarget &Subtarget,
>                                          const TargetLowering &TLI) {
>     if (!TLI.isTypeLegal(In.getValueType()))
>       return SDValue();
>     if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
>       return SDValue();
> -  return detectUSatPattern(In, VT);
> +  return detectUSatPattern(In, VT, DAG, DL);
>   }
>   
>   static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
> @@ -34675,7 +34709,7 @@ static SDValue combineTruncateWithSat(SD
>         isSATValidOnAVX512Subtarget(InVT, VT, Subtarget)) {
>       if (auto SSatVal = detectSSatPattern(In, VT))
>         return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
> -    if (auto USatVal = detectUSatPattern(In, VT))
> +    if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))
>         return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
>     }
>     if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
> @@ -35350,9 +35384,8 @@ static SDValue combineStore(SDNode *N, S
>         return EmitTruncSStore(true /* Signed saturation */, St->getChain(),
>                                dl, Val, St->getBasePtr(),
>                                St->getMemoryVT(), St->getMemOperand(), DAG);
> -    if (SDValue Val =
> -        detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget,
> -                                TLI))
> +    if (SDValue Val = detectAVX512USatPattern(St->getValue(), St->getMemoryVT(),
> +                                              DAG, dl, Subtarget, TLI))
>         return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
>                                dl, Val, St->getBasePtr(),
>                                St->getMemoryVT(), St->getMemOperand(), DAG);
> 
> Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc.ll?rev=332336&r1=332335&r2=332336&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-trunc.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-trunc.ll Tue May 15 03:24:12 2018
> @@ -790,8 +790,7 @@ define void @smax_usat_trunc_wb_256_mem1
>   ; SKX:       ## %bb.0:
>   ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; SKX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
> -; SKX-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
> -; SKX-NEXT:    vpmovwb %ymm0, (%rdi)
> +; SKX-NEXT:    vpmovuswb %ymm0, (%rdi)
>   ; SKX-NEXT:    vzeroupper
>   ; SKX-NEXT:    retq
>     %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
> @@ -817,10 +816,9 @@ define void @smax_usat_trunc_wb_256_mem2
>   ;
>   ; SKX-LABEL: smax_usat_trunc_wb_256_mem2:
>   ; SKX:       ## %bb.0:
> -; SKX-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
>   ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; SKX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
> -; SKX-NEXT:    vpmovwb %ymm0, (%rdi)
> +; SKX-NEXT:    vpmovuswb %ymm0, (%rdi)
>   ; SKX-NEXT:    vzeroupper
>   ; SKX-NEXT:    retq
>     %x1 = icmp slt <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> @@ -847,8 +845,7 @@ define <16 x i8> @smax_usat_trunc_wb_256
>   ; SKX:       ## %bb.0:
>   ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; SKX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
> -; SKX-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
> -; SKX-NEXT:    vpmovwb %ymm0, %xmm0
> +; SKX-NEXT:    vpmovuswb %ymm0, %xmm0
>   ; SKX-NEXT:    vzeroupper
>   ; SKX-NEXT:    retq
>     %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
> @@ -873,8 +870,7 @@ define void @smax_usat_trunc_wb_128_mem(
>   ; SKX:       ## %bb.0:
>   ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; SKX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
> -; SKX-NEXT:    vpminsw {{.*}}(%rip), %xmm0, %xmm0
> -; SKX-NEXT:    vpmovwb %xmm0, (%rdi)
> +; SKX-NEXT:    vpmovuswb %xmm0, (%rdi)
>   ; SKX-NEXT:    retq
>     %x1 = icmp sgt <8 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
>     %x2 = select <8 x i1> %x1, <8 x i16> %i, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
> @@ -890,8 +886,7 @@ define void @smax_usat_trunc_db_512_mem(
>   ; ALL:       ## %bb.0:
>   ; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; ALL-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
> -; ALL-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
> -; ALL-NEXT:    vpmovdb %zmm0, (%rdi)
> +; ALL-NEXT:    vpmovusdb %zmm0, (%rdi)
>   ; ALL-NEXT:    vzeroupper
>   ; ALL-NEXT:    retq
>     %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> @@ -908,8 +903,7 @@ define void @smax_usat_trunc_qb_512_mem(
>   ; ALL:       ## %bb.0:
>   ; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; ALL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
> -; ALL-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
> -; ALL-NEXT:    vpmovqb %zmm0, (%rdi)
> +; ALL-NEXT:    vpmovusqb %zmm0, (%rdi)
>   ; ALL-NEXT:    vzeroupper
>   ; ALL-NEXT:    retq
>     %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
> @@ -926,8 +920,7 @@ define void @smax_usat_trunc_qd_512_mem(
>   ; ALL:       ## %bb.0:
>   ; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; ALL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
> -; ALL-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
> -; ALL-NEXT:    vpmovqd %zmm0, (%rdi)
> +; ALL-NEXT:    vpmovusqd %zmm0, (%rdi)
>   ; ALL-NEXT:    vzeroupper
>   ; ALL-NEXT:    retq
>     %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
> @@ -944,8 +937,7 @@ define void @smax_usat_trunc_qw_512_mem(
>   ; ALL:       ## %bb.0:
>   ; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; ALL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
> -; ALL-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
> -; ALL-NEXT:    vpmovqw %zmm0, (%rdi)
> +; ALL-NEXT:    vpmovusqw %zmm0, (%rdi)
>   ; ALL-NEXT:    vzeroupper
>   ; ALL-NEXT:    retq
>     %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
> @@ -961,13 +953,10 @@ define <32 x i8> @smax_usat_trunc_db_102
>   ; KNL-LABEL: smax_usat_trunc_db_1024:
>   ; KNL:       ## %bb.0:
>   ; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
> -; KNL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
>   ; KNL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
> -; KNL-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
> -; KNL-NEXT:    vpminsd %zmm2, %zmm1, %zmm1
> -; KNL-NEXT:    vpminsd %zmm2, %zmm0, %zmm0
> -; KNL-NEXT:    vpmovdb %zmm0, %xmm0
> -; KNL-NEXT:    vpmovdb %zmm1, %xmm1
> +; KNL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
> +; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
> +; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
>   ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
>   ; KNL-NEXT:    retq
>   ;
> @@ -996,13 +985,10 @@ define void @smax_usat_trunc_db_1024_mem
>   ; KNL-LABEL: smax_usat_trunc_db_1024_mem:
>   ; KNL:       ## %bb.0:
>   ; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
> -; KNL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
>   ; KNL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
> -; KNL-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
> -; KNL-NEXT:    vpminsd %zmm2, %zmm1, %zmm1
> -; KNL-NEXT:    vpminsd %zmm2, %zmm0, %zmm0
> -; KNL-NEXT:    vpmovdb %zmm0, %xmm0
> -; KNL-NEXT:    vpmovdb %zmm1, %xmm1
> +; KNL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
> +; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
> +; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
>   ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
>   ; KNL-NEXT:    vmovdqu %ymm0, (%rdi)
>   ; KNL-NEXT:    vzeroupper
> @@ -1036,8 +1022,7 @@ define <16 x i16> @smax_usat_trunc_dw_51
>   ; ALL:       ## %bb.0:
>   ; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; ALL-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
> -; ALL-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
> -; ALL-NEXT:    vpmovdw %zmm0, %ymm0
> +; ALL-NEXT:    vpmovusdw %zmm0, %ymm0
>   ; ALL-NEXT:    retq
>     %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
>     %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> 
> Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll?rev=332336&r1=332335&r2=332336&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll Tue May 15 03:24:12 2018
> @@ -244,10 +244,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i
>   ;
>   ; AVX512VL-LABEL: trunc_packus_v4i64_v4i32:
>   ; AVX512VL:       # %bb.0:
> -; AVX512VL-NEXT:    vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
>   ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512VL-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
> -; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
> +; AVX512VL-NEXT:    vpmovusqd %ymm0, %xmm0
>   ; AVX512VL-NEXT:    vzeroupper
>   ; AVX512VL-NEXT:    retq
>   ;
> @@ -265,10 +264,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i
>   ;
>   ; AVX512BWVL-LABEL: trunc_packus_v4i64_v4i32:
>   ; AVX512BWVL:       # %bb.0:
> -; AVX512BWVL-NEXT:    vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
>   ; AVX512BWVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512BWVL-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
> -; AVX512BWVL-NEXT:    vpmovqd %ymm0, %xmm0
> +; AVX512BWVL-NEXT:    vpmovusqd %ymm0, %xmm0
>   ; AVX512BWVL-NEXT:    vzeroupper
>   ; AVX512BWVL-NEXT:    retq
>     %1 = icmp slt <4 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
> @@ -665,10 +663,9 @@ define <8 x i32> @trunc_packus_v8i64_v8i
>   ;
>   ; AVX512-LABEL: trunc_packus_v8i64_v8i32:
>   ; AVX512:       # %bb.0:
> -; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
>   ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
> -; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
> +; AVX512-NEXT:    vpmovusqd %zmm0, %ymm0
>   ; AVX512-NEXT:    retq
>     %1 = icmp slt <8 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
>     %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
> @@ -1070,10 +1067,9 @@ define <8 x i16> @trunc_packus_v8i64_v8i
>   ;
>   ; AVX512-LABEL: trunc_packus_v8i64_v8i16:
>   ; AVX512:       # %bb.0:
> -; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
>   ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
> -; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
> +; AVX512-NEXT:    vpmovusqw %zmm0, %xmm0
>   ; AVX512-NEXT:    vzeroupper
>   ; AVX512-NEXT:    retq
>     %1 = icmp slt <8 x i64> %a0, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
> @@ -1170,10 +1166,9 @@ define <8 x i16> @trunc_packus_v8i32_v8i
>   ;
>   ; AVX512VL-LABEL: trunc_packus_v8i32_v8i16:
>   ; AVX512VL:       # %bb.0:
> -; AVX512VL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
>   ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512VL-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
> -; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
> +; AVX512VL-NEXT:    vpmovusdw %ymm0, %xmm0
>   ; AVX512VL-NEXT:    vzeroupper
>   ; AVX512VL-NEXT:    retq
>   ;
> @@ -1190,10 +1185,9 @@ define <8 x i16> @trunc_packus_v8i32_v8i
>   ;
>   ; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i16:
>   ; AVX512BWVL:       # %bb.0:
> -; AVX512BWVL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
>   ; AVX512BWVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512BWVL-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
> -; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
> +; AVX512BWVL-NEXT:    vpmovusdw %ymm0, %xmm0
>   ; AVX512BWVL-NEXT:    vzeroupper
>   ; AVX512BWVL-NEXT:    retq
>     %1 = icmp slt <8 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> @@ -1325,10 +1319,9 @@ define <16 x i16> @trunc_packus_v16i32_v
>   ;
>   ; AVX512-LABEL: trunc_packus_v16i32_v16i16:
>   ; AVX512:       # %bb.0:
> -; AVX512-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
>   ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
> -; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
> +; AVX512-NEXT:    vpmovusdw %zmm0, %ymm0
>   ; AVX512-NEXT:    retq
>     %1 = icmp slt <16 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
>     %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> @@ -2130,10 +2123,9 @@ define void @trunc_packus_v8i64_v8i8_sto
>   ;
>   ; AVX512-LABEL: trunc_packus_v8i64_v8i8_store:
>   ; AVX512:       # %bb.0:
> -; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
>   ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
> -; AVX512-NEXT:    vpmovqb %zmm0, (%rdi)
> +; AVX512-NEXT:    vpmovusqb %zmm0, (%rdi)
>   ; AVX512-NEXT:    vzeroupper
>   ; AVX512-NEXT:    retq
>     %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
> @@ -3042,10 +3034,9 @@ define void @trunc_packus_v8i32_v8i8_sto
>   ;
>   ; AVX512VL-LABEL: trunc_packus_v8i32_v8i8_store:
>   ; AVX512VL:       # %bb.0:
> -; AVX512VL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
>   ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512VL-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
> -; AVX512VL-NEXT:    vpmovdb %ymm0, (%rdi)
> +; AVX512VL-NEXT:    vpmovusdb %ymm0, (%rdi)
>   ; AVX512VL-NEXT:    vzeroupper
>   ; AVX512VL-NEXT:    retq
>   ;
> @@ -3063,10 +3054,9 @@ define void @trunc_packus_v8i32_v8i8_sto
>   ;
>   ; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i8_store:
>   ; AVX512BWVL:       # %bb.0:
> -; AVX512BWVL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
>   ; AVX512BWVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512BWVL-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
> -; AVX512BWVL-NEXT:    vpmovdb %ymm0, (%rdi)
> +; AVX512BWVL-NEXT:    vpmovusdb %ymm0, (%rdi)
>   ; AVX512BWVL-NEXT:    vzeroupper
>   ; AVX512BWVL-NEXT:    retq
>     %1 = icmp slt <8 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
> @@ -3107,10 +3097,9 @@ define <16 x i8> @trunc_packus_v16i32_v1
>   ;
>   ; AVX512-LABEL: trunc_packus_v16i32_v16i8:
>   ; AVX512:       # %bb.0:
> -; AVX512-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
>   ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
> -; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
> +; AVX512-NEXT:    vpmovusdb %zmm0, %xmm0
>   ; AVX512-NEXT:    vzeroupper
>   ; AVX512-NEXT:    retq
>     %1 = icmp slt <16 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
> @@ -3173,10 +3162,9 @@ define <16 x i8> @trunc_packus_v16i16_v1
>   ;
>   ; AVX512BWVL-LABEL: trunc_packus_v16i16_v16i8:
>   ; AVX512BWVL:       # %bb.0:
> -; AVX512BWVL-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
>   ; AVX512BWVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512BWVL-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
> -; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
> +; AVX512BWVL-NEXT:    vpmovuswb %ymm0, %xmm0
>   ; AVX512BWVL-NEXT:    vzeroupper
>   ; AVX512BWVL-NEXT:    retq
>     %1 = icmp slt <16 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> @@ -3242,18 +3230,16 @@ define <32 x i8> @trunc_packus_v32i16_v3
>   ;
>   ; AVX512BW-LABEL: trunc_packus_v32i16_v32i8:
>   ; AVX512BW:       # %bb.0:
> -; AVX512BW-NEXT:    vpminsw {{.*}}(%rip), %zmm0, %zmm0
>   ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
> -; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
> +; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm0
>   ; AVX512BW-NEXT:    retq
>   ;
>   ; AVX512BWVL-LABEL: trunc_packus_v32i16_v32i8:
>   ; AVX512BWVL:       # %bb.0:
> -; AVX512BWVL-NEXT:    vpminsw {{.*}}(%rip), %zmm0, %zmm0
>   ; AVX512BWVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>   ; AVX512BWVL-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
> -; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
> +; AVX512BWVL-NEXT:    vpmovuswb %zmm0, %ymm0
>   ; AVX512BWVL-NEXT:    retq
>     %1 = icmp slt <32 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
>     %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
> 


More information about the llvm-commits mailing list