[llvm] r323553 - [X86][AVX512] Add combining support for X86ISD::VTRUNCS

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 26 12:01:13 PST 2018


Author: rksimon
Date: Fri Jan 26 12:01:12 2018
New Revision: 323553

URL: http://llvm.org/viewvc/llvm-project?rev=323553&view=rev
Log:
[X86][AVX512] Add combining support for X86ISD::VTRUNCS

Similar to the existing support for X86ISD::VTRUNCUS.

Differential Revision: https://reviews.llvm.org/D42544

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=323553&r1=323552&r2=323553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jan 26 12:01:12 2018
@@ -34109,6 +34109,41 @@ static SDValue detectUSatPattern(SDValue
   return SDValue();
 }
 
+/// Detect patterns of truncation with signed saturation:
+/// (truncate (smin ((smax (x, signed_min_of_dest_type)),
+///                  signed_max_of_dest_type)) to dest_type)
+/// or:
+/// (truncate (smax ((smin (x, signed_max_of_dest_type)),
+///                  signed_min_of_dest_type)) to dest_type).
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectSSatPattern(SDValue In, EVT VT) {
+  unsigned NumDstBits = VT.getScalarSizeInBits();
+  unsigned NumSrcBits = In.getScalarValueSizeInBits();
+  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+  auto MatchMinMax = [](SDValue V, unsigned Opcode, const APInt &Limit) {
+    APInt C;
+    if (V.getOpcode() == Opcode &&
+        ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
+      return V.getOperand(0);
+    return SDValue();
+  };
+
+  APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
+  APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
+
+  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax))
+    if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin))
+      return SMax;
+
+  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin))
+    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax))
+      return SMin;
+
+  return SDValue();
+}
+
 /// Detect a pattern of truncation with saturation:
 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
 /// The types should allow to use VPMOVUS* instruction on AVX512.
@@ -34121,15 +34156,18 @@ static SDValue detectAVX512USatPattern(S
   return detectUSatPattern(In, VT);
 }
 
-static SDValue combineTruncateWithUSat(SDValue In, EVT VT, const SDLoc &DL,
-                                       SelectionDAG &DAG,
-                                       const X86Subtarget &Subtarget) {
+static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
+                                      SelectionDAG &DAG,
+                                      const X86Subtarget &Subtarget) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
     return SDValue();
-  if (auto USatVal = detectUSatPattern(In, VT))
-    if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+  if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) {
+    if (auto SSatVal = detectSSatPattern(In, VT))
+      return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
+    if (auto USatVal = detectUSatPattern(In, VT))
       return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
+  }
   return SDValue();
 }
 
@@ -35393,8 +35431,8 @@ static SDValue combineTruncate(SDNode *N
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;
 
-  // Try to combine truncation with unsigned saturation.
-  if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
+  // Try to combine truncation with signed/unsigned saturation.
+  if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
     return Val;
 
   // The bitcast source is a direct mmx result.

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll?rev=323553&r1=323552&r2=323553&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll Fri Jan 26 12:01:12 2018
@@ -258,9 +258,7 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32
 ;
 ; AVX512VL-LABEL: trunc_ssat_v4i64_v4i32:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
+; AVX512VL-NEXT:    vpmovsqd %ymm0, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
@@ -278,9 +276,7 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i32:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmovqd %ymm0, %xmm0
+; AVX512BWVL-NEXT:    vpmovsqd %ymm0, %xmm0
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <4 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -705,9 +701,7 @@ define <8 x i32> @trunc_ssat_v8i64_v8i32
 ;
 ; AVX512-LABEL: trunc_ssat_v8i64_v8i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
+; AVX512-NEXT:    vpmovsqd %zmm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = icmp slt <8 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
   %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -1172,9 +1166,7 @@ define <8 x i16> @trunc_ssat_v8i64_v8i16
 ;
 ; AVX512-LABEL: trunc_ssat_v8i64_v8i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
+; AVX512-NEXT:    vpmovsqw %zmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = icmp slt <8 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767>
@@ -1302,9 +1294,7 @@ define <8 x i16> @trunc_ssat_v8i32_v8i16
 ;
 ; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
+; AVX512VL-NEXT:    vpmovsdw %ymm0, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
@@ -1321,9 +1311,7 @@ define <8 x i16> @trunc_ssat_v8i32_v8i16
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
+; AVX512BWVL-NEXT:    vpmovsdw %ymm0, %xmm0
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <8 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
@@ -1511,9 +1499,7 @@ define <16 x i16> @trunc_ssat_v16i32_v16
 ;
 ; AVX512-LABEL: trunc_ssat_v16i32_v16i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
+; AVX512-NEXT:    vpmovsdw %zmm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = icmp slt <16 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
   %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
@@ -3231,9 +3217,7 @@ define <16 x i8> @trunc_ssat_v16i32_v16i
 ;
 ; AVX512-LABEL: trunc_ssat_v16i32_v16i8:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512-NEXT:    vpmovsdb %zmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
@@ -3344,9 +3328,7 @@ define <16 x i8> @trunc_ssat_v16i16_v16i
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v16i16_v16i8:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
+; AVX512BWVL-NEXT:    vpmovswb %ymm0, %xmm0
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <16 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
@@ -3500,16 +3482,12 @@ define <32 x i8> @trunc_ssat_v32i16_v32i
 ;
 ; AVX512BW-LABEL: trunc_ssat_v32i16_v32i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpminsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmaxsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v32i16_v32i8:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpmaxsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT:    vpmovswb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <32 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
   %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>




More information about the llvm-commits mailing list