[llvm] r293431 - [X86 Codegen] Fixed a bug in unsigned saturation

Elena Demikhovsky via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 29 05:18:30 PST 2017


Author: delena
Date: Sun Jan 29 07:18:30 2017
New Revision: 293431

URL: http://llvm.org/viewvc/llvm-project?rev=293431&view=rev
Log:
[X86 Codegen] Fixed a bug in unsigned saturation

PACKUSWB converts Signed word to Unsigned byte, (the same about DW) and it can't be used for umin+truncate pattern.
AVX-512 VPMOVUS* instructions fit the pattern since they convert Unsigned to Unsigned.

See https://llvm.org/bugs/show_bug.cgi?id=31773

Differential Revision: https://reviews.llvm.org/D29196


Added:
    llvm/trunk/test/CodeGen/X86/pr31773.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx-trunc.ll
    llvm/trunk/test/CodeGen/X86/avx512-trunc.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=293431&r1=293430&r2=293431&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jan 29 07:18:30 2017
@@ -31382,21 +31382,6 @@ static bool isSATValidOnAVX512Subtarget(
   return false;
 }
 
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
-  if (Subtarget.hasSSE2())
-    // v16i16 -> v16i8
-    if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
-      return true;
-  if (Subtarget.hasSSE41())
-    // v8i32 -> v8i16
-    if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
-      return true;
-  return false;
-}
-
 /// Detect a pattern of truncation with saturation:
 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
 /// Return the source value to be truncated or SDValue() if the pattern was not
@@ -31437,16 +31422,9 @@ combineTruncateWithUSat(SDValue In, EVT
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
     return SDValue();
-  SDValue USatVal = detectUSatPattern(In, VT);
-  if (USatVal) {
+  if (auto USatVal = detectUSatPattern(In, VT))
     if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
       return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
-    if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
-      SDValue Lo, Hi;
-      std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
-      return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
-    }
-  }
   return SDValue();
 }
 

Modified: llvm/trunk/test/CodeGen/X86/avx-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-trunc.ll?rev=293431&r1=293430&r2=293431&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-trunc.ll Sun Jan 29 07:18:30 2017
@@ -40,28 +40,4 @@ define <16 x i8> @trunc_16_8(<16 x i16>
   ret <16 x i8> %B
 }
 
-define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
-; CHECK-LABEL: usat_trunc_wb_256:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  %x6 = trunc <16 x i16> %x5 to <16 x i8>
-  ret <16 x i8> %x6
-}
 
-define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
-; CHECK-LABEL: usat_trunc_dw_256:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  %x6 = trunc <8 x i32> %x5 to <8 x i16>
-  ret <8 x i16> %x6
-}

Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc.ll?rev=293431&r1=293430&r2=293431&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc.ll Sun Jan 29 07:18:30 2017
@@ -505,8 +505,9 @@ define void @trunc_wb_128_mem(<8 x i16>
 define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
 ; KNL-LABEL: usat_trunc_wb_256_mem:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vpminuw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; KNL-NEXT:    retq
 ;
@@ -524,8 +525,9 @@ define void @usat_trunc_wb_256_mem(<16 x
 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
 ; KNL-LABEL: usat_trunc_wb_256:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vpminuw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: usat_trunc_wb_256:

Added: llvm/trunk/test/CodeGen/X86/pr31773.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr31773.ll?rev=293431&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr31773.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr31773.ll Sun Jan 29 07:18:30 2017
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+; This matter of this test is ensuring that vpackus* is not used for umin+trunc combination, since vpackus* input is a signed number.
+define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
+; CHECK-LABEL: usat_trunc_wb_256:
+; CHECK-NOT:    vpackuswb %xmm1, %xmm0, %xmm0
+  %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %x6 = trunc <16 x i16> %x5 to <16 x i8>
+  ret <16 x i8> %x6
+}
+ 
+define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
+; CHECK-LABEL: usat_trunc_dw_256:
+; CHECK-NOT:    vpackusdw %xmm1, %xmm0, %xmm0
+  %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+  %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+  %x6 = trunc <8 x i32> %x5 to <8 x i16>
+  ret <8 x i16> %x6
+}




More information about the llvm-commits mailing list