[llvm] r293431 - [X86 Codegen] Fixed a bug in unsigned saturation
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 29 05:18:30 PST 2017
Author: delena
Date: Sun Jan 29 07:18:30 2017
New Revision: 293431
URL: http://llvm.org/viewvc/llvm-project?rev=293431&view=rev
Log:
[X86 Codegen] Fixed a bug in unsigned saturation
PACKUSWB converts Signed word to Unsigned byte, (the same about DW) and it can't be used for umin+truncate pattern.
AVX-512 VPMOVUS* instructions fit the pattern since they convert Unsigned to Unsigned.
See https://llvm.org/bugs/show_bug.cgi?id=31773
Differential Revision: https://reviews.llvm.org/D29196
Added:
llvm/trunk/test/CodeGen/X86/pr31773.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx-trunc.ll
llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=293431&r1=293430&r2=293431&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jan 29 07:18:30 2017
@@ -31382,21 +31382,6 @@ static bool isSATValidOnAVX512Subtarget(
return false;
}
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
- if (Subtarget.hasSSE2())
- // v16i16 -> v16i8
- if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
- return true;
- if (Subtarget.hasSSE41())
- // v8i32 -> v8i16
- if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
- return true;
- return false;
-}
-
/// Detect a pattern of truncation with saturation:
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// Return the source value to be truncated or SDValue() if the pattern was not
@@ -31437,16 +31422,9 @@ combineTruncateWithUSat(SDValue In, EVT
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
return SDValue();
- SDValue USatVal = detectUSatPattern(In, VT);
- if (USatVal) {
+ if (auto USatVal = detectUSatPattern(In, VT))
if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
- if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
- return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
- }
- }
return SDValue();
}
Modified: llvm/trunk/test/CodeGen/X86/avx-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-trunc.ll?rev=293431&r1=293430&r2=293431&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-trunc.ll Sun Jan 29 07:18:30 2017
@@ -40,28 +40,4 @@ define <16 x i8> @trunc_16_8(<16 x i16>
ret <16 x i8> %B
}
-define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
-; CHECK-LABEL: usat_trunc_wb_256:
-; CHECK: # BB#0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
- %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
- %x6 = trunc <16 x i16> %x5 to <16 x i8>
- ret <16 x i8> %x6
-}
-define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
-; CHECK-LABEL: usat_trunc_dw_256:
-; CHECK: # BB#0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
- %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
- %x6 = trunc <8 x i32> %x5 to <8 x i16>
- ret <8 x i16> %x6
-}
Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc.ll?rev=293431&r1=293430&r2=293431&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc.ll Sun Jan 29 07:18:30 2017
@@ -505,8 +505,9 @@ define void @trunc_wb_128_mem(<8 x i16>
define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
; KNL-LABEL: usat_trunc_wb_256_mem:
; KNL: ## BB#0:
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vmovdqu %xmm0, (%rdi)
; KNL-NEXT: retq
;
@@ -524,8 +525,9 @@ define void @usat_trunc_wb_256_mem(<16 x
define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
; KNL-LABEL: usat_trunc_wb_256:
; KNL: ## BB#0:
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: usat_trunc_wb_256:
Added: llvm/trunk/test/CodeGen/X86/pr31773.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr31773.ll?rev=293431&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr31773.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr31773.ll Sun Jan 29 07:18:30 2017
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+; This matter of this test is ensuring that vpackus* is not used for umin+trunc combination, since vpackus* input is a signed number.
+define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
+; CHECK-LABEL: usat_trunc_wb_256:
+; CHECK-NOT: vpackuswb %xmm1, %xmm0, %xmm0
+ %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x6 = trunc <16 x i16> %x5 to <16 x i8>
+ ret <16 x i8> %x6
+}
+
+define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
+; CHECK-LABEL: usat_trunc_dw_256:
+; CHECK-NOT: vpackusdw %xmm1, %xmm0, %xmm0
+ %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %x6 = trunc <8 x i32> %x5 to <8 x i16>
+ ret <8 x i16> %x6
+}
More information about the llvm-commits
mailing list