[llvm] r314631 - [X86][SSE] Add faux shuffle combining support for PACKUS
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 1 11:43:48 PDT 2017
Author: rksimon
Date: Sun Oct 1 11:43:48 2017
New Revision: 314631
URL: http://llvm.org/viewvc/llvm-project?rev=314631&view=rev
Log:
[X86][SSE] Add faux shuffle combining support for PACKUS
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=314631&r1=314630&r2=314631&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Oct 1 11:43:48 2017
@@ -5931,7 +5931,8 @@ static bool getFauxShuffleMask(SDValue N
Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
return true;
}
- case X86ISD::PACKSS: {
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
@@ -5940,9 +5941,19 @@ static bool getFauxShuffleMask(SDValue N
// If we know input saturation won't happen we can treat this
// as a truncation shuffle.
- if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt ||
- DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)
- return false;
+ if (Opcode == X86ISD::PACKSS) {
+ if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt ||
+ DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)
+ return false;
+ } else {
+ KnownBits Known0, Known1;
+ DAG.computeKnownBits(N0, Known0);
+ if (Known0.countMinLeadingZeros() < NumBitsPerElt)
+ return false;
+ DAG.computeKnownBits(N1, Known1);
+ if (Known1.countMinLeadingZeros() < NumBitsPerElt)
+ return false;
+ }
bool IsUnary = (N0 == N1);
unsigned Offset = IsUnary ? 0 : NumElts;
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=314631&r1=314630&r2=314631&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Sun Oct 1 11:43:48 2017
@@ -846,16 +846,12 @@ declare <32 x i8> @llvm.x86.avx2.packssw
define <16 x i16> @shuffle_combine_packusdw_pshufb(<8 x i32> %a0, <8 x i32> %a1) {
; X32-LABEL: shuffle_combine_packusdw_pshufb:
; X32: # BB#0:
-; X32-NEXT: vpsrld $16, %ymm0, %ymm0
-; X32-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,6,7,2,3,14,15,10,11,6,7,2,3,18,19,22,23,26,27,30,31,30,31,26,27,22,23,18,19]
; X32-NEXT: retl
;
; X64-LABEL: shuffle_combine_packusdw_pshufb:
; X64: # BB#0:
-; X64-NEXT: vpsrld $16, %ymm0, %ymm0
-; X64-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,6,7,2,3,14,15,10,11,6,7,2,3,18,19,22,23,26,27,30,31,30,31,26,27,22,23,18,19]
; X64-NEXT: retq
%1 = lshr <8 x i32> %a0, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %1)
@@ -867,18 +863,12 @@ declare <16 x i16> @llvm.x86.avx2.packus
define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) {
; X32-LABEL: shuffle_combine_packuswb_pshufb:
; X32: # BB#0:
-; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
-; X32-NEXT: vpsrlw $8, %ymm1, %ymm1
-; X32-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1,31,29,27,25,23,21,19,17,31,29,27,25,23,21,19,17]
; X32-NEXT: retl
;
; X64-LABEL: shuffle_combine_packuswb_pshufb:
; X64: # BB#0:
-; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
-; X64-NEXT: vpsrlw $8, %ymm1, %ymm1
-; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1,31,29,27,25,23,21,19,17,31,29,27,25,23,21,19,17]
; X64-NEXT: retq
%1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=314631&r1=314630&r2=314631&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Sun Oct 1 11:43:48 2017
@@ -683,18 +683,12 @@ declare <16 x i8> @llvm.x86.sse2.packssw
define <16 x i8> @shuffle_combine_packuswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: shuffle_combine_packuswb_pshufb:
; SSE: # BB#0:
-; SSE-NEXT: psrlw $8, %xmm0
-; SSE-NEXT: psrlw $8, %xmm1
-; SSE-NEXT: packuswb %xmm1, %xmm0
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_combine_packuswb_pshufb:
; AVX: # BB#0:
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1]
; AVX-NEXT: retq
%1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
More information about the llvm-commits mailing list