[llvm] r287461 - [X86][SSE] Improve PSHUFB lowering from either input
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 19 12:41:49 PST 2016
Author: rksimon
Date: Sat Nov 19 14:41:48 2016
New Revision: 287461
URL: http://llvm.org/viewvc/llvm-project?rev=287461&view=rev
Log:
[X86][SSE] Improve PSHUFB lowering from either input
Canonicalization may leave the zeroable vector in the first input.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=287461&r1=287460&r2=287461&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Nov 19 14:41:48 2016
@@ -7582,8 +7582,7 @@ static SmallBitVector computeZeroableShu
return Zeroable;
}
-/// Try to lower a shuffle with a single PSHUFB of V1.
-/// This is only possible if V2 is unused (at all, or only for zero elements).
+/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2,
@@ -7603,6 +7602,7 @@ static SDValue lowerVectorShuffleWithPSH
// Sign bit set in i8 mask means zero element.
SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
+ SDValue V;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / NumEltBytes];
if (M < 0) {
@@ -7613,9 +7613,13 @@ static SDValue lowerVectorShuffleWithPSH
PSHUFBMask[i] = ZeroMask;
continue;
}
- // Only allow V1.
- if (M >= Size)
+
+ // We can only use a single input of V1 or V2.
+ SDValue SrcV = (M >= Size ? V2 : V1);
+ if (V && V != SrcV)
return SDValue();
+ V = SrcV;
+ M %= Size;
// PSHUFB can't cross lanes, ensure this doesn't happen.
if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
@@ -7625,10 +7629,11 @@ static SDValue lowerVectorShuffleWithPSH
M = M * NumEltBytes + (i % NumEltBytes);
PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
}
+ assert(V && "Failed to find a source input");
MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
return DAG.getBitcast(
- VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V1),
+ VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll?rev=287461&r1=287460&r2=287461&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll Sat Nov 19 14:41:48 2016
@@ -120,11 +120,7 @@ define <64 x i8> @shuffle_v64i8_0zzzzzzz
;
; AVX512BW-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: movl $255, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm1
-; AVX512BW-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
@@ -137,10 +133,7 @@ define <64 x i8> @shuffle_v64i8_0zzzzzzz
;
; AVX512VBMI-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512VBMI: # BB#0:
-; AVX512VBMI-NEXT: vmovdqu8 {{.*#+}} zmm2 = [64,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63]
-; AVX512VBMI-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm2, %zmm1
-; AVX512VBMI-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512VBMI-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI-NEXT: retq
%shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 0, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
ret <64 x i8> %shuffle
More information about the llvm-commits
mailing list