[llvm] r294896 - [X86][AVX2] Add support for combining target shuffles to VPMOVZX
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 12 06:31:24 PST 2017
Author: rksimon
Date: Sun Feb 12 08:31:23 2017
New Revision: 294896
URL: http://llvm.org/viewvc/llvm-project?rev=294896&view=rev
Log:
[X86][AVX2] Add support for combining target shuffles to VPMOVZX
Initial 256-bit vector support; 512-bit support requires extra checks for AVX512BW (PMOVZXBW) and will be handled in a future patch.
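
For context, the VZEXT matcher accepts a shuffle mask in which destination lane i*Scale reads source element i and the Scale-1 lanes above it are undef or zero. Below is a minimal standalone sketch of that test -- not the in-tree code, which does this inline with isUndefOrZeroInRange; the sentinel values match LLVM's SM_SentinelUndef/SM_SentinelZero, and isZeroExtendMask is a hypothetical helper name used only for illustration:

  #include <cstdio>
  #include <vector>

  // Sentinel values as used by the x86 shuffle combiner.
  enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 };

  // Returns true if Mask is a zero-extension by Scale: lane i*Scale reads
  // source element i, and the following Scale-1 lanes are undef or zero.
  static bool isZeroExtendMask(const std::vector<int> &Mask, unsigned Scale) {
    unsigned NumDstElts = unsigned(Mask.size() / Scale);
    for (unsigned i = 0; i != NumDstElts; ++i) {
      if (Mask[i * Scale] != int(i))
        return false;
      for (unsigned j = 1; j != Scale; ++j) {
        int M = Mask[i * Scale + j];
        if (M != SM_SentinelUndef && M != SM_SentinelZero)
          return false;
      }
    }
    return true;
  }

  int main() {
    // v4i32 mask zero-extending 2 dwords to 2 qwords (Scale = 2),
    // i.e. the VPMOVZXDQ pattern.
    std::vector<int> Mask = {0, SM_SentinelZero, 1, SM_SentinelZero};
    std::printf("VPMOVZXDQ match: %d\n", isZeroExtendMask(Mask, 2)); // 1
    return 0;
  }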
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=294896&r1=294895&r2=294896&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Feb 12 08:31:23 2017
@@ -26264,7 +26264,8 @@ bool X86TargetLowering::isGAPlusOffset(S
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain,
+ bool FloatDomain, SDValue &V1, SDLoc &DL,
+ SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
@@ -26280,8 +26281,9 @@ static bool matchUnaryVectorShuffle(MVT
}
// Match against a VZEXT instruction.
- // TODO: Add 256/512-bit vector support.
- if (!FloatDomain && MaskVT.is128BitVector() && Subtarget.hasSSE41()) {
+ // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
+ if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
unsigned MaxScale = 64 / MaskEltSize;
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
bool Match = true;
@@ -26291,7 +26293,10 @@ static bool matchUnaryVectorShuffle(MVT
Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
}
if (Match) {
- SrcVT = MaskVT;
+ unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
+ SrcVT = MVT::getVectorVT(MaskVT.getScalarType(), SrcSize / MaskEltSize);
+ if (SrcVT != MaskVT)
+ V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
DstVT = MVT::getVectorVT(DstVT, NumDstElts);
Shuffle = X86ISD::VZEXT;
@@ -26908,8 +26913,8 @@ static bool combineX86ShuffleChain(Array
}
}
- if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, Shuffle,
- ShuffleSrcVT, ShuffleVT)) {
+ if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, DL, DAG,
+ Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
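
To illustrate the new SrcVT computation above: when the extension only consumes a low subvector of the shuffle operand, V1 is shrunk before the VZEXT node is built. Here is a worked example of the arithmetic for the VPMOVZXWQ case exercised by the test below -- a standalone sketch of the patch's formulas, not LLVM code:

  #include <algorithm>
  #include <cstdio>

  int main() {
    unsigned NumMaskElts = 16;  // MaskVT = v16i16 (256-bit integer domain)
    unsigned MaskEltSize = 16;  // bits per mask element
    unsigned Scale = 4;         // i16 -> i64 zero-extension
    unsigned NumDstElts = NumMaskElts / Scale;                    // 4
    unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);  // 128
    // SrcVT = v8i16 (128-bit) != MaskVT (v16i16), so V1 is reduced to its
    // low xmm subvector before building the VZEXT (vpmovzxwq ymm, xmm).
    std::printf("SrcVT = v%ui%u\n", SrcSize / MaskEltSize, MaskEltSize);
    return 0;
  }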
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=294896&r1=294895&r2=294896&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Sun Feb 12 08:31:23 2017
@@ -480,14 +480,12 @@ define <8 x float> @combine_permps_as_pe
define <4 x i64> @combine_pshufb_as_zext(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_as_zext:
; X32: # BB#0:
-; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,1]
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[20,21],zero,zero,zero,zero,zero,zero,ymm0[22,23],zero,zero,zero,zero,zero,zero
+; X32-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_as_zext:
; X64: # BB#0:
-; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,1]
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[10,11],zero,zero,zero,zero,zero,zero,ymm0[20,21],zero,zero,zero,zero,zero,zero,ymm0[22,23],zero,zero,zero,zero,zero,zero
+; X64-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT: retq
%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 8, i8 9, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 10, i8 11, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 4, i8 5, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 6, i8 7, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
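
Tracing the test: the shufflevector swaps the 128-bit halves of %a0, and the per-lane pshufb then selects two source words per lane with zero fill, so resolving the chain gives the word-level mask checked below -- exactly the Scale = 4 zero-extension pattern, hence the single VPMOVZXWQ. A sketch of that final check (assuming LLVM's -1 undef / -2 zero sentinels):

  #include <cstdio>
  #include <vector>

  int main() {
    // Word-level mask the combiner resolves the shuffle chain to
    // (-2 = SM_SentinelZero): source word i in lane i*4, zeros elsewhere.
    std::vector<int> Mask = {0, -2, -2, -2, 1, -2, -2, -2,
                             2, -2, -2, -2, 3, -2, -2, -2};
    const unsigned Scale = 4;  // i16 -> i64, hence VPMOVZXWQ
    bool Match = true;
    for (unsigned i = 0; i != Mask.size() / Scale; ++i) {
      Match &= (Mask[i * Scale] == int(i));
      for (unsigned j = 1; j != Scale; ++j)
        Match &= (Mask[i * Scale + j] == -2 || Mask[i * Scale + j] == -1);
    }
    std::printf("VPMOVZXWQ match: %d\n", Match);  // prints 1
    return 0;
  }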