[llvm] r336900 - [X86][AVX] Use Zeroable mask to improve shuffle mask widening
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 12 06:03:59 PDT 2018
Author: rksimon
Date: Thu Jul 12 06:03:58 2018
New Revision: 336900
URL: http://llvm.org/viewvc/llvm-project?rev=336900&view=rev
Log:
[X86][AVX] Use Zeroable mask to improve shuffle mask widening
Noticed while updating D42044: lowerV2X128VectorShuffle can use the zeroable data to improve the shuffle mask, creating a target shuffle mask that recognises more 'zero upper 128' patterns.
NOTE: lowerV4X128VectorShuffle could benefit as well, but the code first needs refactoring to discriminate between SM_SentinelUndef and SM_SentinelZero for negative shuffle indices.
Differential Revision: https://reviews.llvm.org/D49092
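For context, a minimal standalone sketch of the idea (plain C++, simplified sentinels and a pared-down pairwise widening rule, not the LLVM implementation): zeroable lanes are folded into the mask as a zero sentinel before the existing widening logic runs, so a v4 mask like <0,1,2,2> from the castB test below widens to a v2 mask of <0, zero>, which the 128-bit lane lowering can treat as a 'zero upper 128' pattern.

// Sketch only: simplified stand-ins for SM_SentinelUndef / SM_SentinelZero
// and for canWidenShuffleElements; not the LLVM code.
#include <cstdio>
#include <vector>

constexpr int SentinelUndef = -1; // lane value doesn't matter
constexpr int SentinelZero  = -2; // lane is known to be zero

// Pared-down pairwise widening: each adjacent pair of narrow lanes must
// describe one wide lane (both undef, both zero/undef, or two consecutive
// even-aligned indices).
static bool widenMask(const std::vector<int> &Mask, std::vector<int> &Widened) {
  Widened.clear();
  for (size_t i = 0; i + 1 < Mask.size(); i += 2) {
    int M0 = Mask[i], M1 = Mask[i + 1];
    if (M0 == SentinelUndef && M1 == SentinelUndef) {
      Widened.push_back(SentinelUndef);
    } else if ((M0 == SentinelZero || M0 == SentinelUndef) &&
               (M1 == SentinelZero || M1 == SentinelUndef)) {
      Widened.push_back(SentinelZero);
    } else if (M0 >= 0 && (M0 % 2) == 0 && M1 == M0 + 1) {
      Widened.push_back(M0 / 2);
    } else {
      return false; // pair can't be expressed as a single wide lane
    }
  }
  return true;
}

// The new wrapper's idea: rewrite zeroable lanes to the zero sentinel first,
// then defer to the ordinary widening.
static bool widenMaskWithZeroable(std::vector<int> Mask,
                                  const std::vector<bool> &Zeroable,
                                  std::vector<int> &Widened) {
  for (size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i] != SentinelUndef && Zeroable[i])
      Mask[i] = SentinelZero;
  return widenMask(Mask, Widened);
}

int main() {
  // castB-style v4 mask <0,1,2,2> shuffling %m with zeroinitializer:
  // lanes 2 and 3 read the zero operand, so they are zeroable. Without the
  // zeroable data the (2,2) pair blocks widening; with it the mask widens
  // to <0, zero>.
  std::vector<int> Mask = {0, 1, 2, 2};
  std::vector<bool> Zeroable = {false, false, true, true};
  std::vector<int> Widened;
  if (widenMaskWithZeroable(Mask, Zeroable, Widened))
    std::printf("widened: %d %d\n", Widened[0], Widened[1]); // prints: 0 -2
  return 0;
}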
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx-cast.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=336900&r1=336899&r2=336900&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 12 06:03:58 2018
@@ -4973,6 +4973,19 @@ static bool canWidenShuffleElements(Arra
return true;
}
+static bool canWidenShuffleElements(ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ SmallVectorImpl<int> &WidenedMask) {
+ SmallVector<int, 32> TargetMask(Mask.begin(), Mask.end());
+ for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
+ if (TargetMask[i] == SM_SentinelUndef)
+ continue;
+ if (Zeroable[i])
+ TargetMask[i] = SM_SentinelZero;
+ }
+ return canWidenShuffleElements(TargetMask, WidenedMask);
+}
+
static bool canWidenShuffleElements(ArrayRef<int> Mask) {
SmallVector<int, 32> WidenedMask;
return canWidenShuffleElements(Mask, WidenedMask);
@@ -13144,7 +13157,7 @@ static SDValue lowerV2X128VectorShuffle(
return SDValue();
SmallVector<int, 4> WidenedMask;
- if (!canWidenShuffleElements(Mask, WidenedMask))
+ if (!canWidenShuffleElements(Mask, Zeroable, WidenedMask))
return SDValue();
bool IsLowZero = (Zeroable & 0x3) == 0x3;
@@ -13213,7 +13226,8 @@ static SDValue lowerV2X128VectorShuffle(
// [6] - ignore
// [7] - zero high half of destination
- assert(WidenedMask[0] >= 0 && WidenedMask[1] >= 0 && "Undef half?");
+ assert((WidenedMask[0] >= 0 || IsLowZero) &&
+ (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?");
unsigned PermMask = 0;
PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
@@ -14326,6 +14340,7 @@ static SDValue lowerV4X128VectorShuffle(
// function lowerV2X128VectorShuffle() is better solution.
assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");
+ // TODO - use Zeroable like we do for lowerV2X128VectorShuffle?
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
Modified: llvm/trunk/test/CodeGen/X86/avx-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-cast.ll?rev=336900&r1=336899&r2=336900&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-cast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-cast.ll Thu Jul 12 06:03:58 2018
@@ -20,9 +20,7 @@ define <8 x float> @castA(<4 x float> %m
define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castB:
; AVX: ## %bb.0:
-; AVX-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT: vmovaps %xmm0, %xmm0
; AVX-NEXT: retq
%shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
ret <4 x double> %shuffle.i
@@ -33,9 +31,7 @@ define <4 x double> @castB(<2 x double>
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castC:
; AVX: ## %bb.0:
-; AVX-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT: vmovaps %xmm0, %xmm0
; AVX-NEXT: retq
%shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
ret <4 x i64> %shuffle.i