[llvm] r373846 - [X86][SSE] matchVectorShuffleAsBlend - use Zeroable element mask directly.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 6 05:38:39 PDT 2019
Author: rksimon
Date: Sun Oct 6 05:38:38 2019
New Revision: 373846
URL: http://llvm.org/viewvc/llvm-project?rev=373846&view=rev
Log:
[X86][SSE] matchVectorShuffleAsBlend - use Zeroable element mask directly.
We can make use of the Zeroable mask to indicate which elements we can safely set to zero instead of creating a target shuffle mask on the fly.
This allows us to remove createTargetShuffleMask.
This is part of the work to fix PR43024 and allow us to use SimplifyDemandedElts to simplify shuffle chains - we need to get to a point where the target shuffle mask isn't adjusted by its source inputs in setTargetShuffleZeroElements, but where the zeroable elements are instead cached in a parallel Zeroable mask.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/packss.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=373846&r1=373845&r2=373846&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Oct 6 05:38:38 2019
@@ -10298,25 +10298,6 @@ static bool isTargetShuffleEquivalent(Ar
return true;
}
-// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle
-// mask.
-// TODO: Do we need this? It might be better to use Mask+Zeroable directly.
-static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
- const APInt &Zeroable) {
- int NumElts = Mask.size();
- assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes");
-
- SmallVector<int, 64> TargetMask(NumElts, SM_SentinelUndef);
- for (int i = 0; i != NumElts; ++i) {
- int M = Mask[i];
- if (M == SM_SentinelUndef)
- continue;
- assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
- TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
- }
- return TargetMask;
-}
-
// Attempt to create a shuffle mask from a VSELECT condition mask.
static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
SDValue Cond) {
@@ -10967,9 +10948,9 @@ static SDValue getVectorMaskingNode(SDVa
SelectionDAG &DAG);
static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
- MutableArrayRef<int> TargetMask,
- bool &ForceV1Zero, bool &ForceV2Zero,
- uint64_t &BlendMask) {
+ MutableArrayRef<int> Mask,
+ const APInt &Zeroable, bool &ForceV1Zero,
+ bool &ForceV2Zero, uint64_t &BlendMask) {
bool V1IsZeroOrUndef =
V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZeroOrUndef =
@@ -10977,13 +10958,12 @@ static bool matchVectorShuffleAsBlend(SD
BlendMask = 0;
ForceV1Zero = false, ForceV2Zero = false;
- assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");
+ assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
// Attempt to generate the binary blend mask. If an input is zero then
// we can use any lane.
- // TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
- for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
- int M = TargetMask[i];
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
if (M == i)
@@ -10992,16 +10972,16 @@ static bool matchVectorShuffleAsBlend(SD
BlendMask |= 1ull << i;
continue;
}
- if (M == SM_SentinelZero) {
+ if (Zeroable[i]) {
if (V1IsZeroOrUndef) {
ForceV1Zero = true;
- TargetMask[i] = i;
+ Mask[i] = i;
continue;
}
if (V2IsZeroOrUndef) {
ForceV2Zero = true;
BlendMask |= 1ull << i;
- TargetMask[i] = i + Size;
+ Mask[i] = i + Size;
continue;
}
}
@@ -11030,11 +11010,10 @@ static SDValue lowerShuffleAsBlend(const
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
-
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
- if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero,
+ SmallVector<int, 64> Mask(Original.begin(), Original.end());
+ if (!matchVectorShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,
BlendMask))
return SDValue();
@@ -32099,8 +32078,8 @@ static bool matchBinaryPermuteShuffle(
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end());
- if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero,
- BlendMask)) {
+ if (matchVectorShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero,
+ ForceV2Zero, BlendMask)) {
if (MaskVT == MVT::v16i16) {
// We can only use v16i16 PBLENDW if the lanes are repeated.
SmallVector<int, 8> RepeatedMask;
Modified: llvm/trunk/test/CodeGen/X86/packss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/packss.ll?rev=373846&r1=373845&r2=373846&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/packss.ll (original)
+++ llvm/trunk/test/CodeGen/X86/packss.ll Sun Oct 6 05:38:38 2019
@@ -356,18 +356,18 @@ define <32 x i8> @packsswb_icmp_zero_tru
;
; AVX1-LABEL: packsswb_icmp_zero_trunc_256:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = zero,zero,ymm0[0,1]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = zero,zero,ymm0[0,1]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
+; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
More information about the llvm-commits
mailing list