[llvm-branch-commits] [llvm] 5f5a254 - [X86] LowerBUILD_VECTOR - track zero/nonzero elements with APInt masks. NFCI.
Simon Pilgrim via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Dec 14 08:33:41 PST 2020
Author: Simon Pilgrim
Date: 2020-12-14T16:28:45Z
New Revision: 5f5a2547c174cf1eaf7874ff02c198629fe02c22
URL: https://github.com/llvm/llvm-project/commit/5f5a2547c174cf1eaf7874ff02c198629fe02c22
DIFF: https://github.com/llvm/llvm-project/commit/5f5a2547c174cf1eaf7874ff02c198629fe02c22.diff
LOG: [X86] LowerBUILD_VECTOR - track zero/nonzero elements with APInt masks. NFCI.
Prep work for undef/zero 'upper elements' handling as proposed in D92645.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dade38c0538a..e4fcafc3352f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7982,7 +7982,7 @@ static SDValue getShuffleScalarElt(SDValue Op, unsigned Index,
}
// Use PINSRB/PINSRW/PINSRD to create a build vector.
-static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
+static SDValue LowerBuildVectorAsInsert(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -7997,7 +7997,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
bool First = true;
for (unsigned i = 0; i < NumElts; ++i) {
- bool IsNonZero = (NonZeros & (1 << i)) != 0;
+ bool IsNonZero = NonZeroMask[i];
if (!IsNonZero)
continue;
@@ -8024,7 +8024,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
}
/// Custom lower build_vector of v16i8.
-static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
+static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -8033,7 +8033,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
// SSE4.1 - use PINSRB to insert each byte directly.
if (Subtarget.hasSSE41())
- return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
+ return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
Subtarget);
SDLoc dl(Op);
@@ -8041,8 +8041,8 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
for (unsigned i = 0; i < 16; i += 2) {
- bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
- bool NextIsNonZero = (NonZeros & (1 << (i + 1))) != 0;
+ bool ThisIsNonZero = NonZeroMask[i];
+ bool NextIsNonZero = NonZeroMask[i + 1];
if (!ThisIsNonZero && !NextIsNonZero)
continue;
@@ -8090,7 +8090,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
/// Custom lower build_vector of v8i16.
-static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
+static SDValue LowerBuildVectorv8i16(SDValue Op, const APInt &NonZeroMask,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -8098,7 +8098,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
return SDValue();
// Use PINSRW to insert each byte directly.
- return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
+ return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG,
Subtarget);
}
@@ -10176,10 +10176,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return BitOp;
unsigned EVTBits = EltVT.getSizeInBits();
-
- unsigned NumZero = 0;
- unsigned NumNonZero = 0;
- uint64_t NonZeros = 0;
+ APInt ZeroMask = APInt::getNullValue(NumElems);
+ APInt NonZeroMask = APInt::getNullValue(NumElems);
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
unsigned NumConstants = NumElems;
@@ -10192,15 +10190,16 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
IsAllConstants = false;
NumConstants--;
}
- if (X86::isZeroNode(Elt))
- NumZero++;
- else {
- assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range.
- NonZeros |= ((uint64_t)1 << i);
- NumNonZero++;
+ if (X86::isZeroNode(Elt)) {
+ ZeroMask.setBit(i);
+ } else {
+ NonZeroMask.setBit(i);
}
}
+ unsigned NumZero = ZeroMask.countPopulation();
+ unsigned NumNonZero = NonZeroMask.countPopulation();
+
// All undef vector. Return an UNDEF. All zero vectors were handled above.
if (NumNonZero == 0)
return DAG.getUNDEF(VT);
@@ -10267,7 +10266,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
- unsigned Idx = countTrailingZeros(NonZeros);
+ unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue Item = Op.getOperand(Idx);
// If we have a constant or non-constant insertion into the low element of
@@ -10331,7 +10330,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
// Check if it's possible to issue this instead.
// shuffle (vload ptr)), undef, <1, 1, 1, 1>
- unsigned Idx = countTrailingZeros(NonZeros);
+ unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue Item = Op.getOperand(Idx);
if (Op.getNode()->isOnlyUserOf(Item.getNode()))
return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
@@ -10400,7 +10399,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (EVTBits == 64) {
if (NumNonZero == 1) {
// One half is zero or undef.
- unsigned Idx = countTrailingZeros(NonZeros);
+ unsigned Idx = NonZeroMask.countTrailingZeros();
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
@@ -10410,12 +10409,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16)
- if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
+ if (SDValue V = LowerBuildVectorv16i8(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget))
return V;
if (EVTBits == 16 && NumElems == 8)
- if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
+ if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget))
return V;
@@ -10428,7 +10427,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (NumElems == 4 && NumZero > 0) {
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < 4; ++i) {
- bool isZero = !(NonZeros & (1ULL << i));
+ bool isZero = !NonZeroMask[i];
if (isZero)
Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
@@ -10436,7 +10435,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
for (unsigned i = 0; i < 2; ++i) {
- switch ((NonZeros >> (i*2)) & 0x3) {
+ switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) {
default: llvm_unreachable("Unexpected NonZero count");
case 0:
Ops[i] = Ops[i*2]; // Must be a zero vector.
@@ -10453,8 +10452,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}
- bool Reverse1 = (NonZeros & 0x3) == 2;
- bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2;
+ bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2;
int MaskVec[] = {
Reverse1 ? 1 : 0,
Reverse1 ? 0 : 1,
More information about the llvm-branch-commits mailing list