[llvm] [Instructions] cache computed shufflevector properties (PR #115536)
Princeton Ferro via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 7 18:40:46 PST 2024
https://github.com/Prince781 updated https://github.com/llvm/llvm-project/pull/115536
>From 8e12ade1c09dddbf22e9dc3c4013c34d3642d725 Mon Sep 17 00:00:00 2001
From: Princeton Ferro <pferro at nvidia.com>
Date: Sat, 7 Dec 2024 21:30:20 -0500
Subject: [PATCH] [Instructions] cache computed shufflevector properties
- Cache computed properties of a shufflevector mask.
- Merge several shuffle mask property analyses into a single analysis
and introduce ShuffleMaskAttrs.
- Compute the properties on shufflevector construction.
---
llvm/include/llvm/IR/Instructions.h | 160 ++++++++++++++------------
llvm/lib/IR/Instructions.cpp | 169 ++++++++++++++++++----------
2 files changed, 197 insertions(+), 132 deletions(-)
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index a42bf6bca1b9fb..2388acc49cc4a4 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1883,6 +1883,76 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
constexpr int PoisonMaskElem = -1;
+/// Attributes of a shufflevector mask.
+///
+/// Each single-bit flag records whether the mask has the named property.
+/// SpliceIndex is only meaningful when Splice is set.
+struct ShuffleMaskAttrs {
+ /// If the shuffle chooses elements from exactly one source vector without
+ /// changing the length of that vector.
+ /// Example: shufflevector <4 x n> A, <4 x n> B, <3,0,undef,3>
+ /// TODO: Optionally allow length-changing shuffles.
+ bool SingleSource : 1;
+
+ /// If the shuffle chooses elements from exactly one source vector without
+ /// lane crossings and does not change the number of elements from its input
+ /// vectors.
+ /// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef>
+ bool Identity : 1;
+
+ /// If the shuffle lengthens exactly one source vector with undefs in the
+ /// high elements.
+ bool IdentityWithPadding : 1;
+
+ /// If the shuffle extracts the first N elements of exactly one source
+ /// vector.
+ bool IdentityWithExtract : 1;
+
+ /// If the shuffle concatenates the two source vectors. This is false if
+ /// either input is undefined. In that case, the shuffle is better classified
+ /// as an identity with padding operation.
+ bool Concat : 1;
+
+ /// If the shuffle swaps the order of elements from exactly one source vector.
+ /// Example: shufflevector <4 x n> A, <4 x n> B, <3,undef,1,undef>
+ /// TODO: Optionally allow length-changing shuffles.
+ bool Reverse : 1;
+
+ /// If all elements of the shuffle are the same value as the first element of
+ /// exactly one source vector without changing the length of that vector.
+ /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,0,undef,0>
+ /// TODO: Optionally allow length-changing shuffles.
+ /// TODO: Optionally allow splats from other elements.
+ bool ZeroEltSplat : 1;
+
+ /// If the shuffle chooses elements from its source vectors without lane
+ /// crossings and all operands have the same number of elements.
+ /// In other words, this shuffle is equivalent to a vector select with a
+ /// constant condition operand.
+ /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,1,6,3>
+ /// This is false if the mask does not choose from both input vectors.
+ /// In that case, the shuffle is better classified as an identity shuffle.
+ /// TODO: Optionally allow length-changing shuffles.
+ bool Select : 1;
+
+ /// If the shuffle transposes the elements of its inputs without changing the
+ /// length of the vectors. This operation may also be known as a merge or
+ /// interleave. See the description for isTransposeMask() for the exact
+ /// specification.
+ /// Example: shufflevector <4 x n> A, <4 x n> B, <0,4,2,6>
+ bool Transpose : 1;
+
+ /// If the shuffle splices two inputs without changing the length of the
+ /// vectors. This operation concatenates the two inputs together and then
+ /// extracts an original width vector starting from the splice index.
+ /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4>
+ bool Splice : 1;
+
+ /// The starting index of the splice. Only meaningful when Splice is set.
+ /// Example: 1, from the previous example
+ int SpliceIndex;
+};
+
+// Keep the cached attributes no larger than a single 64-bit word.
+static_assert(sizeof(ShuffleMaskAttrs) <= sizeof(uint64_t),
+ "ShuffleMaskAttrs is too large!");
+
/// This instruction constructs a fixed permutation of two
/// input vectors.
///
@@ -1898,6 +1968,7 @@ class ShuffleVectorInst : public Instruction {
SmallVector<int, 4> ShuffleMask;
Constant *ShuffleMaskForBitcode;
+ /// Cached properties of ShuffleMask; reassigned by setShuffleMask().
+ ShuffleMaskAttrs MaskAttrs;
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
@@ -1924,6 +1995,12 @@ class ShuffleVectorInst : public Instruction {
/// of the instruction.
void commute();
+ /// Compute every ShuffleMaskAttrs property of \p Mask in one pass.
+ /// \p NumOpElts is the number of known elements in each of
+ /// operand1/operand2. \p Scalable is set if any operands are scalable
+ /// vectors; it forces the Identity, IdentityWithPadding, IdentityWithExtract
+ /// and Concat properties to false. \p HasUndefOp is set if there are any
+ /// undef operands; it forces the Concat property to false.
+ static ShuffleMaskAttrs analyzeMask(ArrayRef<int> Mask, int NumOpElts,
+ bool Scalable, bool HasUndefOp);
+
/// Return true if a shufflevector instruction can be
/// formed with the specified operands.
static bool isValidOperands(const Value *V1, const Value *V2,
@@ -2004,14 +2081,7 @@ class ShuffleVectorInst : public Instruction {
return isSingleSourceMask(MaskAsInts, NumSrcElts);
}
- /// Return true if this shuffle chooses elements from exactly one source
- /// vector without changing the length of that vector.
- /// Example: shufflevector <4 x n> A, <4 x n> B, <3,0,undef,3>
- /// TODO: Optionally allow length-changing shuffles.
- bool isSingleSource() const {
- return !changesLength() &&
- isSingleSourceMask(ShuffleMask, ShuffleMask.size());
- }
+ /// Return true if this shuffle chooses elements from exactly one source
+ /// vector without changing the length of that vector. Reads the cached
+ /// MaskAttrs.SingleSource flag.
+ bool isSingleSource() const { return MaskAttrs.SingleSource; }
/// Return true if this shuffle mask chooses elements from exactly one source
/// vector without lane crossings. A shuffle using this mask is not
@@ -2032,31 +2102,13 @@ class ShuffleVectorInst : public Instruction {
return isIdentityMask(MaskAsInts, NumSrcElts);
}
- /// Return true if this shuffle chooses elements from exactly one source
- /// vector without lane crossings and does not change the number of elements
- /// from its input vectors.
- /// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef>
- bool isIdentity() const {
- // Not possible to express a shuffle mask for a scalable vector for this
- // case.
- if (isa<ScalableVectorType>(getType()))
- return false;
-
- return !changesLength() && isIdentityMask(ShuffleMask, ShuffleMask.size());
- }
+ /// Return true if this shuffle chooses elements from exactly one source
+ /// vector without lane crossings and does not change the number of elements
+ /// from its input vectors. Reads the cached MaskAttrs.Identity flag.
+ bool isIdentity() const { return MaskAttrs.Identity; }
- /// Return true if this shuffle lengthens exactly one source vector with
- /// undefs in the high elements.
- bool isIdentityWithPadding() const;
+ /// Return true if this shuffle lengthens exactly one source vector with
+ /// undefs in the high elements. Reads the cached
+ /// MaskAttrs.IdentityWithPadding flag.
+ bool isIdentityWithPadding() const { return MaskAttrs.IdentityWithPadding; }
- /// Return true if this shuffle extracts the first N elements of exactly one
- /// source vector.
- bool isIdentityWithExtract() const;
+ /// Return true if this shuffle extracts the first N elements of exactly one
+ /// source vector. Reads the cached MaskAttrs.IdentityWithExtract flag.
+ bool isIdentityWithExtract() const { return MaskAttrs.IdentityWithExtract; }
- /// Return true if this shuffle concatenates its 2 source vectors. This
- /// returns false if either input is undefined. In that case, the shuffle is
- /// is better classified as an identity with padding operation.
- bool isConcat() const;
+ /// Return true if this shuffle concatenates its 2 source vectors. Reads the
+ /// cached MaskAttrs.Concat flag, which is false if either input was undef
+ /// when the mask was analyzed.
+ bool isConcat() const { return MaskAttrs.Concat; }
/// Return true if this shuffle mask chooses elements from its source vectors
/// without lane crossings. A shuffle using this mask would be
@@ -2074,17 +2126,7 @@ class ShuffleVectorInst : public Instruction {
return isSelectMask(MaskAsInts, NumSrcElts);
}
- /// Return true if this shuffle chooses elements from its source vectors
- /// without lane crossings and all operands have the same number of elements.
- /// In other words, this shuffle is equivalent to a vector select with a
- /// constant condition operand.
- /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,1,6,3>
- /// This returns false if the mask does not choose from both input vectors.
- /// In that case, the shuffle is better classified as an identity shuffle.
- /// TODO: Optionally allow length-changing shuffles.
- bool isSelect() const {
- return !changesLength() && isSelectMask(ShuffleMask, ShuffleMask.size());
- }
+ /// Return true if this shuffle chooses elements from both source vectors
+ /// without lane crossings and all operands have the same number of elements,
+ /// i.e. it is equivalent to a vector select with a constant condition.
+ /// Reads the cached MaskAttrs.Select flag.
+ bool isSelect() const { return MaskAttrs.Select; }
/// Return true if this shuffle mask swaps the order of elements from exactly
/// one source vector.
@@ -2099,13 +2141,7 @@ class ShuffleVectorInst : public Instruction {
return isReverseMask(MaskAsInts, NumSrcElts);
}
- /// Return true if this shuffle swaps the order of elements from exactly
- /// one source vector.
- /// Example: shufflevector <4 x n> A, <4 x n> B, <3,undef,1,undef>
- /// TODO: Optionally allow length-changing shuffles.
- bool isReverse() const {
- return !changesLength() && isReverseMask(ShuffleMask, ShuffleMask.size());
- }
+ /// Return true if this shuffle swaps the order of elements from exactly
+ /// one source vector. Reads the cached MaskAttrs.Reverse flag.
+ bool isReverse() const { return MaskAttrs.Reverse; }
/// Return true if this shuffle mask chooses all elements with the same value
/// as the first element of exactly one source vector.
@@ -2120,16 +2156,7 @@ class ShuffleVectorInst : public Instruction {
return isZeroEltSplatMask(MaskAsInts, NumSrcElts);
}
- /// Return true if all elements of this shuffle are the same value as the
- /// first element of exactly one source vector without changing the length
- /// of that vector.
- /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,0,undef,0>
- /// TODO: Optionally allow length-changing shuffles.
- /// TODO: Optionally allow splats from other elements.
- bool isZeroEltSplat() const {
- return !changesLength() &&
- isZeroEltSplatMask(ShuffleMask, ShuffleMask.size());
- }
+ /// Return true if all elements of this shuffle are the same value as the
+ /// first element of exactly one source vector without changing the length
+ /// of that vector. Reads the cached MaskAttrs.ZeroEltSplat flag.
+ bool isZeroEltSplat() const { return MaskAttrs.ZeroEltSplat; }
/// Return true if this shuffle mask is a transpose mask.
/// Transpose vector masks transpose a 2xn matrix. They read corresponding
@@ -2171,14 +2198,7 @@ class ShuffleVectorInst : public Instruction {
return isTransposeMask(MaskAsInts, NumSrcElts);
}
- /// Return true if this shuffle transposes the elements of its inputs without
- /// changing the length of the vectors. This operation may also be known as a
- /// merge or interleave. See the description for isTransposeMask() for the
- /// exact specification.
- /// Example: shufflevector <4 x n> A, <4 x n> B, <0,4,2,6>
- bool isTranspose() const {
- return !changesLength() && isTransposeMask(ShuffleMask, ShuffleMask.size());
- }
+ /// Return true if this shuffle transposes the elements of its inputs without
+ /// changing the length of the vectors. See isTransposeMask() for the exact
+ /// specification. Reads the cached MaskAttrs.Transpose flag.
+ bool isTranspose() const { return MaskAttrs.Transpose; }
/// Return true if this shuffle mask is a splice mask, concatenating the two
/// inputs together and then extracts an original width vector starting from
@@ -2194,13 +2214,9 @@ class ShuffleVectorInst : public Instruction {
return isSpliceMask(MaskAsInts, NumSrcElts, Index);
}
- /// Return true if this shuffle splices two inputs without changing the length
- /// of the vectors. This operation concatenates the two inputs together and
- /// then extracts an original width vector starting from the splice index.
- /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4>
+  /// Return true if this shuffle splices two inputs without changing the
+  /// length of the vectors. This operation concatenates the two inputs
+  /// together and then extracts an original width vector starting from the
+  /// splice index.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4>
+  /// \p Index receives the splice start only when this returns true; it is
+  /// left untouched otherwise.
bool isSplice(int &Index) const {
- return !changesLength() &&
- isSpliceMask(ShuffleMask, ShuffleMask.size(), Index);
+    // Do not clobber the caller's out-parameter for non-splice shuffles: the
+    // cached SpliceIndex carries no information unless Splice is set.
+    if (!MaskAttrs.Splice)
+      return false;
+    Index = MaskAttrs.SpliceIndex;
+    return true;
}
/// Return true if this shuffle mask is an extract subvector mask.
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 4f07a4c4dd017a..72e4dfcb8b8c8f 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1822,6 +1822,14 @@ void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
void ShuffleVectorInst::setShuffleMask(ArrayRef<int> Mask) {
ShuffleMask.assign(Mask.begin(), Mask.end());
ShuffleMaskForBitcode = convertShuffleMaskForBitcode(Mask, getType());
+
+  // Refresh the cached mask properties. When the first operand is not a
+  // fixed-width vector, the mask length stands in for the operand length and
+  // the mask is analyzed as scalable.
+  // NOTE(review): the undef-operand state is sampled here; if an operand can
+  // be replaced later without going through setShuffleMask(), the cached
+  // Concat flag may go stale -- confirm against the operand setters.
+  const auto *FixedOpTy = dyn_cast<FixedVectorType>(Op<0>()->getType());
+  const bool IsScalable = FixedOpTy == nullptr;
+  const int NumOpElts =
+      IsScalable ? static_cast<int>(Mask.size())
+                 : static_cast<int>(FixedOpTy->getNumElements());
+  const bool AnyUndefOp =
+      isa<UndefValue>(Op<0>()) || isa<UndefValue>(Op<1>());
+  MaskAttrs = analyzeMask(Mask, NumOpElts, IsScalable, AnyUndefOp);
}
Constant *ShuffleVectorInst::convertShuffleMaskForBitcode(ArrayRef<int> Mask,
@@ -1844,6 +1852,106 @@ Constant *ShuffleVectorInst::convertShuffleMaskForBitcode(ArrayRef<int> Mask,
return ConstantVector::get(MaskConst);
}
+/// Classify a shufflevector mask in a single pass over its elements.
+///
+/// \param Mask       Mask elements; PoisonMaskElem marks an undefined lane.
+/// \param NumOpElts  Number of known elements in each source operand.
+/// \param Scalable   True if the operands are scalable vectors. Identity,
+///                   IdentityWithPadding, IdentityWithExtract and Concat are
+///                   never reported for scalable shuffles.
+/// \param HasUndefOp True if either operand is undef. Concat is never
+///                   reported in that case.
+/// \returns the properties that hold for \p Mask. SpliceIndex is only
+///          meaningful when Splice is set.
+ShuffleMaskAttrs ShuffleVectorInst::analyzeMask(ArrayRef<int> Mask,
+                                                int NumOpElts, bool Scalable,
+                                                bool HasUndefOp) {
+  assert(!Mask.empty() && "Shuffle mask must contain elements");
+
+  const int NumMaskElts = Mask.size();
+  const bool PreservesLength = NumMaskElts == NumOpElts;
+  const bool Extracts = NumMaskElts < NumOpElts;
+  // Cleared in the loop if a defined element appears at or past NumOpElts.
+  bool ExtendsWithPadding = NumMaskElts > NumOpElts;
+
+  bool UsesLHS = false;
+  bool UsesRHS = false;
+  bool CrossesLanes = false;
+  bool ReversesLanes = NumOpElts >= 2;
+  bool FirstLaneOnly = true;
+  bool InConcatOrder = true;
+  bool HasTransposeInterleaving = true;
+  bool Splices = true;
+  std::optional<int> SpliceIndex;
+
+  for (int Idx = 0; Idx < NumMaskElts; ++Idx) {
+    const int I = Mask[Idx];
+    if (I == PoisonMaskElem) {
+      // A poison lane is compatible with every property except transpose.
+      HasTransposeInterleaving = false;
+      continue;
+    }
+    assert(I >= 0 && I < (NumOpElts * 2) &&
+           "Out-of-bounds shuffle mask element");
+    UsesLHS |= I < NumOpElts;
+    UsesRHS |= I >= NumOpElts;
+    CrossesLanes |= I != Idx && I != (NumOpElts + Idx);
+    ReversesLanes &=
+        I == (NumOpElts - 1 - Idx) || I == (NumOpElts + NumOpElts - 1 - Idx);
+    FirstLaneOnly &= I == 0 || I == NumOpElts;
+    // A concatenation reads the joined operands strictly in order. This is
+    // stricter than !CrossesLanes, which also accepts I == NumOpElts + Idx.
+    InConcatOrder &= I == Idx;
+
+    if (Idx >= 2)
+      HasTransposeInterleaving &= I - Mask[Idx - 2] == 2;
+    else if (Idx == 1)
+      HasTransposeInterleaving &= I - Mask[0] == NumOpElts;
+    else // Idx == 0
+      HasTransposeInterleaving &= I == 0 || I == 1;
+
+    if (!SpliceIndex) {
+      // The first defined element fixes the splice start, which must lie
+      // inside the first operand.
+      Splices &= I >= Idx && I - Idx < NumOpElts;
+      if (Splices)
+        SpliceIndex = I - Idx;
+    } else {
+      Splices &= I == *SpliceIndex + Idx;
+    }
+
+    // Padding requires the mask to be longer than the operands (see above)
+    // and every element at or past NumOpElts to be undef.
+    ExtendsWithPadding &= Idx < NumOpElts;
+  }
+
+  ShuffleMaskAttrs Attrs = {};
+
+  // At most one source is referenced. A completely undefined mask uses
+  // neither source and still counts, matching the per-property mask helpers
+  // this analysis replaces.
+  const bool AtMostOneSource = !(UsesLHS && UsesRHS);
+  const bool SingleSourceInLane = AtMostOneSource && !CrossesLanes;
+
+  // Single-source if the length is preserved and LHS and RHS are not both
+  // used.
+  Attrs.SingleSource = PreservesLength && AtMostOneSource;
+
+  // Identity if chooses elements without lane-crossings from either LHS or
+  // RHS.
+  Attrs.Identity = !Scalable && PreservesLength && SingleSourceInLane;
+
+  // Identity with padding if mask size > operand size and all extra mask
+  // elements are undef/-1.
+  Attrs.IdentityWithPadding =
+      !Scalable && SingleSourceInLane && ExtendsWithPadding;
+
+  // Identity with extract if mask size < operand size.
+  Attrs.IdentityWithExtract = !Scalable && SingleSourceInLane && Extracts;
+
+  // Concat if the mask is twice the operand length and every defined element
+  // selects its own position in the joined operands.
+  Attrs.Concat = !Scalable && !HasUndefOp &&
+                 NumMaskElts == 2 * NumOpElts && InConcatOrder;
+
+  // Reverse if chooses lanes in reverse order from either LHS or RHS.
+  Attrs.Reverse = Attrs.SingleSource && ReversesLanes;
+
+  // Splat of 0th elt if only picks first lane (or undef) of either LHS or RHS.
+  Attrs.ZeroEltSplat = Attrs.SingleSource && FirstLaneOnly;
+
+  // Select if chooses elements without lane-crossings from both LHS and RHS.
+  Attrs.Select = UsesLHS && UsesRHS && PreservesLength && !CrossesLanes;
+
+  // Transpose if (1) number of elements is >= 2 and a power of 2, (2) first
+  // element is 0 or 1, (3) difference between first 2 elements == mask length,
+  // and (4) difference between consecutive even/odd elements == 2.
+  Attrs.Transpose = PreservesLength && NumMaskElts >= 2 &&
+                    isPowerOf2_32(NumMaskElts) && HasTransposeInterleaving;
+
+  // Splice if (1) the starting index is >= 0, and (2) we have a contiguous
+  // sub-range spanning the 1st and 2nd vectors (or just the 1st).
+  if (PreservesLength && Splices && SpliceIndex) {
+    Attrs.Splice = true;
+    Attrs.SpliceIndex = *SpliceIndex;
+  }
+
+  return Attrs;
+}
+
static bool isSingleSourceMaskImpl(ArrayRef<int> Mask, int NumOpElts) {
assert(!Mask.empty() && "Shuffle mask must contain elements");
bool UsesLHS = false;
@@ -1978,6 +2086,7 @@ bool ShuffleVectorInst::isSpliceMask(ArrayRef<int> Mask, int NumSrcElts,
if (Mask.size() != static_cast<unsigned>(NumSrcElts))
return false;
// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4>
+ // Counter-example (not a splice): shufflevector <4 x n> A, <4 x n> B, <.,0,1,2>
int StartIndex = -1;
for (int I = 0, E = Mask.size(); I != E; ++I) {
int MaskEltVal = Mask[I];
@@ -2109,66 +2218,6 @@ bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef<int> Mask,
return false;
}
-bool ShuffleVectorInst::isIdentityWithPadding() const {
- // FIXME: Not currently possible to express a shuffle mask for a scalable
- // vector for this case.
- if (isa<ScalableVectorType>(getType()))
- return false;
-
- int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
- int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
- if (NumMaskElts <= NumOpElts)
- return false;
-
- // The first part of the mask must choose elements from exactly 1 source op.
- ArrayRef<int> Mask = getShuffleMask();
- if (!isIdentityMaskImpl(Mask, NumOpElts))
- return false;
-
- // All extending must be with undef elements.
- for (int i = NumOpElts; i < NumMaskElts; ++i)
- if (Mask[i] != -1)
- return false;
-
- return true;
-}
-
-bool ShuffleVectorInst::isIdentityWithExtract() const {
- // FIXME: Not currently possible to express a shuffle mask for a scalable
- // vector for this case.
- if (isa<ScalableVectorType>(getType()))
- return false;
-
- int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
- int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
- if (NumMaskElts >= NumOpElts)
- return false;
-
- return isIdentityMaskImpl(getShuffleMask(), NumOpElts);
-}
-
-bool ShuffleVectorInst::isConcat() const {
- // Vector concatenation is differentiated from identity with padding.
- if (isa<UndefValue>(Op<0>()) || isa<UndefValue>(Op<1>()))
- return false;
-
- // FIXME: Not currently possible to express a shuffle mask for a scalable
- // vector for this case.
- if (isa<ScalableVectorType>(getType()))
- return false;
-
- int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
- int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
- if (NumMaskElts != NumOpElts * 2)
- return false;
-
- // Use the mask length rather than the operands' vector lengths here. We
- // already know that the shuffle returns a vector twice as long as the inputs,
- // and neither of the inputs are undef vectors. If the mask picks consecutive
- // elements from both inputs, then this is a concatenation of the inputs.
- return isIdentityMaskImpl(getShuffleMask(), NumMaskElts);
-}
-
static bool isReplicationMaskWithParams(ArrayRef<int> Mask,
int ReplicationFactor, int VF) {
assert(Mask.size() == (unsigned)ReplicationFactor * VF &&
More information about the llvm-commits
mailing list