[llvm] [Instructions] cache computed shufflevector properties (PR #115536)

Princeton Ferro via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 7 18:40:46 PST 2024


https://github.com/Prince781 updated https://github.com/llvm/llvm-project/pull/115536

>From 8e12ade1c09dddbf22e9dc3c4013c34d3642d725 Mon Sep 17 00:00:00 2001
From: Princeton Ferro <pferro at nvidia.com>
Date: Sat, 7 Dec 2024 21:30:20 -0500
Subject: [PATCH] [Instructions] cache computed shufflevector properties

- Cache computed properties of a shufflevector mask.
- Merge several shuffle mask property analyses into a single analysis
  and introduce ShuffleMaskAttrs.
- Compute the properties on shufflevector construction.
---
 llvm/include/llvm/IR/Instructions.h | 160 ++++++++++++++------------
 llvm/lib/IR/Instructions.cpp        | 169 ++++++++++++++++++----------
 2 files changed, 197 insertions(+), 132 deletions(-)

diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index a42bf6bca1b9fb..2388acc49cc4a4 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1883,6 +1883,76 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
 
 constexpr int PoisonMaskElem = -1;
 
+/// Attributes of a shufflevector mask, computed by analyzeMask().
+struct ShuffleMaskAttrs {
+  /// Set if the shuffle chooses elements from exactly one source vector
+  /// without changing the length of that vector.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <3,0,undef,3>
+  /// TODO: Optionally allow length-changing shuffles.
+  bool SingleSource : 1;
+
+  /// Set if the shuffle chooses elements from exactly one source vector
+  /// without lane crossings and does not change the number of elements from
+  /// its input vectors.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef>
+  bool Identity : 1;
+
+  /// Set if the shuffle lengthens exactly one source vector with undefs in
+  /// the high elements.
+  bool IdentityWithPadding : 1;
+
+  /// Set if the shuffle extracts the first N elements of exactly one source
+  /// vector.
+  bool IdentityWithExtract : 1;
+
+  /// Set if the shuffle concatenates the two source vectors. This is false
+  /// if either input is undefined. In that case, the shuffle is better
+  /// classified as an identity with padding operation.
+  bool Concat : 1;
+
+  /// Set if the shuffle reverses the elements of exactly one source vector.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <3,undef,1,undef>
+  /// TODO: Optionally allow length-changing shuffles.
+  bool Reverse : 1;
+
+  /// Set if all elements of the shuffle are the same value as the first
+  /// element of exactly one source vector, without changing its length.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,0,undef,0>
+  /// TODO: Optionally allow length-changing shuffles.
+  /// TODO: Optionally allow splats from other elements.
+  bool ZeroEltSplat : 1;
+
+  /// Set if the shuffle chooses elements from its source vectors without lane
+  /// crossings and all operands have the same number of elements. In other
+  /// words, the shuffle is equivalent to a vector select with a constant
+  /// condition operand.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,1,6,3>
+  /// This is false if the mask does not choose from both input vectors.
+  /// In that case, the shuffle is better classified as an identity shuffle.
+  /// TODO: Optionally allow length-changing shuffles.
+  bool Select : 1;
+
+  /// Set if the shuffle transposes the elements of its inputs without
+  /// changing the length of the vectors. This operation may also be known as
+  /// a merge or interleave. See the description for isTransposeMask() for the
+  /// exact specification.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <0,4,2,6>
+  bool Transpose : 1;
+
+  /// Set if the shuffle splices two inputs without changing the length of the
+  /// vectors. This operation concatenates the two inputs together and then
+  /// extracts an original width vector starting from the splice index.
+  /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4>
+  bool Splice : 1;
+
+  /// The starting index of the splice; only meaningful when Splice is set.
+  /// Example: 1, from the previous example
+  int SpliceIndex;
+};
+
+static_assert(sizeof(ShuffleMaskAttrs) <= sizeof(uint64_t),
+              "ShuffleMaskAttrs is too large!");
+
 /// This instruction constructs a fixed permutation of two
 /// input vectors.
 ///
@@ -1898,6 +1968,7 @@ class ShuffleVectorInst : public Instruction {
 
   SmallVector<int, 4> ShuffleMask;
   Constant *ShuffleMaskForBitcode;
+  ShuffleMaskAttrs MaskAttrs;
 
 protected:
   // Note: Instruction needs to be a friend here to call cloneImpl.
@@ -1924,6 +1995,12 @@ class ShuffleVectorInst : public Instruction {
   /// of the instruction.
   void commute();
 
+  // Analyze a shuffle mask. NumOpElts is the number of elements assumed for
+  // each of operand1/operand2. Scalable is set if the operands are scalable
+  // vectors. HasUndefOp is set if either operand is undef.
+  static ShuffleMaskAttrs analyzeMask(ArrayRef<int> Mask, int NumOpElts,
+                                      bool Scalable, bool HasUndefOp);
+
   /// Return true if a shufflevector instruction can be
   /// formed with the specified operands.
   static bool isValidOperands(const Value *V1, const Value *V2,
@@ -2004,14 +2081,7 @@ class ShuffleVectorInst : public Instruction {
     return isSingleSourceMask(MaskAsInts, NumSrcElts);
   }
 
-  /// Return true if this shuffle chooses elements from exactly one source
-  /// vector without changing the length of that vector.
-  /// Example: shufflevector <4 x n> A, <4 x n> B, <3,0,undef,3>
-  /// TODO: Optionally allow length-changing shuffles.
-  bool isSingleSource() const {
-    return !changesLength() &&
-           isSingleSourceMask(ShuffleMask, ShuffleMask.size());
-  }
+  bool isSingleSource() const { return MaskAttrs.SingleSource; }
 
   /// Return true if this shuffle mask chooses elements from exactly one source
   /// vector without lane crossings. A shuffle using this mask is not
@@ -2032,31 +2102,13 @@ class ShuffleVectorInst : public Instruction {
     return isIdentityMask(MaskAsInts, NumSrcElts);
   }
 
-  /// Return true if this shuffle chooses elements from exactly one source
-  /// vector without lane crossings and does not change the number of elements
-  /// from its input vectors.
-  /// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef>
-  bool isIdentity() const {
-    // Not possible to express a shuffle mask for a scalable vector for this
-    // case.
-    if (isa<ScalableVectorType>(getType()))
-      return false;
-
-    return !changesLength() && isIdentityMask(ShuffleMask, ShuffleMask.size());
-  }
+  bool isIdentity() const { return MaskAttrs.Identity; }
 
-  /// Return true if this shuffle lengthens exactly one source vector with
-  /// undefs in the high elements.
-  bool isIdentityWithPadding() const;
+  bool isIdentityWithPadding() const { return MaskAttrs.IdentityWithPadding; }
 
-  /// Return true if this shuffle extracts the first N elements of exactly one
-  /// source vector.
-  bool isIdentityWithExtract() const;
+  bool isIdentityWithExtract() const { return MaskAttrs.IdentityWithExtract; }
 
-  /// Return true if this shuffle concatenates its 2 source vectors. This
-  /// returns false if either input is undefined. In that case, the shuffle is
-  /// is better classified as an identity with padding operation.
-  bool isConcat() const;
+  bool isConcat() const { return MaskAttrs.Concat; }
 
   /// Return true if this shuffle mask chooses elements from its source vectors
   /// without lane crossings. A shuffle using this mask would be
@@ -2074,17 +2126,7 @@ class ShuffleVectorInst : public Instruction {
     return isSelectMask(MaskAsInts, NumSrcElts);
   }
 
-  /// Return true if this shuffle chooses elements from its source vectors
-  /// without lane crossings and all operands have the same number of elements.
-  /// In other words, this shuffle is equivalent to a vector select with a
-  /// constant condition operand.
-  /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,1,6,3>
-  /// This returns false if the mask does not choose from both input vectors.
-  /// In that case, the shuffle is better classified as an identity shuffle.
-  /// TODO: Optionally allow length-changing shuffles.
-  bool isSelect() const {
-    return !changesLength() && isSelectMask(ShuffleMask, ShuffleMask.size());
-  }
+  bool isSelect() const { return MaskAttrs.Select; }
 
   /// Return true if this shuffle mask swaps the order of elements from exactly
   /// one source vector.
@@ -2099,13 +2141,7 @@ class ShuffleVectorInst : public Instruction {
     return isReverseMask(MaskAsInts, NumSrcElts);
   }
 
-  /// Return true if this shuffle swaps the order of elements from exactly
-  /// one source vector.
-  /// Example: shufflevector <4 x n> A, <4 x n> B, <3,undef,1,undef>
-  /// TODO: Optionally allow length-changing shuffles.
-  bool isReverse() const {
-    return !changesLength() && isReverseMask(ShuffleMask, ShuffleMask.size());
-  }
+  bool isReverse() const { return MaskAttrs.Reverse; }
 
   /// Return true if this shuffle mask chooses all elements with the same value
   /// as the first element of exactly one source vector.
@@ -2120,16 +2156,7 @@ class ShuffleVectorInst : public Instruction {
     return isZeroEltSplatMask(MaskAsInts, NumSrcElts);
   }
 
-  /// Return true if all elements of this shuffle are the same value as the
-  /// first element of exactly one source vector without changing the length
-  /// of that vector.
-  /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,0,undef,0>
-  /// TODO: Optionally allow length-changing shuffles.
-  /// TODO: Optionally allow splats from other elements.
-  bool isZeroEltSplat() const {
-    return !changesLength() &&
-           isZeroEltSplatMask(ShuffleMask, ShuffleMask.size());
-  }
+  bool isZeroEltSplat() const { return MaskAttrs.ZeroEltSplat; }
 
   /// Return true if this shuffle mask is a transpose mask.
   /// Transpose vector masks transpose a 2xn matrix. They read corresponding
@@ -2171,14 +2198,7 @@ class ShuffleVectorInst : public Instruction {
     return isTransposeMask(MaskAsInts, NumSrcElts);
   }
 
-  /// Return true if this shuffle transposes the elements of its inputs without
-  /// changing the length of the vectors. This operation may also be known as a
-  /// merge or interleave. See the description for isTransposeMask() for the
-  /// exact specification.
-  /// Example: shufflevector <4 x n> A, <4 x n> B, <0,4,2,6>
-  bool isTranspose() const {
-    return !changesLength() && isTransposeMask(ShuffleMask, ShuffleMask.size());
-  }
+  bool isTranspose() const { return MaskAttrs.Transpose; }
 
   /// Return true if this shuffle mask is a splice mask, concatenating the two
   /// inputs together and then extracts an original width vector starting from
@@ -2194,13 +2214,9 @@ class ShuffleVectorInst : public Instruction {
     return isSpliceMask(MaskAsInts, NumSrcElts, Index);
   }
 
-  /// Return true if this shuffle splices two inputs without changing the length
-  /// of the vectors. This operation concatenates the two inputs together and
-  /// then extracts an original width vector starting from the splice index.
-  /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4>
   bool isSplice(int &Index) const {
-    return !changesLength() &&
-           isSpliceMask(ShuffleMask, ShuffleMask.size(), Index);
+    Index = MaskAttrs.SpliceIndex;
+    return MaskAttrs.Splice;
   }
 
   /// Return true if this shuffle mask is an extract subvector mask.
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 4f07a4c4dd017a..72e4dfcb8b8c8f 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1822,6 +1822,14 @@ void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
 void ShuffleVectorInst::setShuffleMask(ArrayRef<int> Mask) {
   ShuffleMask.assign(Mask.begin(), Mask.end());
   ShuffleMaskForBitcode = convertShuffleMaskForBitcode(Mask, getType());
+
+  // Recompute the cached mask attributes. Use the operand's known-minimum
+  // element count for scalable vectors too, so that a length-changing scalable
+  // shuffle is not classified as length-preserving.
+  const auto *OpTy = cast<VectorType>(Op<0>()->getType());
+  const bool HasUndef = isa<UndefValue>(Op<0>()) || isa<UndefValue>(Op<1>());
+  MaskAttrs = analyzeMask(Mask, OpTy->getElementCount().getKnownMinValue(),
+                          isa<ScalableVectorType>(OpTy), HasUndef);
 }
 
 Constant *ShuffleVectorInst::convertShuffleMaskForBitcode(ArrayRef<int> Mask,
@@ -1844,6 +1852,106 @@ Constant *ShuffleVectorInst::convertShuffleMaskForBitcode(ArrayRef<int> Mask,
   return ConstantVector::get(MaskConst);
 }
 
+// Classify \p Mask in one pass. \p NumOpElts is the length of each operand,
+// \p Scalable disables the attributes that only fixed-width shuffles can have,
+// and \p HasUndefOp (an operand is undef) rules out Concat.
+ShuffleMaskAttrs ShuffleVectorInst::analyzeMask(ArrayRef<int> Mask,
+                                                int NumOpElts, bool Scalable,
+                                                bool HasUndefOp) {
+  assert(!Mask.empty() && "Shuffle mask must contain elements");
+
+  using SizeTy = decltype(Mask.size());
+  bool UsesLHS = false;
+  bool UsesRHS = false;
+  bool ExtendsWithPadding = Mask.size() > static_cast<SizeTy>(NumOpElts);
+  const bool Extracts = Mask.size() < static_cast<SizeTy>(NumOpElts);
+  const bool PreservesLength = Mask.size() == static_cast<SizeTy>(NumOpElts);
+  bool CrossesLanes = false;
+  bool InOrder = true; // Every defined lane Idx selects element Idx.
+  bool ReversesLanes = NumOpElts >= 2;
+  bool FirstLaneOnly = true;
+  bool HasTransposeInterleaving = true;
+  std::optional<int> SpliceIndex;
+  bool Splices = true;
+
+  for (int Idx = 0, NumMaskElts = Mask.size(); Idx < NumMaskElts; ++Idx) {
+    const int I = Mask[Idx];
+    if (I == -1) {
+      // Undef lanes are compatible with every pattern except transpose.
+      HasTransposeInterleaving = false;
+      continue;
+    }
+    assert(I >= 0 && I < (NumOpElts * 2) &&
+           "Out-of-bounds shuffle mask element");
+    UsesLHS |= (I < NumOpElts);
+    UsesRHS |= (I >= NumOpElts);
+    CrossesLanes |= I != Idx && I != (NumOpElts + Idx);
+    InOrder &= I == Idx;
+    ReversesLanes &=
+        I == (NumOpElts - 1 - Idx) || I == (NumOpElts + NumOpElts - 1 - Idx);
+    FirstLaneOnly &= I == 0 || I == NumOpElts;
+
+    if (Idx >= 2)
+      HasTransposeInterleaving &= I - Mask[Idx - 2] == 2;
+    else if (Idx == 1)
+      HasTransposeInterleaving &= I - Mask[0] == NumOpElts;
+    else // Idx == 0
+      HasTransposeInterleaving &= I == 0 || I == 1;
+
+    // The first defined lane fixes the splice start; the rest must follow it.
+    if (SpliceIndex)
+      Splices &= I == *SpliceIndex + Idx;
+    else if (Splices && I >= Idx && I - Idx < NumOpElts)
+      SpliceIndex = I - Idx;
+    else
+      Splices = false;
+
+    // Padding lanes (those at or beyond the operand length) must be undef.
+    ExtendsWithPadding &= Idx < NumOpElts;
+  }
+
+  ShuffleMaskAttrs MaskAttrs = {};
+
+  // An all-undef mask reads neither operand yet still counts as single-source,
+  // as in isSingleSourceMask(); only reading both operands disqualifies.
+  const bool OneSource = !(UsesLHS && UsesRHS);
+  MaskAttrs.SingleSource = OneSource && PreservesLength;
+
+  // The identity variants share this test and differ only in length relation.
+  const bool FixedInLane = !Scalable && OneSource && !CrossesLanes;
+  MaskAttrs.Identity = FixedInLane && PreservesLength;
+  MaskAttrs.IdentityWithPadding = FixedInLane && ExtendsWithPadding;
+  MaskAttrs.IdentityWithExtract = FixedInLane && Extracts;
+
+  // Concat needs lane Idx to read element Idx; !CrossesLanes alone would also
+  // admit select-style masks such as <0,5,2,7,4,5,6,7>.
+  MaskAttrs.Concat = !Scalable && !HasUndefOp && InOrder &&
+                     Mask.size() == 2 * static_cast<SizeTy>(NumOpElts);
+
+  // Reverse if chooses lanes in reverse order from either LHS or RHS.
+  MaskAttrs.Reverse = MaskAttrs.SingleSource && ReversesLanes;
+
+  // Splat of 0th elt if only picks first lane (or undef) of either LHS or RHS.
+  MaskAttrs.ZeroEltSplat = MaskAttrs.SingleSource && FirstLaneOnly;
+
+  // Select if chooses elements without lane-crossings from both LHS and RHS.
+  MaskAttrs.Select = UsesLHS && UsesRHS && PreservesLength && !CrossesLanes;
+
+  // Transpose if (1) number of elements is >= 2 and a power of 2, (2) first
+  // element is 0 or 1, (3) difference between first 2 elements == mask length,
+  // and (4) difference between consecutive even/odd elements == 2.
+  MaskAttrs.Transpose = PreservesLength && Mask.size() >= 2 &&
+                        isPowerOf2_32(Mask.size()) && HasTransposeInterleaving;
+
+  // Splice needs a start index, so an all-undef mask is never a splice.
+  if (PreservesLength && Splices && SpliceIndex) {
+    MaskAttrs.Splice = true;
+    MaskAttrs.SpliceIndex = *SpliceIndex;
+  }
+
+  return MaskAttrs;
+}
+
 static bool isSingleSourceMaskImpl(ArrayRef<int> Mask, int NumOpElts) {
   assert(!Mask.empty() && "Shuffle mask must contain elements");
   bool UsesLHS = false;
@@ -1978,6 +2086,7 @@ bool ShuffleVectorInst::isSpliceMask(ArrayRef<int> Mask, int NumSrcElts,
   if (Mask.size() != static_cast<unsigned>(NumSrcElts))
     return false;
   // Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4>
+  // Counter-example: shufflevector <4 x n> A, <4 x n> B, <.,0,1,2>
   int StartIndex = -1;
   for (int I = 0, E = Mask.size(); I != E; ++I) {
     int MaskEltVal = Mask[I];
@@ -2109,66 +2218,6 @@ bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef<int> Mask,
   return false;
 }
 
-bool ShuffleVectorInst::isIdentityWithPadding() const {
-  // FIXME: Not currently possible to express a shuffle mask for a scalable
-  // vector for this case.
-  if (isa<ScalableVectorType>(getType()))
-    return false;
-
-  int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
-  int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
-  if (NumMaskElts <= NumOpElts)
-    return false;
-
-  // The first part of the mask must choose elements from exactly 1 source op.
-  ArrayRef<int> Mask = getShuffleMask();
-  if (!isIdentityMaskImpl(Mask, NumOpElts))
-    return false;
-
-  // All extending must be with undef elements.
-  for (int i = NumOpElts; i < NumMaskElts; ++i)
-    if (Mask[i] != -1)
-      return false;
-
-  return true;
-}
-
-bool ShuffleVectorInst::isIdentityWithExtract() const {
-  // FIXME: Not currently possible to express a shuffle mask for a scalable
-  // vector for this case.
-  if (isa<ScalableVectorType>(getType()))
-    return false;
-
-  int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
-  int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
-  if (NumMaskElts >= NumOpElts)
-    return false;
-
-  return isIdentityMaskImpl(getShuffleMask(), NumOpElts);
-}
-
-bool ShuffleVectorInst::isConcat() const {
-  // Vector concatenation is differentiated from identity with padding.
-  if (isa<UndefValue>(Op<0>()) || isa<UndefValue>(Op<1>()))
-    return false;
-
-  // FIXME: Not currently possible to express a shuffle mask for a scalable
-  // vector for this case.
-  if (isa<ScalableVectorType>(getType()))
-    return false;
-
-  int NumOpElts = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
-  int NumMaskElts = cast<FixedVectorType>(getType())->getNumElements();
-  if (NumMaskElts != NumOpElts * 2)
-    return false;
-
-  // Use the mask length rather than the operands' vector lengths here. We
-  // already know that the shuffle returns a vector twice as long as the inputs,
-  // and neither of the inputs are undef vectors. If the mask picks consecutive
-  // elements from both inputs, then this is a concatenation of the inputs.
-  return isIdentityMaskImpl(getShuffleMask(), NumMaskElts);
-}
-
 static bool isReplicationMaskWithParams(ArrayRef<int> Mask,
                                         int ReplicationFactor, int VF) {
   assert(Mask.size() == (unsigned)ReplicationFactor * VF &&



More information about the llvm-commits mailing list