[llvm] [SandboxVectorizer] Define SeedBundle: a set of instructions to be vectorized [retry] (PR #111073)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 3 16:14:32 PDT 2024


https://github.com/Sterling-Augustine created https://github.com/llvm/llvm-project/pull/111073

[Retry of PR #110696 with a proper rebase.]

Seed collection will assemble instructions to be vectorized into SeedBundles. This data structure is not intended to be used directly, but will be the basis for load bundles, store bundles, and so on.


>From 0d15683111402d107737e3e54e1e96413b46f9dd Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Tue, 24 Sep 2024 15:55:36 -0700
Subject: [PATCH 1/4] [SandboxVectorizer] Define SeedBundle as a set of
 instructions to be vectorized

Seed collection will assemble instructions to be vectorized into SeedBundles.
This data structure is not intended to be used directly, but will be the
basis for load bundles, store bundles, and so on.
---
 .../SandboxVectorizer/SeedCollector.h         | 136 ++++++++++++++++++
 llvm/lib/Transforms/Vectorize/CMakeLists.txt  |   1 +
 .../SandboxVectorizer/SeedCollector.cpp       |  63 ++++++++
 .../SandboxVectorizer/CMakeLists.txt          |   3 +-
 .../SandboxVectorizer/SeedCollectorTest.cpp   | 101 +++++++++++++
 5 files changed, 303 insertions(+), 1 deletion(-)
 create mode 100644 llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
 create mode 100644 llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
 create mode 100644 llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp

diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
new file mode 100644
index 00000000000000..38088f30c8af61
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -0,0 +1,136 @@
+//===- SeedCollector.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file contains the mechanism for collecting the seed instructions that
+// are used as starting points for forming the vectorization graph.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
+#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/SandboxIR/Utils.h"
+#include "llvm/SandboxIR/Value.h"
+#include <iterator>
+#include <memory>
+
+namespace llvm::sandboxir {
+class Instruction;
+class StoreInst;
+class BasicBlock;
+
+/// An ordered set of Instructions that can be vectorized.
+class SeedBundle {
+public:
+  using SeedList = SmallVector<sandboxir::Instruction *>;
+  /// Initialize a bundle with \p I.
+  explicit SeedBundle(sandboxir::Instruction *I, const DataLayout &DL) {
+    insertAt(begin(), I, DL);
+  }
+  explicit SeedBundle(SeedList &&L, const DataLayout &DL)
+      : Seeds(std::move(L)) {
+    for (auto &S : Seeds) {
+      NumUnusedBits += sandboxir::Utils::getNumBits(S, DL);
+    }
+  }
+  /// No need to allow copies.
+  SeedBundle(const SeedBundle &) = delete;
+  SeedBundle &operator=(const SeedBundle &) = delete;
+  virtual ~SeedBundle() {}
+
+  using iterator = SeedList::iterator;
+  using const_iterator = SeedList::const_iterator;
+  iterator begin() { return Seeds.begin(); }
+  iterator end() { return Seeds.end(); }
+  const_iterator begin() const { return Seeds.begin(); }
+  const_iterator end() const { return Seeds.end(); }
+
+  sandboxir::Instruction *operator[](unsigned Idx) const { return Seeds[Idx]; }
+
+  /// Insert \p I into position \p P. Clients should choose Pos
+  /// by symbol, symbol-offset, and program order (which depends if scheduling
+  /// bottom-up or top-down).
+  void insertAt(iterator Pos, sandboxir::Instruction *I, const DataLayout &DL) {
+#ifdef EXPENSIVE_CHECKS
+    for (auto Itr : Seeds) {
+      assert(*Itr != I && "Attempt to insert an instruction twice.");
+    }
+#endif
+    Seeds.insert(Pos, I);
+    NumUnusedBits += sandboxir::Utils::getNumBits(I, DL);
+  }
+
+  unsigned getFirstUnusedElementIdx() const {
+    for (unsigned ElmIdx : seq<unsigned>(0, Seeds.size()))
+      if (!isUsed(ElmIdx))
+        return ElmIdx;
+    return Seeds.size();
+  }
+  /// Marks elements as 'used' so that we skip them in `getSlice()`.
+  void setUsed(unsigned ElementIdx, const DataLayout &DL, unsigned Sz = 1,
+               bool VerifyUnused = true) {
+    if (ElementIdx + Sz >= UsedLanes.size())
+      UsedLanes.resize(ElementIdx + Sz);
+    for (unsigned Idx : seq<unsigned>(ElementIdx, ElementIdx + Sz)) {
+      assert((!VerifyUnused || !UsedLanes.test(Idx)) &&
+             "Already marked as used!");
+      UsedLanes.set(Idx);
+      UsedLaneCount++;
+    }
+    NumUnusedBits -= sandboxir::Utils::getNumBits(Seeds[ElementIdx], DL);
+  }
+
+  void setUsed(sandboxir::Instruction *V, const DataLayout &DL) {
+    auto It = std::find(begin(), end(), V);
+    assert(It != end() && "V not in the bundle!");
+    auto Idx = It - begin();
+    setUsed(Idx, DL, 1, /*VerifyUnused=*/false);
+  }
+  bool isUsed(unsigned Element) const {
+    return Element >= UsedLanes.size() ? false : UsedLanes.test(Element);
+  }
+  bool allUsed() const { return UsedLaneCount == Seeds.size(); }
+  unsigned getNumUnusedBits() const { return NumUnusedBits; }
+
+  /// \Returns a slice of seed elements, starting at the element \p StartIdx,
+  /// with a total size <= \p MaxVecRegBits. If \p ForcePowOf2 is true, then the
+  /// returned slice should have a total number of bits that is a power of 2.
+  MutableArrayRef<SeedList> getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
+                                     bool ForcePowOf2, const DataLayout &DL);
+
+protected:
+  SeedList Seeds;
+  /// The lanes that we have already vectorized.
+  BitVector UsedLanes;
+  /// Tracks used lanes for constant-time accessor.
+  unsigned UsedLaneCount = 0;
+  /// Tracks the remaining bits available to vectorize
+  unsigned NumUnusedBits = 0;
+
+public:
+#ifndef NDEBUG
+  void dump(raw_ostream &OS) const {
+    for (auto [ElmIdx, I] : enumerate(*this)) {
+      OS.indent(2) << ElmIdx << ". ";
+      if (isUsed(ElmIdx))
+        OS << "[USED]";
+      else
+        OS << *I;
+      OS << "\n";
+    }
+  }
+  LLVM_DUMP_METHOD void dump() const {
+    dump(dbgs());
+    dbgs() << "\n";
+  }
+#endif // NDEBUG
+};
+} // namespace llvm::sandboxir
+#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index eeff4a9f6a8bae..887c2089c5a520 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMVectorize
   SandboxVectorizer/DependencyGraph.cpp
   SandboxVectorizer/Passes/BottomUpVec.cpp
   SandboxVectorizer/SandboxVectorizer.cpp
+  SandboxVectorizer/SeedCollector.cpp
   SLPVectorizer.cpp
   Vectorize.cpp
   VectorCombine.cpp
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
new file mode 100644
index 00000000000000..668fea6904f866
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -0,0 +1,63 @@
+//===- SeedCollector.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Type.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/SandboxIR/Utils.h"
+#include "llvm/Support/Debug.h"
+#include <span>
+
+using namespace llvm;
+
+MutableArrayRef<sandboxir::SeedBundle::SeedList>
+sandboxir::SeedBundle::getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
+                                bool ForcePowerOf2, const DataLayout &DL) {
+  // Use uint32_t for counts to make it clear we are also using the proper
+  // isPowerOf2_[32|64].
+
+  // Count both the bits and the elements of the slice we are about to build.
+  // The bits tell us whether this is a legal slice (that is <= MaxVecRegBits),
+  // and the num of elements help us do the actual slicing.
+  uint32_t BitsSum = 0;
+  // As we are collecting slice elements we may go over the limit, so we need to
+  // remember the last legal one. This is used for the creation of the slice.
+  uint32_t LastGoodBitsSum = 0;
+  uint32_t LastGoodNumSliceElements = 0;
+  // Skip any used elements (which have already been handled) and all below
+  // `StartIdx`.
+  assert(StartIdx >= getFirstUnusedElementIdx() &&
+         "Expected unused at StartIdx");
+  uint32_t FirstGoodElementIdx = StartIdx;
+  // Go through elements starting at FirstGoodElementIdx.
+  for (auto [ElementCnt, S] : enumerate(make_range(
+           std::next(Seeds.begin(), FirstGoodElementIdx), Seeds.end()))) {
+    // Stop if we found a used element.
+    if (isUsed(FirstGoodElementIdx + ElementCnt))
+      break;
+    BitsSum += sandboxir::Utils::getNumBits(S, DL);
+    // Stop if the bits sum is over the limit.
+    if (BitsSum > MaxVecRegBits)
+      break;
+    // If forcing a power-of-2 bit-size we check if this bit size is accepted.
+    if (ForcePowerOf2 && !isPowerOf2_32(BitsSum))
+      continue;
+    LastGoodBitsSum = BitsSum;
+    LastGoodNumSliceElements = ElementCnt + 1;
+  }
+  if (LastGoodNumSliceElements < 2)
+    return {};
+  if (LastGoodBitsSum == 0)
+    return {};
+  return MutableArrayRef<sandboxir::SeedBundle::SeedList>(
+      &Seeds + FirstGoodElementIdx, LastGoodNumSliceElements);
+}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
index 9f1a3409c0c394..dcd7232db5f60c 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
@@ -11,4 +11,5 @@ add_llvm_unittest(SandboxVectorizerTests
   DependencyGraphTest.cpp
   IntervalTest.cpp
   LegalityTest.cpp
-  )
+  SeedCollectorTest.cpp	
+)
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
new file mode 100644
index 00000000000000..6756ce944f9a91
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
@@ -0,0 +1,101 @@
+//===- SeedCollectorTest.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/SandboxIR/Function.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+struct SeedBundleTest : public testing::Test {
+  LLVMContext C;
+  std::unique_ptr<Module> M;
+
+  void parseIR(LLVMContext &C, const char *IR) {
+    SMDiagnostic Err;
+    M = parseAssemblyString(IR, Err, C);
+    if (!M)
+      Err.print("LegalityTest", errs());
+  }
+};
+
+TEST_F(SeedBundleTest, SeedBundle) {
+  parseIR(C, R"IR(
+define void @foo(float %v0, float %v1) {
+bb:
+  %add0 = fadd float %v0, %v1
+  %add1 = fadd float %v0, %v1
+  %add2 = fadd float %v0, %v1
+  %add3 = fadd float %v0, %v1
+  %add4 = fadd float %v0, %v1
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+  auto &F = *Ctx.createFunction(&LLVMF);
+  DataLayout DL(M->getDataLayout());
+  auto *BB = &*F.begin();
+  auto It = BB->begin();
+  auto *I0 = &*It++;
+  auto *I1 = &*It++;
+  // Assume test instructions are identical in the number of bits.
+  const unsigned kFloatBits = sandboxir::Utils::getNumBits(I0, DL);
+  // Constructor
+  sandboxir::SeedBundle SBO(I0, DL);
+  EXPECT_EQ(*SBO.begin(), I0);
+  // getNumUnusedBits after constructor
+  EXPECT_EQ(SBO.getNumUnusedBits(), kFloatBits);
+  // setUsed
+  SBO.setUsed(I0, DL);
+  // allUsed
+  EXPECT_TRUE(SBO.allUsed());
+  // isUsed
+  EXPECT_TRUE(SBO.isUsed(0));
+  // getNumUnusedBits after setUsed
+  EXPECT_EQ(SBO.getNumUnusedBits(), 0u);
+  // insertAt
+  SBO.insertAt(SBO.end(), I1, DL);
+  EXPECT_NE(*SBO.begin(), I1);
+  // getNumUnusedBits after insertAt
+  EXPECT_EQ(SBO.getNumUnusedBits(), kFloatBits);
+  // allUsed
+  EXPECT_FALSE(SBO.allUsed());
+  // getFirstUnusedElement
+  EXPECT_EQ(SBO.getFirstUnusedElementIdx(), 1u);
+
+  sandboxir::SeedBundle::SeedList Seeds;
+  It = BB->begin();
+  Seeds.push_back(&*It++);
+  Seeds.push_back(&*It++);
+  Seeds.push_back(&*It++);
+  Seeds.push_back(&*It++);
+  Seeds.push_back(&*It++);
+  // Constructor
+  sandboxir::SeedBundle SB1(std::move(Seeds), DL);
+  // getNumUnusedBits after constructor
+  EXPECT_EQ(SB1.getNumUnusedBits(), 5 * kFloatBits);
+  // setUsed with index
+  SB1.setUsed(1, DL);
+  // getFirstUnusedElementIdx
+  EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 0u);
+  EXPECT_EQ(SB1.getNumUnusedBits(), 4 * kFloatBits);
+  SB1.setUsed(unsigned(0), DL);
+  // getFirstUnusedElementIdx not at end
+  EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 2u);
+  // getSlice
+  auto Slice0 = SB1.getSlice(2, /* MaxVecRegBits */ kFloatBits * 2,
+                             /* ForcePowerOf2 */ true, DL);
+  EXPECT_EQ(Slice0.size(), 2u);
+  auto Slice1 = SB1.getSlice(2, /* MaxVecRegBits */ kFloatBits * 3,
+                             /* ForcePowerOf2 */ false, DL);
+  EXPECT_EQ(Slice1.size(), 3u);
+}

>From 4ab340b8e9cf554837867be3fdc0e19d3fddf420 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Tue, 1 Oct 2024 13:39:22 -0700
Subject: [PATCH 2/4] Address comments

---
 .../SandboxVectorizer/SeedCollector.h         | 32 +++++++++----------
 .../SandboxVectorizer/SeedCollector.cpp       |  4 +--
 .../SandboxVectorizer/SeedCollectorTest.cpp   | 16 +++++-----
 3 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
index 38088f30c8af61..35e0958a793051 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -22,23 +22,17 @@
 #include <memory>
 
 namespace llvm::sandboxir {
-class Instruction;
-class StoreInst;
-class BasicBlock;
 
-/// An ordered set of Instructions that can be vectorized.
+/// A set of candidate Instructions for vectorizing together.
 class SeedBundle {
 public:
   using SeedList = SmallVector<sandboxir::Instruction *>;
   /// Initialize a bundle with \p I.
-  explicit SeedBundle(sandboxir::Instruction *I, const DataLayout &DL) {
-    insertAt(begin(), I, DL);
-  }
-  explicit SeedBundle(SeedList &&L, const DataLayout &DL)
-      : Seeds(std::move(L)) {
-    for (auto &S : Seeds) {
-      NumUnusedBits += sandboxir::Utils::getNumBits(S, DL);
-    }
+  explicit SeedBundle(sandboxir::Instruction *I) { insertAt(begin(), I); }
+  explicit SeedBundle(SeedList &&L) : Seeds(std::move(L)) {
+    for (auto &S : Seeds)
+      NumUnusedBits +=
+          Utils::getNumBits(S, S->getTopmostLLVMInstruction()->getDataLayout());
   }
   /// No need to allow copies.
   SeedBundle(const SeedBundle &) = delete;
@@ -57,14 +51,15 @@ class SeedBundle {
   /// Insert \p I into position \p P. Clients should choose Pos
   /// by symbol, symbol-offset, and program order (which depends if scheduling
   /// bottom-up or top-down).
-  void insertAt(iterator Pos, sandboxir::Instruction *I, const DataLayout &DL) {
+  void insertAt(iterator Pos, sandboxir::Instruction *I) {
 #ifdef EXPENSIVE_CHECKS
     for (auto Itr : Seeds) {
       assert(*Itr != I && "Attempt to insert an instruction twice.");
     }
 #endif
     Seeds.insert(Pos, I);
-    NumUnusedBits += sandboxir::Utils::getNumBits(I, DL);
+    NumUnusedBits +=
+        Utils::getNumBits(S, S->getTopmostLLVMInstruction()->getDataLayout());
   }
 
   unsigned getFirstUnusedElementIdx() const {
@@ -84,7 +79,9 @@ class SeedBundle {
       UsedLanes.set(Idx);
       UsedLaneCount++;
     }
-    NumUnusedBits -= sandboxir::Utils::getNumBits(Seeds[ElementIdx], DL);
+    NumUnusedBits -= sandboxir::Utils::getNumBits(
+        Seeds[ElementIdx],
+        Utils::getNumBits(S, S->getTopmostLLVMInstruction()->getDataLayout()););
   }
 
   void setUsed(sandboxir::Instruction *V, const DataLayout &DL) {
@@ -94,7 +91,8 @@ class SeedBundle {
     setUsed(Idx, DL, 1, /*VerifyUnused=*/false);
   }
   bool isUsed(unsigned Element) const {
-    return Element >= UsedLanes.size() ? false : UsedLanes.test(Element);
+    // return Element >= UsedLanes.size() ? false : UsedLanes.test(Element);
+    return Element < UsedLanes.size() && UsedLanes.test(Element);
   }
   bool allUsed() const { return UsedLaneCount == Seeds.size(); }
   unsigned getNumUnusedBits() const { return NumUnusedBits; }
@@ -103,7 +101,7 @@ class SeedBundle {
   /// with a total size <= \p MaxVecRegBits. If \p ForcePowOf2 is true, then the
   /// returned slice should have a total number of bits that is a power of 2.
   MutableArrayRef<SeedList> getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
-                                     bool ForcePowOf2, const DataLayout &DL);
+                                     bool ForcePowOf2);
 
 protected:
   SeedList Seeds;
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
index 668fea6904f866..a32649af870c79 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
 
 MutableArrayRef<sandboxir::SeedBundle::SeedList>
 sandboxir::SeedBundle::getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
-                                bool ForcePowerOf2, const DataLayout &DL) {
+                                bool ForcePowerOf2) {
   // Use uint32_t for counts to make it clear we are also using the proper
   // isPowerOf2_[32|64].
 
@@ -44,7 +44,7 @@ sandboxir::SeedBundle::getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
     // Stop if we found a used element.
     if (isUsed(FirstGoodElementIdx + ElementCnt))
       break;
-    BitsSum += sandboxir::Utils::getNumBits(S, DL);
+    BitsSum += sandboxir::Utils::getNumBits(S);
     // Stop if the bits sum is over the limit.
     if (BitsSum > MaxVecRegBits)
       break;
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
index 6756ce944f9a91..440241c7e10277 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
@@ -50,12 +50,12 @@ define void @foo(float %v0, float %v1) {
   // Assume test instructions are identical in the number of bits.
   const unsigned kFloatBits = sandboxir::Utils::getNumBits(I0, DL);
   // Constructor
-  sandboxir::SeedBundle SBO(I0, DL);
+  sandboxir::SeedBundle SBO(I0);
   EXPECT_EQ(*SBO.begin(), I0);
   // getNumUnusedBits after constructor
   EXPECT_EQ(SBO.getNumUnusedBits(), kFloatBits);
   // setUsed
-  SBO.setUsed(I0, DL);
+  SBO.setUsed(I0);
   // allUsed
   EXPECT_TRUE(SBO.allUsed());
   // isUsed
@@ -63,7 +63,7 @@ define void @foo(float %v0, float %v1) {
   // getNumUnusedBits after setUsed
   EXPECT_EQ(SBO.getNumUnusedBits(), 0u);
   // insertAt
-  SBO.insertAt(SBO.end(), I1, DL);
+  SBO.insertAt(SBO.end(), I1);
   EXPECT_NE(*SBO.begin(), I1);
   // getNumUnusedBits after insertAt
   EXPECT_EQ(SBO.getNumUnusedBits(), kFloatBits);
@@ -80,22 +80,22 @@ define void @foo(float %v0, float %v1) {
   Seeds.push_back(&*It++);
   Seeds.push_back(&*It++);
   // Constructor
-  sandboxir::SeedBundle SB1(std::move(Seeds), DL);
+  sandboxir::SeedBundle SB1(std::move(Seeds));
   // getNumUnusedBits after constructor
   EXPECT_EQ(SB1.getNumUnusedBits(), 5 * kFloatBits);
   // setUsed with index
-  SB1.setUsed(1, DL);
+  SB1.setUsed(1);
   // getFirstUnusedElementIdx
   EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 0u);
   EXPECT_EQ(SB1.getNumUnusedBits(), 4 * kFloatBits);
-  SB1.setUsed(unsigned(0), DL);
+  SB1.setUsed(unsigned(0));
   // getFirstUnusedElementIdx not at end
   EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 2u);
   // getSlice
   auto Slice0 = SB1.getSlice(2, /* MaxVecRegBits */ kFloatBits * 2,
-                             /* ForcePowerOf2 */ true, DL);
+                             /* ForcePowerOf2 */ true);
   EXPECT_EQ(Slice0.size(), 2u);
   auto Slice1 = SB1.getSlice(2, /* MaxVecRegBits */ kFloatBits * 3,
-                             /* ForcePowerOf2 */ false, DL);
+                             /* ForcePowerOf2 */ false);
   EXPECT_EQ(Slice1.size(), 3u);
 }

>From 08e46cc077ddbd9875adf7d64dfb0cbd1f770019 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 2 Oct 2024 18:12:45 -0700
Subject: [PATCH 3/4] Address comments

---
 .../SandboxVectorizer/SeedCollector.h         | 28 ++++++-----
 .../SandboxVectorizer/SeedCollectorTest.cpp   | 46 ++++++++++++-------
 2 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
index 35e0958a793051..32856d6bd6fc07 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -31,8 +31,7 @@ class SeedBundle {
   explicit SeedBundle(sandboxir::Instruction *I) { insertAt(begin(), I); }
   explicit SeedBundle(SeedList &&L) : Seeds(std::move(L)) {
     for (auto &S : Seeds)
-      NumUnusedBits +=
-          Utils::getNumBits(S, S->getTopmostLLVMInstruction()->getDataLayout());
+      NumUnusedBits += Utils::getNumBits(S);
   }
   /// No need to allow copies.
   SeedBundle(const SeedBundle &) = delete;
@@ -58,8 +57,7 @@ class SeedBundle {
     }
 #endif
     Seeds.insert(Pos, I);
-    NumUnusedBits +=
-        Utils::getNumBits(S, S->getTopmostLLVMInstruction()->getDataLayout());
+    NumUnusedBits += Utils::getNumBits(I);
   }
 
   unsigned getFirstUnusedElementIdx() const {
@@ -69,8 +67,7 @@ class SeedBundle {
     return Seeds.size();
   }
   /// Marks elements as 'used' so that we skip them in `getSlice()`.
-  void setUsed(unsigned ElementIdx, const DataLayout &DL, unsigned Sz = 1,
-               bool VerifyUnused = true) {
+  void setUsed(unsigned ElementIdx, unsigned Sz = 1, bool VerifyUnused = true) {
     if (ElementIdx + Sz >= UsedLanes.size())
       UsedLanes.resize(ElementIdx + Sz);
     for (unsigned Idx : seq<unsigned>(ElementIdx, ElementIdx + Sz)) {
@@ -79,19 +76,20 @@ class SeedBundle {
       UsedLanes.set(Idx);
       UsedLaneCount++;
     }
-    NumUnusedBits -= sandboxir::Utils::getNumBits(
-        Seeds[ElementIdx],
-        Utils::getNumBits(S, S->getTopmostLLVMInstruction()->getDataLayout()););
+    NumUnusedBits -= sandboxir::Utils::getNumBits(Seeds[ElementIdx]);
   }
-
-  void setUsed(sandboxir::Instruction *V, const DataLayout &DL) {
-    auto It = std::find(begin(), end(), V);
-    assert(It != end() && "V not in the bundle!");
+  /// Marks instruction \p I "used" within the bundle. Clients
+  /// use this property when assembling a vectorized instruction from
+  /// the seeds in a bundle. This allows constant time evaluation
+  /// and "removal" from the list.
+  void setUsed(sandboxir::Instruction *I) {
+    auto It = std::find(begin(), end(), I);
+    assert(It != end() && "Instruction not in the bundle!");
     auto Idx = It - begin();
-    setUsed(Idx, DL, 1, /*VerifyUnused=*/false);
+    setUsed(Idx, 1, /*VerifyUnused=*/false);
   }
+  /// \Returns whether or not \p Element has been used.
   bool isUsed(unsigned Element) const {
-    // return Element >= UsedLanes.size() ? false : UsedLanes.test(Element);
     return Element < UsedLanes.size() && UsedLanes.test(Element);
   }
   bool allUsed() const { return UsedLaneCount == Seeds.size(); }
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
index 440241c7e10277..9ed7c4017d35ed 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
@@ -29,13 +29,16 @@ struct SeedBundleTest : public testing::Test {
 
 TEST_F(SeedBundleTest, SeedBundle) {
   parseIR(C, R"IR(
-define void @foo(float %v0, float %v1) {
+define void @foo(float %v0, i32 %i0, i16 %i1, i8 %i2) {
 bb:
-  %add0 = fadd float %v0, %v1
-  %add1 = fadd float %v0, %v1
-  %add2 = fadd float %v0, %v1
-  %add3 = fadd float %v0, %v1
-  %add4 = fadd float %v0, %v1
+  %add0 = fadd float %v0, %v0
+  %add1 = fadd float %v0, %v0
+  %add2 = add i8 %i2, %i2
+  %add3 = add i16 %i1, %i1
+  %add4 = add i32 %i0, %i0
+  %add5 = add i16 %i1, %i1
+  %add6 = add i8 %i2, %i2
+  %add7 = add i8 %i2, %i2
   ret void
 }
 )IR");
@@ -47,13 +50,13 @@ define void @foo(float %v0, float %v1) {
   auto It = BB->begin();
   auto *I0 = &*It++;
   auto *I1 = &*It++;
-  // Assume test instructions are identical in the number of bits.
-  const unsigned kFloatBits = sandboxir::Utils::getNumBits(I0, DL);
+  // Assume first two instructions are identical in the number of bits.
+  const unsigned IOBits = sandboxir::Utils::getNumBits(I0, DL);
   // Constructor
   sandboxir::SeedBundle SBO(I0);
   EXPECT_EQ(*SBO.begin(), I0);
   // getNumUnusedBits after constructor
-  EXPECT_EQ(SBO.getNumUnusedBits(), kFloatBits);
+  EXPECT_EQ(SBO.getNumUnusedBits(), IOBits);
   // setUsed
   SBO.setUsed(I0);
   // allUsed
@@ -66,36 +69,47 @@ define void @foo(float %v0, float %v1) {
   SBO.insertAt(SBO.end(), I1);
   EXPECT_NE(*SBO.begin(), I1);
   // getNumUnusedBits after insertAt
-  EXPECT_EQ(SBO.getNumUnusedBits(), kFloatBits);
+  EXPECT_EQ(SBO.getNumUnusedBits(), IOBits);
   // allUsed
   EXPECT_FALSE(SBO.allUsed());
   // getFirstUnusedElement
   EXPECT_EQ(SBO.getFirstUnusedElementIdx(), 1u);
 
   sandboxir::SeedBundle::SeedList Seeds;
-  It = BB->begin();
+  // add2 through add7
   Seeds.push_back(&*It++);
   Seeds.push_back(&*It++);
   Seeds.push_back(&*It++);
   Seeds.push_back(&*It++);
   Seeds.push_back(&*It++);
+  Seeds.push_back(&*It++);
+  unsigned BundleBits = 0;
+  for (auto &S : Seeds)
+    BundleBits += sandboxir::Utils::getNumBits(S);
+  // Ensure the instructions are as expected.
+  EXPECT_EQ(BundleBits, 88u);
   // Constructor
   sandboxir::SeedBundle SB1(std::move(Seeds));
   // getNumUnusedBits after constructor
-  EXPECT_EQ(SB1.getNumUnusedBits(), 5 * kFloatBits);
+  EXPECT_EQ(SB1.getNumUnusedBits(), BundleBits);
   // setUsed with index
   SB1.setUsed(1);
   // getFirstUnusedElementIdx
   EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 0u);
-  EXPECT_EQ(SB1.getNumUnusedBits(), 4 * kFloatBits);
   SB1.setUsed(unsigned(0));
   // getFirstUnusedElementIdx not at end
   EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 2u);
   // getSlice
-  auto Slice0 = SB1.getSlice(2, /* MaxVecRegBits */ kFloatBits * 2,
+  auto Slice0 = SB1.getSlice(2, /* MaxVecRegBits */ 64,
                              /* ForcePowerOf2 */ true);
-  EXPECT_EQ(Slice0.size(), 2u);
-  auto Slice1 = SB1.getSlice(2, /* MaxVecRegBits */ kFloatBits * 3,
+  EXPECT_EQ(Slice0.size(), 4u);
+  SB1.setUsed(2);
+  auto Slice1 = SB1.getSlice(3, /* MaxVecRegBits */ 64,
                              /* ForcePowerOf2 */ false);
   EXPECT_EQ(Slice1.size(), 3u);
+  // getSlice empty case
+  SB1.setUsed(3);
+  auto Slice2 = SB1.getSlice(4, /* MaxVecRegBits */ 8,
+                             /* ForcePowerOf2 */ true);
+  EXPECT_EQ(Slice2.size(), 0u);
 }

>From 1ee5eac019b8a3ffa3536cff26e9d6bf4f57d62d Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Thu, 3 Oct 2024 15:46:50 -0700
Subject: [PATCH 4/4] More comments

---
 .../SandboxVectorizer/SeedCollector.h         | 37 +++++-----
 .../SandboxVectorizer/SeedCollector.cpp       | 73 +++++++++----------
 2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
index 32856d6bd6fc07..12dd35386018dd 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -26,9 +26,9 @@ namespace llvm::sandboxir {
 /// A set of candidate Instructions for vectorizing together.
 class SeedBundle {
 public:
-  using SeedList = SmallVector<sandboxir::Instruction *>;
+  using SeedList = SmallVector<Instruction *>;
   /// Initialize a bundle with \p I.
-  explicit SeedBundle(sandboxir::Instruction *I) { insertAt(begin(), I); }
+  explicit SeedBundle(Instruction *I) { insertAt(begin(), I); }
   explicit SeedBundle(SeedList &&L) : Seeds(std::move(L)) {
     for (auto &S : Seeds)
       NumUnusedBits += Utils::getNumBits(S);
@@ -45,12 +45,12 @@ class SeedBundle {
   const_iterator begin() const { return Seeds.begin(); }
   const_iterator end() const { return Seeds.end(); }
 
-  sandboxir::Instruction *operator[](unsigned Idx) const { return Seeds[Idx]; }
+  Instruction *operator[](unsigned Idx) const { return Seeds[Idx]; }
 
   /// Insert \p I into position \p P. Clients should choose Pos
   /// by symbol, symbol-offset, and program order (which depends if scheduling
   /// bottom-up or top-down).
-  void insertAt(iterator Pos, sandboxir::Instruction *I) {
+  void insertAt(iterator Pos, Instruction *I) {
 #ifdef EXPENSIVE_CHECKS
     for (auto Itr : Seeds) {
       assert(*Itr != I && "Attempt to insert an instruction twice.");
@@ -66,7 +66,17 @@ class SeedBundle {
         return ElmIdx;
     return Seeds.size();
   }
-  /// Marks elements as 'used' so that we skip them in `getSlice()`.
+  /// Marks instruction \p I "used" within the bundle. Clients
+  /// use this property when assembling a vectorized instruction from
+  /// the seeds in a bundle. This allows constant time evaluation
+  /// and "removal" from the list.
+  void setUsed(Instruction *I) {
+    auto It = std::find(begin(), end(), I);
+    assert(It != end() && "Instruction not in the bundle!");
+    auto Idx = It - begin();
+    setUsed(Idx, 1, /*VerifyUnused=*/false);
+  }
+
   void setUsed(unsigned ElementIdx, unsigned Sz = 1, bool VerifyUnused = true) {
     if (ElementIdx + Sz >= UsedLanes.size())
       UsedLanes.resize(ElementIdx + Sz);
@@ -76,17 +86,7 @@ class SeedBundle {
       UsedLanes.set(Idx);
       UsedLaneCount++;
     }
-    NumUnusedBits -= sandboxir::Utils::getNumBits(Seeds[ElementIdx]);
-  }
-  /// Marks instruction \p I "used" within the bundle. Clients
-  /// use this property when assembling a vectorized instruction from
-  /// the seeds in a bundle. This allows constant time evaluation
-  /// and "removal" from the list.
-  void setUsed(sandboxir::Instruction *I) {
-    auto It = std::find(begin(), end(), I);
-    assert(It != end() && "Instruction not in the bundle!");
-    auto Idx = It - begin();
-    setUsed(Idx, 1, /*VerifyUnused=*/false);
+    NumUnusedBits -= Utils::getNumBits(Seeds[ElementIdx]);
   }
   /// \Returns whether or not \p Element has been used.
   bool isUsed(unsigned Element) const {
@@ -96,8 +96,9 @@ class SeedBundle {
   unsigned getNumUnusedBits() const { return NumUnusedBits; }
 
   /// \Returns a slice of seed elements, starting at the element \p StartIdx,
-  /// with a total size <= \p MaxVecRegBits. If \p ForcePowOf2 is true, then the
-  /// returned slice should have a total number of bits that is a power of 2.
+  /// with a total size <= \p MaxVecRegBits, or an empty slice if the
+  /// requirements cannot be met. If \p ForcePowOf2 is true, then the returned
+  /// slice will have a total number of bits that is a power of 2.
   MutableArrayRef<SeedList> getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
                                      bool ForcePowOf2);
 
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
index a32649af870c79..56d6dc2fd0038b 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -18,46 +18,45 @@
 #include <span>
 
 using namespace llvm;
+namespace llvm::sandboxir {
 
-MutableArrayRef<sandboxir::SeedBundle::SeedList>
-sandboxir::SeedBundle::getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
-                                bool ForcePowerOf2) {
-  // Use uint32_t for counts to make it clear we are also using the proper
-  // isPowerOf2_[32|64].
+MutableArrayRef<SeedBundle::SeedList>
+SeedBundle::getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
+                     bool ForcePowerOf2) {
+  // Use uint32_t here for compatibility with isPowerOf2_32.
 
-  // Count both the bits and the elements of the slice we are about to build.
-  // The bits tell us whether this is a legal slice (that is <= MaxVecRegBits),
-  // and the num of elements help us do the actual slicing.
-  uint32_t BitsSum = 0;
-  // As we are collecting slice elements we may go over the limit, so we need to
-  // remember the last legal one. This is used for the creation of the slice.
-  uint32_t LastGoodBitsSum = 0;
-  uint32_t LastGoodNumSliceElements = 0;
-  // Skip any used elements (which have already been handled) and all below
-  // `StartIdx`.
-  assert(StartIdx >= getFirstUnusedElementIdx() &&
-         "Expected unused at StartIdx");
-  uint32_t FirstGoodElementIdx = StartIdx;
-  // Go through elements starting at FirstGoodElementIdx.
-  for (auto [ElementCnt, S] : enumerate(make_range(
-           std::next(Seeds.begin(), FirstGoodElementIdx), Seeds.end()))) {
-    // Stop if we found a used element.
-    if (isUsed(FirstGoodElementIdx + ElementCnt))
+  // BitCount tracks the size of the working slice. From that we can tell
+  // when the working slice's size is a power-of-two and when it exceeds
+  // the legal size in MaxVecRegBits.
+  uint32_t BitCount = 0;
+  uint32_t NumElements = 0;
+  // Can't start a slice with a used instruction.
+  assert(!isUsed(StartIdx) && "Expected unused at StartIdx");
+  for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) {
+    uint32_t InstBits = Utils::getNumBits(S);
+    // Stop if this instruction is used, or if adding it puts the slice over
+    // the limit.
+    if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits)
       break;
-    BitsSum += sandboxir::Utils::getNumBits(S);
-    // Stop if the bits sum is over the limit.
-    if (BitsSum > MaxVecRegBits)
-      break;
-    // If forcing a power-of-2 bit-size we check if this bit size is accepted.
-    if (ForcePowerOf2 && !isPowerOf2_32(BitsSum))
-      continue;
-    LastGoodBitsSum = BitsSum;
-    LastGoodNumSliceElements = ElementCnt + 1;
+    NumElements++;
+    BitCount += Utils::getNumBits(S);
   }
-  if (LastGoodNumSliceElements < 2)
-    return {};
-  if (LastGoodBitsSum == 0)
+  // Most slices will already be power-of-two-sized. If this one isn't, remove
+  // instructions until it is. This could be tracked in the loop above but the
+  // logic is harder to follow. TODO: Move if performance is unacceptable.
+  if (ForcePowerOf2) {
+    while (!isPowerOf2_32(BitCount) && NumElements > 1) {
+      BitCount -= Utils::getNumBits(Seeds[StartIdx + NumElements - 1]);
+      NumElements--;
+    }
+  }
+
+  // Return the slice only if it contains at least two elements.
+  if (NumElements > 1)
+    return MutableArrayRef<SeedBundle::SeedList>(&Seeds + StartIdx,
+                                                 NumElements);
+  else
     return {};
-  return MutableArrayRef<sandboxir::SeedBundle::SeedList>(
-      &Seeds + FirstGoodElementIdx, LastGoodNumSliceElements);
 }
+
+} // namespace llvm::sandboxir



More information about the llvm-commits mailing list