[llvm] [SandboxVec][BottomUpVec] Use SeedCollector and slice seeds (PR #120826)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 9 10:01:03 PST 2025


https://github.com/vporpo updated https://github.com/llvm/llvm-project/pull/120826

>From 2343a4618bbbabe4073de461771ebfcdf4c7b197 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Wed, 6 Nov 2024 11:53:02 -0800
Subject: [PATCH] [SandboxVec][BottomUpVec] Use SeedCollector and slice seeds

---
 llvm/include/llvm/SandboxIR/Pass.h            |  6 +-
 llvm/include/llvm/SandboxIR/Utils.h           |  7 +-
 .../Vectorize/SandboxVectorizer/Legality.h    |  1 +
 .../Vectorize/SandboxVectorizer/Scheduler.h   |  7 ++
 .../SandboxVectorizer/SeedCollector.h         |  4 +-
 .../Vectorize/SandboxVectorizer/VecUtils.h    | 10 ++
 .../SandboxVectorizer/Passes/BottomUpVec.cpp  | 91 +++++++++++++++----
 .../SandboxVectorizer/SandboxVectorizer.cpp   |  2 +-
 .../SandboxVectorizer/SeedCollector.cpp       | 17 ++--
 .../SandboxVectorizer/bottomup_basic.ll       |  2 +-
 .../SandboxVectorizer/bottomup_seed_slice.ll  | 33 +++++++
 .../bottomup_seed_slice_pow2.ll               | 37 ++++++++
 .../SandboxVectorizer/VecUtilsTest.cpp        | 11 +++
 13 files changed, 196 insertions(+), 32 deletions(-)
 create mode 100644 llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
 create mode 100644 llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll

diff --git a/llvm/include/llvm/SandboxIR/Pass.h b/llvm/include/llvm/SandboxIR/Pass.h
index 4f4eae87cd3ff7..267389a8a87a2e 100644
--- a/llvm/include/llvm/SandboxIR/Pass.h
+++ b/llvm/include/llvm/SandboxIR/Pass.h
@@ -16,6 +16,7 @@ namespace llvm {
 
 class AAResults;
 class ScalarEvolution;
+class TargetTransformInfo;
 
 namespace sandboxir {
 
@@ -25,15 +26,18 @@ class Region;
 class Analyses {
   AAResults *AA = nullptr;
   ScalarEvolution *SE = nullptr;
+  TargetTransformInfo *TTI = nullptr;
 
   Analyses() = default;
 
 public:
-  Analyses(AAResults &AA, ScalarEvolution &SE) : AA(&AA), SE(&SE) {}
+  Analyses(AAResults &AA, ScalarEvolution &SE, TargetTransformInfo &TTI)
+      : AA(&AA), SE(&SE), TTI(&TTI) {}
 
 public:
   AAResults &getAA() const { return *AA; }
   ScalarEvolution &getScalarEvolution() const { return *SE; }
+  TargetTransformInfo &getTTI() const { return *TTI; }
   /// For use by unit tests.
   static Analyses emptyForTesting() { return Analyses(); }
 };
diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h
index a73498adea1d59..d58fe522143953 100644
--- a/llvm/include/llvm/SandboxIR/Utils.h
+++ b/llvm/include/llvm/SandboxIR/Utils.h
@@ -60,11 +60,16 @@ class Utils {
         getUnderlyingObject(LSI->getPointerOperand()->Val));
   }
 
+  /// \Returns the number of bits of \p Ty.
+  static unsigned getNumBits(Type *Ty, const DataLayout &DL) {
+    return DL.getTypeSizeInBits(Ty->LLVMTy);
+  }
+
   /// \Returns the number of bits required to represent the operands or return
   /// value of \p V in \p DL.
   static unsigned getNumBits(Value *V, const DataLayout &DL) {
     Type *Ty = getExpectedType(V);
-    return DL.getTypeSizeInBits(Ty->LLVMTy);
+    return getNumBits(Ty, DL);
   }
 
   /// \Returns the number of bits required to represent the operands or
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index 63d6ef31c86453..233cf82a1b3dfb 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -177,6 +177,7 @@ class LegalityAnalysis {
   // TODO: Try to remove the SkipScheduling argument by refactoring the tests.
   const LegalityResult &canVectorize(ArrayRef<Value *> Bndl,
                                      bool SkipScheduling = false);
+  void clear() { Sched.clear(); }
 };
 
 } // namespace llvm::sandboxir
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
index 3959f84c601e04..1e8c0101cf77cd 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
@@ -143,6 +143,13 @@ class Scheduler {
   ~Scheduler() {}
 
   bool trySchedule(ArrayRef<Instruction *> Instrs);
+  /// Clear the scheduler's state, including the DAG.
+  void clear() {
+    Bndls.clear();
+    // TODO: clear view once it lands.
+    DAG.clear();
+    ScheduleTopItOpt = std::nullopt;
+  }
 
 #ifndef NDEBUG
   void dump(raw_ostream &OS) const;
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
index 6e16a84d832e5e..73b2bdf8f181f6 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -95,8 +95,8 @@ class SeedBundle {
   /// with a total size <= \p MaxVecRegBits, or an empty slice if the
   /// requirements cannot be met . If \p ForcePowOf2 is true, then the returned
   /// slice will have a total number of bits that is a power of 2.
-  MutableArrayRef<Instruction *>
-  getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2);
+  ArrayRef<Instruction *> getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
+                                   bool ForcePowOf2);
 
   /// \Returns the number of seed elements in the bundle.
   std::size_t size() const { return Seeds.size(); }
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
index fc9d67fcfcdec4..28fa33656dd5f4 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
@@ -133,6 +133,16 @@ class VecUtils {
     assert(tryGetCommonScalarType(Bndl) && "Expected common scalar type!");
     return ScalarTy;
   }
+  /// \Returns the largest power of 2 that is <= \p Num, or 0 if \p Num is 0.
+  static unsigned getFloorPowerOf2(unsigned Num) {
+    if (Num == 0)
+      return Num;
+    unsigned Mask = Num;
+    Mask >>= 1;
+    for (unsigned ShiftBy = 1; ShiftBy < sizeof(Num) * 8; ShiftBy <<= 1)
+      Mask |= Mask >> ShiftBy;
+    return Num & ~Mask;
+  }
 };
 
 } // namespace llvm::sandboxir
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index a2ea11be59b8ed..18e072c17d202b 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -8,29 +8,31 @@
 
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/SandboxIR/Function.h"
 #include "llvm/SandboxIR/Instruction.h"
 #include "llvm/SandboxIR/Module.h"
 #include "llvm/SandboxIR/Utils.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
 
-namespace llvm::sandboxir {
+namespace llvm {
+
+static cl::opt<unsigned>
+    OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(0), cl::Hidden,
+                       cl::desc("Override the vector register size in bits, "
+                                "which is otherwise found by querying TTI."));
+static cl::opt<bool>
+    AllowNonPow2("sbvec-allow-non-pow2", cl::init(false), cl::Hidden,
+                 cl::desc("Allow non-power-of-2 vectorization."));
+
+namespace sandboxir {
 
 BottomUpVec::BottomUpVec(StringRef Pipeline)
     : FunctionPass("bottom-up-vec"),
       RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {}
 
-// TODO: This is a temporary function that returns some seeds.
-//       Replace this with SeedCollector's function when it lands.
-static llvm::SmallVector<Value *, 4> collectSeeds(BasicBlock &BB) {
-  llvm::SmallVector<Value *, 4> Seeds;
-  for (auto &I : BB)
-    if (auto *SI = llvm::dyn_cast<StoreInst>(&I))
-      Seeds.push_back(SI);
-  return Seeds;
-}
-
 static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl,
                                           unsigned OpIdx) {
   SmallVector<Value *, 4> Operands;
@@ -265,6 +267,7 @@ Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl, unsigned Depth) {
 
 bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
   DeadInstrCandidates.clear();
+  Legality->clear();
   vectorizeRec(Bndl, /*Depth=*/0);
   tryEraseDeadInstrs();
   return Change;
@@ -275,17 +278,67 @@ bool BottomUpVec::runOnFunction(Function &F, const Analyses &A) {
       A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(),
       F.getContext());
   Change = false;
+  const auto &DL = F.getParent()->getDataLayout();
+  unsigned VecRegBits =
+      OverrideVecRegBits != 0
+          ? OverrideVecRegBits
+          : A.getTTI()
+                .getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
+                .getFixedValue();
+
   // TODO: Start from innermost BBs first
   for (auto &BB : F) {
-    // TODO: Replace with proper SeedCollector function.
-    auto Seeds = collectSeeds(BB);
-    // TODO: Slice Seeds into smaller chunks.
-    // TODO: If vectorization succeeds, run the RegionPassManager on the
-    // resulting region.
-    if (Seeds.size() >= 2)
-      Change |= tryVectorize(Seeds);
+    SeedCollector SC(&BB, A.getScalarEvolution());
+    for (SeedBundle &Seeds : SC.getStoreSeeds()) {
+      unsigned ElmBits =
+          Utils::getNumBits(VecUtils::getElementType(Utils::getExpectedType(
+                                Seeds[Seeds.getFirstUnusedElementIdx()])),
+                            DL);
+
+      auto DivideBy2 = [](unsigned Num) {
+        auto Floor = VecUtils::getFloorPowerOf2(Num);
+        if (Floor == Num)
+          return Floor / 2;
+        return Floor;
+      };
+      // Try to create the largest vector supported by the target. If it fails,
+      // reduce the vector size to the next smaller power of 2.
+      for (unsigned SliceElms = std::min(VecRegBits / ElmBits,
+                                         Seeds.getNumUnusedBits() / ElmBits);
+           SliceElms >= 2u; SliceElms = DivideBy2(SliceElms)) {
+        if (Seeds.allUsed())
+          break;
+        // Keep trying offsets after FirstUnusedElementIdx, until we vectorize
+        // the slice. This could be quite expensive. TODO: Enforce a limit.
+        for (unsigned Offset = Seeds.getFirstUnusedElementIdx(),
+                      OE = Seeds.size();
+             Offset + 1 < OE; Offset += 1) {
+          // Seeds are getting used as we vectorize, so skip them.
+          if (Seeds.isUsed(Offset))
+            continue;
+          if (Seeds.allUsed())
+            break;
+
+          auto SeedSlice =
+              Seeds.getSlice(Offset, SliceElms * ElmBits, !AllowNonPow2);
+          if (SeedSlice.empty())
+            continue;
+
+          assert(SeedSlice.size() >= 2 && "Should have been rejected!");
+
+          // TODO: If vectorization succeeds, run the RegionPassManager on the
+          // resulting region.
+
+          // TODO: Refactor to remove the unnecessary copy to SeedSliceVals.
+          SmallVector<Value *> SeedSliceVals(SeedSlice.begin(),
+                                             SeedSlice.end());
+          Change |= tryVectorize(SeedSliceVals);
+        }
+      }
+    }
   }
   return Change;
 }
 
-} // namespace llvm::sandboxir
+} // namespace sandboxir
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
index c22eb01d74a1cb..a6e2b40000529a 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
@@ -86,6 +86,6 @@ bool SandboxVectorizerPass::runImpl(Function &LLVMF) {
 
   // Create SandboxIR for LLVMF and run BottomUpVec on it.
   sandboxir::Function &F = *Ctx->createFunction(&LLVMF);
-  sandboxir::Analyses A(*AA, *SE);
+  sandboxir::Analyses A(*AA, *SE, *TTI);
   return FPM.runOnFunction(F, A);
 }
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
index 6ea34c5e0598df..a3ce663407c4a9 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -31,9 +31,9 @@ cl::opt<unsigned> SeedGroupsLimit(
     cl::desc("Limit the number of collected seeds groups in a BB to "
              "cap compilation time."));
 
-MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
-                                                    unsigned MaxVecRegBits,
-                                                    bool ForcePowerOf2) {
+ArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
+                                             unsigned MaxVecRegBits,
+                                             bool ForcePowerOf2) {
   // Use uint32_t here for compatibility with IsPowerOf2_32
 
   // BitCount tracks the size of the working slice. From that we can tell
@@ -47,10 +47,13 @@ MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
   // Can't start a slice with a used instruction.
   assert(!isUsed(StartIdx) && "Expected unused at StartIdx");
   for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) {
+    // Stop if this instruction is used. This needs to be done before
+    // getNumBits() because a "used" instruction may have been erased.
+    if (isUsed(StartIdx + NumElements))
+      break;
     uint32_t InstBits = Utils::getNumBits(S);
-    // Stop if this instruction is used, or if adding it puts the slice over
-    // the limit.
-    if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits)
+    // Stop if adding it puts the slice over the limit.
+    if (BitCount + InstBits > MaxVecRegBits)
       break;
     NumElements++;
     BitCount += InstBits;
@@ -68,7 +71,7 @@ MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
          "Must be a power of two");
   // Return any non-empty slice
   if (NumElements > 1)
-    return MutableArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
+    return ArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
   else
     return {};
 }
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index 7422d287ff3e2a..785d1f4ef666fc 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=sandbox-vectorizer -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
 
 define void @store_load(ptr %ptr) {
 ; CHECK-LABEL: define void @store_load(
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
new file mode 100644
index 00000000000000..46cda3c80aaa35
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+
+
+declare void @foo()
+define void @slice_seeds(ptr %ptr, float %val) {
+; CHECK-LABEL: define void @slice_seeds(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; CHECK-NEXT:    [[LD2:%.*]] = load float, ptr [[PTR2]], align 4
+; CHECK-NEXT:    store float [[LD2]], ptr [[PTR2]], align 4
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
+; CHECK-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr1 = getelementptr float, ptr %ptr, i32 1
+  %ptr2 = getelementptr float, ptr %ptr, i32 2
+
+  %ld2 = load float, ptr %ptr2
+  store float %ld2, ptr %ptr2
+  ; This call blocks scheduling of all 3 stores.
+  call void @foo()
+
+  %ld0 = load float, ptr %ptr0
+  %ld1 = load float, ptr %ptr1
+  store float %ld0, ptr %ptr0
+  store float %ld1, ptr %ptr1
+  ret void
+}
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
new file mode 100644
index 00000000000000..22119c4491b929
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=false -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=POW2
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=true -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=NON-POW2
+
+define void @pow2(ptr %ptr, float %val) {
+; POW2-LABEL: define void @pow2(
+; POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; POW2-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; POW2-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; POW2-NEXT:    [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; POW2-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
+; POW2-NEXT:    [[LD2:%.*]] = load float, ptr [[PTR2]], align 4
+; POW2-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
+; POW2-NEXT:    store float [[LD2]], ptr [[PTR2]], align 4
+; POW2-NEXT:    ret void
+;
+; NON-POW2-LABEL: define void @pow2(
+; NON-POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; NON-POW2-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; NON-POW2-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; NON-POW2-NEXT:    [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; NON-POW2-NEXT:    [[PACK2:%.*]] = load <3 x float>, ptr [[PTR0]], align 4
+; NON-POW2-NEXT:    store <3 x float> [[PACK2]], ptr [[PTR0]], align 4
+; NON-POW2-NEXT:    ret void
+;
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr1 = getelementptr float, ptr %ptr, i32 1
+  %ptr2 = getelementptr float, ptr %ptr, i32 2
+
+  %ld0 = load float, ptr %ptr0
+  %ld1 = load float, ptr %ptr1
+  %ld2 = load float, ptr %ptr2
+  store float %ld0, ptr %ptr0
+  store float %ld1, ptr %ptr1
+  store float %ld2, ptr %ptr2
+  ret void
+}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
index cf7b6cbc7e55cb..8661dcd5067c0a 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
@@ -472,3 +472,14 @@ define void @foo(i8 %v, ptr %ptr) {
 #endif // NDEBUG
   }
 }
+
+TEST_F(VecUtilsTest, FloorPowerOf2) {
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(0), 0u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(1 << 0), 1u << 0);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(3), 2u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(4), 4u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(5), 4u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(7), 4u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(8), 8u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(9), 8u);
+}



More information about the llvm-commits mailing list