[llvm] [SLP] Collect candidate VFs in vector in vectorizeStores (NFC). (PR #82793)

Wed Feb 28 08:14:16 PST 2024

https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/82793

>From e189eec90a234d77cf7dc3fd5a6be65f8e84ad54 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 23 Feb 2024 16:52:57 +0000
Subject: [PATCH 1/4] [SLP] Collect candidate VFs in vector in vectorizeStores
 (NFC).

This is in preparation for
https://github.com/llvm/llvm-project/pull/77790 and makes it easy to add
other, non-power-of-2 VFs for processing.
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index de4e56ff80659a..8ee840e97e94b7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13918,10 +13918,14 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
                           << "MinVF (" << MinVF << ")\n");
       }
 
-      // FIXME: Is division-by-2 the correct step? Should we assert that the
-      // register size is a power-of-2?
-      unsigned StartIdx = 0;
+      SmallVector<unsigned> CandidateVFs;
       for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
+        // FIXME: Is division-by-2 the correct step? Should we assert that the
+        // register size is a power-of-2?
+        CandidateVFs.push_back(Size);
+      }
+      unsigned StartIdx = 0;
+      for (unsigned Size : CandidateVFs) {
         for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
           ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
           assert(

>From 8e7339aa06534156d18c9d179c457aeea16755cc Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 28 Feb 2024 16:03:14 +0000
Subject: [PATCH 2/4] [SLP] Exit early .

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 743eaf9039b285..f7bfb0d506e3e0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13912,10 +13912,11 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
       unsigned MinVF = TTI->getStoreMinimumVF(
           R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy);
 
-      if (MaxVF <= MinVF) {
+      if (MaxVF < MinVF) {
         LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
                           << ") <= "
                           << "MinVF (" << MinVF << ")\n");
+        return;
       }
 
       SmallVector<unsigned> CandidateVFs;

>From 3eacfa64a35f0aed09180c00cbb5272c4a4c9ca0 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 28 Feb 2024 16:03:14 +0000
Subject: [PATCH 3/4] [SLP] Exit early if MaxVF < MinVF (NFCI).

Exit early if MaxVF < MinVF. In that case, the loop body below will
never get entered. Note that this adjusts the condition from MaxVF <=
MinVF. If MaxVF == MinVF, vectorization may still be feasible (and the
loop below gets entered).
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2b7d518c1c1a78..e381cd2c5794b1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13912,10 +13912,11 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
       unsigned MinVF = TTI->getStoreMinimumVF(
           R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy);
 
-      if (MaxVF <= MinVF) {
+      if (MaxVF < MinVF) {
         LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
                           << ") <= "
                           << "MinVF (" << MinVF << ")\n");
+        return;
       }
 
       // FIXME: Is division-by-2 the correct step? Should we assert that the

>From 8b6b0e820792b1950abfebb5e3b8cb8122628d62 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 28 Feb 2024 16:13:04 +0000
Subject: [PATCH 4/4] !fixup use for_each.

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f7bfb0d506e3e0..9dce67328d95b0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13919,12 +13919,15 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
         return;
       }
 
-      SmallVector<unsigned> CandidateVFs;
-      for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
-        // FIXME: Is division-by-2 the correct step? Should we assert that the
-        // register size is a power-of-2?
-        CandidateVFs.push_back(Size);
-      }
+      unsigned Sz = 1 + Log2_32(MaxVF) - Log2_32(MinVF);
+      SmallVector<unsigned> CandidateVFs(Sz);
+      // FIXME: Is division-by-2 the correct step? Should we assert that the
+      // register size is a power-of-2?
+      unsigned Size = MaxVF;
+      for_each(CandidateVFs, [&](unsigned &VF) {
+        VF = Size;
+        Size /= 2;
+      });
       unsigned StartIdx = 0;
       for (unsigned Size : CandidateVFs) {
         for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {