[llvm] [SLPVectorizer] minor tweaks around lambdas for compatibility with older compilers (PR #122348)

Thu Jan 9 15:05:34 PST 2025

https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/122348

>From 7ef3e241257965c82e6f1534a3067a729d9fc4ba Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Sun, 22 Dec 2024 21:23:09 +0000
Subject: [PATCH 1/2] [SLPVectorizer] minor tweaks around lambdas for
 compatibility with some older compilers

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 36fed8937aec28..8e361f505419df 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6923,8 +6923,8 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
                              Loads.size());
     Align Alignment = computeCommonAlignment<LoadInst>(Values);
     auto *Ty = getWidenedType(Loads.front()->getType(), Loads.size());
-    return TTI->isLegalMaskedGather(Ty, Alignment) &&
-           !TTI->forceScalarizeMaskedGather(Ty, Alignment);
+    return this->TTI->isLegalMaskedGather(Ty, Alignment) &&
+           !this->TTI->forceScalarizeMaskedGather(Ty, Alignment);
   };
 
   auto GetVectorizedRanges = [this](ArrayRef<LoadInst *> Loads,
@@ -7085,9 +7085,10 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
           }
           SmallVector<std::pair<LoadInst *, int>> LocalLoadsDists(LoadsDists);
           SmallVector<LoadInst *> OriginalLoads(LocalLoadsDists.size());
-          transform(
-              LoadsDists, OriginalLoads.begin(),
-              [](const std::pair<LoadInst *, int> &L) { return L.first; });
+          transform(LoadsDists, OriginalLoads.begin(),
+                    [](const std::pair<LoadInst *, int> &L) -> LoadInst * {
+                      return L.first;
+                    });
           stable_sort(LocalLoadsDists, LoadSorter);
           SmallVector<LoadInst *> Loads;
           unsigned MaxConsecutiveDistance = 0;
@@ -7314,7 +7315,8 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
     if (!Ref.empty() && !NonVectorized.empty() &&
         std::accumulate(
             Ref.begin(), Ref.end(), 0u,
-            [](unsigned S, ArrayRef<std::pair<LoadInst *, int>> LoadsDists) {
+            [](unsigned S,
+               ArrayRef<std::pair<LoadInst *, int>> LoadsDists) -> unsigned {
               return S + LoadsDists.size();
             }) != NonVectorized.size() &&
         IsMaskedGatherSupported(NonVectorized)) {
@@ -17003,8 +17005,8 @@ void BoUpSLP::optimizeGatherSequence() {
     // Check if the last undefs actually change the final number of used vector
     // registers.
     return SM1.size() - LastUndefsCnt > 1 &&
-           TTI->getNumberOfParts(SI1->getType()) ==
-               TTI->getNumberOfParts(
+           this->TTI->getNumberOfParts(SI1->getType()) ==
+               this->TTI->getNumberOfParts(
                    getWidenedType(SI1->getType()->getElementType(),
                                   SM1.size() - LastUndefsCnt));
   };
@@ -17784,8 +17786,8 @@ bool BoUpSLP::collectValuesToDemote(
       const unsigned VF = E.Scalars.size();
       Type *OrigScalarTy = E.Scalars.front()->getType();
       if (UniqueBases.size() <= 2 ||
-          TTI->getNumberOfParts(getWidenedType(OrigScalarTy, VF)) ==
-              TTI->getNumberOfParts(getWidenedType(
+          this->TTI->getNumberOfParts(getWidenedType(OrigScalarTy, VF)) ==
+              this->TTI->getNumberOfParts(getWidenedType(
                   IntegerType::get(OrigScalarTy->getContext(), BitWidth), VF)))
         ToDemote.push_back(E.Idx);
     }

>From 07c41f2ceddccb367ac48e5eddce99989f6ec803 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 9 Jan 2025 23:05:24 +0000
Subject: [PATCH 2/2] address comments

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8e361f505419df..e3cea4da175495 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6918,13 +6918,13 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
     return L1.second > L2.second;
   };
 
-  auto IsMaskedGatherSupported = [&](ArrayRef<LoadInst *> Loads) {
+  auto IsMaskedGatherSupported = [&, TTI = TTI](ArrayRef<LoadInst *> Loads) {
     ArrayRef<Value *> Values(reinterpret_cast<Value *const *>(Loads.begin()),
                              Loads.size());
     Align Alignment = computeCommonAlignment<LoadInst>(Values);
     auto *Ty = getWidenedType(Loads.front()->getType(), Loads.size());
-    return this->TTI->isLegalMaskedGather(Ty, Alignment) &&
-           !this->TTI->forceScalarizeMaskedGather(Ty, Alignment);
+    return TTI->isLegalMaskedGather(Ty, Alignment) &&
+           !TTI->forceScalarizeMaskedGather(Ty, Alignment);
   };
 
   auto GetVectorizedRanges = [this](ArrayRef<LoadInst *> Loads,
@@ -16972,8 +16972,9 @@ void BoUpSLP::optimizeGatherSequence() {
   // and its mask indeces are the same as in the first one or undefs. E.g.
   // shuffle %0, poison, <0, 0, 0, undef> is less defined than shuffle %0,
   // poison, <0, 0, 0, 0>.
-  auto &&IsIdenticalOrLessDefined = [this](Instruction *I1, Instruction *I2,
-                                           SmallVectorImpl<int> &NewMask) {
+  auto &&IsIdenticalOrLessDefined = [TTI = TTI](Instruction *I1,
+                                                Instruction *I2,
+                                                SmallVectorImpl<int> &NewMask) {
     if (I1->getType() != I2->getType())
       return false;
     auto *SI1 = dyn_cast<ShuffleVectorInst>(I1);
@@ -17005,8 +17006,8 @@ void BoUpSLP::optimizeGatherSequence() {
     // Check if the last undefs actually change the final number of used vector
     // registers.
     return SM1.size() - LastUndefsCnt > 1 &&
-           this->TTI->getNumberOfParts(SI1->getType()) ==
-               this->TTI->getNumberOfParts(
+           TTI->getNumberOfParts(SI1->getType()) ==
+               TTI->getNumberOfParts(
                    getWidenedType(SI1->getType()->getElementType(),
                                   SM1.size() - LastUndefsCnt));
   };
@@ -17767,7 +17768,7 @@ bool BoUpSLP::collectValuesToDemote(
     BitWidth = std::max(BitWidth, BitWidth1);
     return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2);
   };
-  auto FinalAnalysis = [&]() {
+  auto FinalAnalysis = [&, TTI = TTI]() {
     if (!IsProfitableToDemote)
       return false;
     bool Res = all_of(
@@ -17786,8 +17787,8 @@ bool BoUpSLP::collectValuesToDemote(
       const unsigned VF = E.Scalars.size();
       Type *OrigScalarTy = E.Scalars.front()->getType();
       if (UniqueBases.size() <= 2 ||
-          this->TTI->getNumberOfParts(getWidenedType(OrigScalarTy, VF)) ==
-              this->TTI->getNumberOfParts(getWidenedType(
+          TTI->getNumberOfParts(getWidenedType(OrigScalarTy, VF)) ==
+              TTI->getNumberOfParts(getWidenedType(
                   IntegerType::get(OrigScalarTy->getContext(), BitWidth), VF)))
         ToDemote.push_back(E.Idx);
     }