[llvm] a96a0de - [SLP]Fix the matching of the nodes with the same scalars, but reused

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 7 10:45:40 PST 2026


Author: Alexey Bataev
Date: 2026-03-07T10:29:34-08:00
New Revision: a96a0ded25c63e2a9d5838fe50e578a3339743fd

URL: https://github.com/llvm/llvm-project/commit/a96a0ded25c63e2a9d5838fe50e578a3339743fd
DIFF: https://github.com/llvm/llvm-project/commit/a96a0ded25c63e2a9d5838fe50e578a3339743fd.diff

LOG: [SLP]Fix the matching of the nodes with the same scalars, but reused

If the scalars are reused and ReuseShuffleIndices is set, we may fail to
match the buildvector/gather nodes and add an extra cost.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/RISCV/same-node-reused.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 631af61e2cfba..8bc16972beddc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -18224,12 +18224,19 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
   SmallVector<SmallPtrSet<const TreeEntry *, 4>> UsedTEs;
   SmallDenseMap<Value *, int> UsedValuesEntry;
   SmallPtrSet<const Value *, 16> VisitedValue;
+  bool IsReusedNodeFound = false;
   auto CheckAndUseSameNode = [&](const TreeEntry *TEPtr) {
     // The node is reused - exit.
+    if (IsReusedNodeFound)
+      return false;
     if ((TEPtr->getVectorFactor() != VL.size() &&
          TEPtr->Scalars.size() != VL.size()) ||
         (!TEPtr->isSame(VL) && !TEPtr->isSame(TE->Scalars)))
       return false;
+    IsReusedNodeFound =
+        equal(TE->Scalars, TEPtr->Scalars) &&
+        equal(TE->ReorderIndices, TEPtr->ReorderIndices) &&
+        equal(TE->ReuseShuffleIndices, TEPtr->ReuseShuffleIndices);
     UsedTEs.clear();
     UsedTEs.emplace_back().insert(TEPtr);
     for (Value *V : VL) {
@@ -18424,6 +18431,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
         VToTEs.insert(VTE);
       }
     }
+    if (IsReusedNodeFound)
+      break;
     if (VToTEs.empty())
       continue;
     if (UsedTEs.empty()) {
@@ -18482,7 +18491,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
       return EntryPtr->isSame(VL) || EntryPtr->isSame(TE->Scalars);
     });
     if (It != FirstEntries.end() &&
-        ((*It)->getVectorFactor() == VL.size() ||
+        (IsReusedNodeFound || (*It)->getVectorFactor() == VL.size() ||
          ((*It)->getVectorFactor() == TE->Scalars.size() &&
           TE->ReuseShuffleIndices.size() == VL.size() &&
           (*It)->isSame(TE->Scalars)))) {
@@ -20149,9 +20158,18 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
         // Reset the builder(s) to correctly handle perfect diamond matched
         // nodes.
         ShuffleBuilder.resetForSameNode();
-        ShuffleBuilder.add(*FrontTE, Mask);
         // Full matched entry found, no need to insert subvectors.
-        Res = ShuffleBuilder.finalize(E->getCommonMask(), {}, {});
+        if (equal(E->Scalars, FrontTE->Scalars) &&
+            equal(E->ReorderIndices, FrontTE->ReorderIndices) &&
+            equal(E->ReuseShuffleIndices, FrontTE->ReuseShuffleIndices)) {
+          Mask.resize(FrontTE->getVectorFactor());
+          std::iota(Mask.begin(), Mask.end(), 0);
+          ShuffleBuilder.add(*FrontTE, Mask);
+          Res = ShuffleBuilder.finalize({}, {}, {});
+        } else {
+          ShuffleBuilder.add(*FrontTE, Mask);
+          Res = ShuffleBuilder.finalize(E->getCommonMask(), {}, {});
+        }
         return Res;
       }
       if (!Resized) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/RISCV/same-node-reused.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/same-node-reused.ll
index fb76cfcdcef5f..e007a0dae11f2 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/same-node-reused.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/same-node-reused.ll
@@ -6,21 +6,14 @@ define void @test(ptr %dest, ptr %p) {
 ; CHECK-SAME: ptr [[DEST:%.*]], ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[INC0:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
-; CHECK-NEXT:    [[INC1:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 2
 ; CHECK-NEXT:    [[INC2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 3
 ; CHECK-NEXT:    [[E1:%.*]] = load i16, ptr [[INC0]], align 2
 ; CHECK-NEXT:    [[E3:%.*]] = load i16, ptr [[INC2]], align 2
-; CHECK-NEXT:    [[INC5:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 2
-; CHECK-NEXT:    [[INC6:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = call <3 x i16> @llvm.masked.load.v3i16.p0(ptr align 4 [[P]], <3 x i1> <i1 true, i1 false, i1 true>, <3 x i16> poison)
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[E2:%.*]] = load i16, ptr [[INC1]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i16> [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    [[A2:%.*]] = add i16 [[E2]], [[E2]]
-; CHECK-NEXT:    [[A3:%.*]] = add i16 [[E2]], [[E2]]
-; CHECK-NEXT:    store <2 x i16> [[TMP2]], ptr [[DEST]], align 4
-; CHECK-NEXT:    store i16 [[A2]], ptr [[INC5]], align 2
-; CHECK-NEXT:    store i16 [[A3]], ptr [[INC6]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    store <4 x i16> [[TMP3]], ptr [[DEST]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:


        


More information about the llvm-commits mailing list