[llvm] [SLP]Do not consider non-profitable loads slices (PR #149209)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 17 04:59:45 PDT 2025
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/149209
>From 821def9085d5374e2674329ea97fbfd3afa6dfa9 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Wed, 16 Jul 2025 22:45:19 +0000
Subject: [PATCH 1/2] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 6 +++
.../X86/matched-nodes-updated.ll | 44 ++++++++-----------
2 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index da6af353c709f..9446e16532115 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11693,6 +11693,7 @@ void BoUpSLP::transformNodes() {
if (StartIdx + VF > End)
continue;
SmallVector<std::pair<unsigned, unsigned>> Slices;
+ bool AllStrided = true;
for (unsigned Cnt = StartIdx; Cnt + VF <= End; Cnt += VF) {
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
// If any instruction is vectorized already - do not try again.
@@ -11743,6 +11744,9 @@ void BoUpSLP::transformNodes() {
SmallVector<Value *> PointerOps;
LoadsState Res =
canVectorizeLoads(Slice, Slice.front(), Order, PointerOps);
+ AllStrided &= Res == LoadsState::StridedVectorize ||
+ Res == LoadsState::ScatterVectorize ||
+ Res == LoadsState::Gather;
// Do not vectorize gathers.
if (Res == LoadsState::ScatterVectorize ||
Res == LoadsState::Gather) {
@@ -11772,6 +11776,8 @@ void BoUpSLP::transformNodes() {
}
Slices.emplace_back(Cnt, Slice.size());
}
+ if (VF == 2 && AllStrided && Slices.size() > 2)
+ continue;
auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt, unsigned Sz) {
E.CombinedEntriesWithIndices.emplace_back(Idx, Cnt);
if (StartIdx == Cnt)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll
index f56af934f19f5..b1864b43512d8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll
@@ -14,50 +14,44 @@ define i32 @test(i32 %s.0) {
; CHECK: [[IF_END3:.*]]:
; CHECK-NEXT: br label %[[IF_END6:.*]]
; CHECK: [[IF_END6]]:
-; CHECK-NEXT: [[J_4:%.*]] = phi i32 [ 0, %[[IF_END3]] ], [ [[TMP28:%.*]], %[[O]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP22:%.*]], %[[O]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP24:%.*]], %[[O]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP29:%.*]], %[[O]] ]
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
-; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[TMP22:%.*]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 1, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <8 x i32> [[TMP27]], <8 x i32> [[TMP30]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; CHECK-NEXT: br i1 false, label %[[IF_END24:.*]], label %[[IF_THEN11:.*]]
; CHECK: [[IF_THEN11]]:
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison>, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 poison>
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> poison, i32 [[J_4]], i32 0
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i32> [[TMP15]], <8 x i32> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>
-; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison>, <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 4, i32 5, i32 6, i32 14>
; CHECK-NEXT: br label %[[IF_END24]]
; CHECK: [[IF_THEN18:.*]]:
; CHECK-NEXT: br label %[[T]]
; CHECK: [[T]]:
-; CHECK-NEXT: [[TMP34:%.*]] = phi <8 x i32> [ [[TMP33:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi <8 x i32> [ [[TMP33:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0
; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]]
; CHECK: [[IF_END24]]:
-; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP31]], %[[IF_END6]] ], [ [[TMP34]], %[[T]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 7, i32 1>
+; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP12]], %[[IF_THEN11]] ], [ [[TMP31]], %[[IF_END6]] ], [ [[TMP13]], %[[T]] ]
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 6, i32 1>
+; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 7>
; CHECK-NEXT: br label %[[O]]
; CHECK: [[O]]:
-; CHECK-NEXT: [[TMP22]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP19]], %[[IF_END24]] ]
; CHECK-NEXT: [[TMP23]] = phi <4 x i32> [ [[TMP1]], %[[K]] ], [ [[TMP20]], %[[IF_END24]] ]
-; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP21]], %[[IF_END24]] ]
+; CHECK-NEXT: [[TMP24]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP19]], %[[IF_END24]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP34]], %[[IF_END24]] ]
+; CHECK-NEXT: [[TMP22]] = extractelement <2 x i32> [[TMP24]], i32 1
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP25]], <8 x i32> <i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP33]] = shufflevector <8 x i32> [[TMP26]], <8 x i32> [[TMP32]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT: [[TMP28]] = extractelement <4 x i32> [[TMP24]], i32 3
+; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <2 x i32> [[TMP24]], <2 x i32> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> poison, <2 x i32> <i32 poison, i32 3>
+; CHECK-NEXT: [[TMP29]] = shufflevector <2 x i32> [[TMP35]], <2 x i32> [[TMP28]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: br i1 false, label %[[T]], label %[[IF_END6]]
;
entry:
>From c911b47b2538ae6ec3269425876198dcc982c0dd Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Thu, 17 Jul 2025 11:59:36 +0000
Subject: [PATCH 2/2] address comments
Created using spr 1.3.5
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9446e16532115..6ad5c60105a28 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11776,6 +11776,9 @@ void BoUpSLP::transformNodes() {
}
Slices.emplace_back(Cnt, Slice.size());
}
+ // Do not try to vectorize if all slices are strided or gathered with
+ // vector factor 2 and there are more than 2 slices. Better to handle
+ // them in gathered loads analysis, may result in better vectorization.
if (VF == 2 && AllStrided && Slices.size() > 2)
continue;
auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt, unsigned Sz) {
More information about the llvm-commits
mailing list