[llvm] [SLP]Better support for copyable values in stores (PR #153213)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 1 13:08:14 PDT 2025
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/153213
>From f81929e548a1810d4beb29676f660cf582dcc300 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 12 Aug 2025 15:33:04 +0000
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 45 +++++++----
.../SLPVectorizer/X86/no_alternate_divrem.ll | 27 ++++---
.../X86/vect_copyable_in_binops.ll | 77 ++++---------------
3 files changed, 58 insertions(+), 91 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3045eeb3eb48e..b605cddc9258b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -25457,7 +25457,7 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
template <typename T>
static bool tryToVectorizeSequence(
SmallVectorImpl<T *> &Incoming, function_ref<bool(T *, T *)> Comparator,
- function_ref<bool(T *, T *)> AreCompatible,
+ function_ref<bool(ArrayRef<T *>, T *)> AreCompatible,
function_ref<bool(ArrayRef<T *>, bool)> TryToVectorizeHelper,
bool MaxVFOnly, BoUpSLP &R) {
bool Changed = false;
@@ -25479,10 +25479,10 @@ static bool tryToVectorizeSequence(
auto *SameTypeIt = IncIt;
while (SameTypeIt != E && (!isa<Instruction>(*SameTypeIt) ||
R.isDeleted(cast<Instruction>(*SameTypeIt)) ||
- AreCompatible(*SameTypeIt, *IncIt))) {
+ AreCompatible(VL, *SameTypeIt))) {
auto *I = dyn_cast<Instruction>(*SameTypeIt);
++SameTypeIt;
- if (I && !R.isDeleted(I))
+ if (!I || !R.isDeleted(I))
VL.push_back(cast<T>(I));
}
@@ -25677,10 +25677,10 @@ bool SLPVectorizerPass::vectorizeCmpInsts(iterator_range<ItT> CmpInsts,
return compareCmp<false>(V, V2, *TLI, *DT);
};
- auto AreCompatibleCompares = [&](Value *V1, Value *V2) {
- if (V1 == V2)
+ auto AreCompatibleCompares = [&](ArrayRef<Value *> VL, Value *V1) {
+ if (VL.empty() || VL.back() == V1)
return true;
- return compareCmp<true>(V1, V2, *TLI, *DT);
+ return compareCmp<true>(V1, VL.back(), *TLI, *DT);
};
SmallVector<Value *> Vals;
@@ -25886,9 +25886,11 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
return false;
};
- auto AreCompatiblePHIs = [&PHIToOpcodes, this, &R](Value *V1, Value *V2) {
- if (V1 == V2)
+ auto AreCompatiblePHIs = [&PHIToOpcodes, this, &R](ArrayRef<Value *> VL,
+ Value *V1) {
+ if (VL.empty() || V1 == VL.back())
return true;
+ Value *V2 = VL.back();
if (V1->getType() != V2->getType())
return false;
ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
@@ -26259,7 +26261,11 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
V2->getValueOperand()->getValueID();
};
- auto &&AreCompatibleStores = [this](StoreInst *V1, StoreInst *V2) {
+ auto AreCompatibleStores = [this, &R](ArrayRef<StoreInst *> VL,
+ StoreInst *V1) {
+ if (VL.empty())
+ return true;
+ StoreInst *V2 = VL.back();
if (V1 == V2)
return true;
if (V1->getValueOperand()->getType() != V2->getValueOperand()->getType())
@@ -26270,15 +26276,24 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
if (isa<UndefValue>(V1->getValueOperand()) ||
isa<UndefValue>(V2->getValueOperand()))
return true;
- if (auto *I1 = dyn_cast<Instruction>(V1->getValueOperand()))
- if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
- if (I1->getParent() != I2->getParent())
- return false;
- return getSameOpcode({I1, I2}, *TLI).valid();
- }
if (isa<Constant>(V1->getValueOperand()) &&
isa<Constant>(V2->getValueOperand()))
return true;
+ auto *I1 = dyn_cast<Instruction>(V1->getValueOperand());
+ auto *I2 = dyn_cast<Instruction>(V2->getValueOperand());
+ if (I1 || I2) {
+ if (I1 && I2 && I1->getParent() != I2->getParent())
+ return false;
+ SmallVector<Value *> NewVL(VL.size() + 1);
+ for (auto [SI, V] : zip(VL, NewVL))
+ V = SI->getValueOperand();
+ NewVL.back() = V1->getValueOperand();
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ InstructionsState S =
+ Analysis.buildInstructionsState(NewVL, R, VectorizeCopyableElements);
+ if (S)
+ return true;
+ }
return V1->getValueOperand()->getValueID() ==
V2->getValueOperand()->getValueID();
};
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index 89051c7aba42c..ed0bd3f38ead7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -4,24 +4,23 @@
define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_add_sdiv(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
-; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2
; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
-; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
-; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[A1:%.*]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1146, i32 146>
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4
+; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4
; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT: [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT: [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]]
-; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[ARR2]], align 4
-; CHECK-NEXT: store i32 [[RES2]], ptr [[GEP2_2]], align 4
-; CHECK-NEXT: store i32 [[RES3]], ptr [[GEP2_3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
+; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
index 4f3d551e21122..75aec45d3e2fc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -33,32 +33,12 @@ entry:
}
define void @add1(ptr noalias %dst, ptr noalias %src) {
-; NON-POW2-LABEL: @add1(
-; NON-POW2-NEXT: entry:
-; NON-POW2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
-; NON-POW2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; NON-POW2-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
-; NON-POW2-NEXT: store i32 [[TMP0]], ptr [[DST]], align 4
-; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[INCDEC_PTR]], align 4
-; NON-POW2-NEXT: [[TMP2:%.*]] = add nsw <3 x i32> [[TMP1]], <i32 1, i32 2, i32 3>
-; NON-POW2-NEXT: store <3 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
-; NON-POW2-NEXT: ret void
-;
-; POW2-ONLY-LABEL: @add1(
-; POW2-ONLY-NEXT: entry:
-; POW2-ONLY-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
-; POW2-ONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; POW2-ONLY-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
-; POW2-ONLY-NEXT: store i32 [[TMP0]], ptr [[DST]], align 4
-; POW2-ONLY-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
-; POW2-ONLY-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
-; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
-; POW2-ONLY-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1, i32 2>
-; POW2-ONLY-NEXT: store <2 x i32> [[TMP2]], ptr [[INCDEC_PTR1]], align 4
-; POW2-ONLY-NEXT: [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR5]], align 4
-; POW2-ONLY-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP3]], 3
-; POW2-ONLY-NEXT: store i32 [[ADD9]], ptr [[INCDEC_PTR7]], align 4
-; POW2-ONLY-NEXT: ret void
+; CHECK-LABEL: @add1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP0]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
+; CHECK-NEXT: ret void
;
entry:
%incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
@@ -84,18 +64,9 @@ entry:
define void @sub0(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @sub0(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
-; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
-; CHECK-NEXT: store i32 [[SUB]], ptr [[DST]], align 4
-; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
-; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
-; CHECK-NEXT: store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
-; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[INCDEC_PTR3]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 -1, i32 0, i32 -2, i32 -3>
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -182,18 +153,9 @@ entry:
define void @addsub0(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub0(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
-; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
-; CHECK-NEXT: store i32 [[SUB]], ptr [[DST]], align 4
-; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
-; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
-; CHECK-NEXT: store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 3>
-; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 -1, i32 0, i32 -2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -220,18 +182,9 @@ entry:
define void @addsub1(ptr noalias %dst, ptr noalias %src) {
; CHECK-LABEL: @addsub1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
-; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 -1, i32 1>
-; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST]], align 4
-; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
-; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
-; CHECK-NEXT: store i32 [[TMP4]], ptr [[INCDEC_PTR3]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
-; CHECK-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP5]], -3
-; CHECK-NEXT: store i32 [[SUB8]], ptr [[INCDEC_PTR6]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], <i32 -1, i32 1, i32 0, i32 3>
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
>From f24ae708fcecd04027fae5d3125ffdb1886971b5 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 12 Aug 2025 16:07:15 +0000
Subject: [PATCH 2/2] Address comments
Created using spr 1.3.5
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b605cddc9258b..fcc926f8de238 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -25482,7 +25482,7 @@ static bool tryToVectorizeSequence(
AreCompatible(VL, *SameTypeIt))) {
auto *I = dyn_cast<Instruction>(*SameTypeIt);
++SameTypeIt;
- if (!I || !R.isDeleted(I))
+ if (I && !R.isDeleted(I))
VL.push_back(cast<T>(I));
}
More information about the llvm-commits
mailing list