[llvm] [SLP]Better copyable vectorization for stores with non-instructions (PR #174249)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 2 14:53:37 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
If the stored values contain non-instructions, it is better to place them at
the tail of the values (after the instructions), so that the algorithm for
copyable elements can detect them.
Fixes #<!-- -->53238
---
Full diff: https://github.com/llvm/llvm-project/pull/174249.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+18-15)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll (+4-15)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5748d7ca752d2..9f7c3b9ef2d8a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -27338,21 +27338,24 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
V2->getValueOperand()->getType()->getScalarSizeInBits())
return false;
// UndefValues are compatible with all other values.
- if (auto *I1 = dyn_cast<Instruction>(V->getValueOperand()))
- if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
- DomTreeNodeBase<llvm::BasicBlock> *NodeI1 =
- DT->getNode(I1->getParent());
- DomTreeNodeBase<llvm::BasicBlock> *NodeI2 =
- DT->getNode(I2->getParent());
- assert(NodeI1 && "Should only process reachable instructions");
- assert(NodeI2 && "Should only process reachable instructions");
- assert((NodeI1 == NodeI2) ==
- (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
- "Different nodes should have different DFS numbers");
- if (NodeI1 != NodeI2)
- return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
- return I1->getOpcode() < I2->getOpcode();
- }
+ auto *I1 = dyn_cast<Instruction>(V->getValueOperand());
+ auto *I2 = dyn_cast<Instruction>(V2->getValueOperand());
+ if (I1 && I2) {
+ DomTreeNodeBase<llvm::BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
+ DomTreeNodeBase<llvm::BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
+ assert(NodeI1 && "Should only process reachable instructions");
+ assert(NodeI2 && "Should only process reachable instructions");
+ assert((NodeI1 == NodeI2) ==
+ (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
+ "Different nodes should have different DFS numbers");
+ if (NodeI1 != NodeI2)
+ return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
+ return I1->getOpcode() < I2->getOpcode();
+ }
+ if (I1 && !I2)
+ return true;
+ if (!I1 && I2)
+ return false;
return V->getValueOperand()->getValueID() <
V2->getValueOperand()->getValueID();
};
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll b/llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll
index 659a3b07bb938..efdbdb2ea867e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll
@@ -5,21 +5,10 @@ define void @test(i32 %a, ptr %out) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: i32 [[A:%.*]], ptr [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: store i32 [[A]], ptr [[OUT]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT: [[ARRAYIDX9_1:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i64 4
-; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[ARRAYIDX9_1]], align 4
-; CHECK-NEXT: [[SHR_5:%.*]] = lshr i32 [[A]], 5
-; CHECK-NEXT: [[ARRAYIDX9_5:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i64 20
-; CHECK-NEXT: store i32 [[SHR_5]], ptr [[ARRAYIDX9_5]], align 4
-; CHECK-NEXT: [[SHR_6:%.*]] = lshr i32 [[A]], 6
-; CHECK-NEXT: [[ARRAYIDX9_6:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i64 24
-; CHECK-NEXT: store i32 [[SHR_6]], ptr [[ARRAYIDX9_6]], align 4
-; CHECK-NEXT: [[SHR_7:%.*]] = lshr i32 [[A]], 7
-; CHECK-NEXT: [[ARRAYIDX9_7:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i64 28
-; CHECK-NEXT: store i32 [[SHR_7]], ptr [[ARRAYIDX9_7]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i32> [[TMP1]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[OUT]], align 4
; CHECK-NEXT: ret void
;
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/174249
More information about the llvm-commits
mailing list