[llvm] [SLP]Better copyable vectorization for stores with non-instructions (PR #174249)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 2 14:53:37 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

<details>
<summary>Changes</summary>

If the stored values contain non-instructions, it is better to place them at
the tail of the value list, so that the algorithm for copyable elements can
detect them.

Fixes #<!-- -->53238


---
Full diff: https://github.com/llvm/llvm-project/pull/174249.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+18-15) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll (+4-15) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5748d7ca752d2..9f7c3b9ef2d8a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -27338,21 +27338,24 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
         V2->getValueOperand()->getType()->getScalarSizeInBits())
       return false;
     // UndefValues are compatible with all other values.
-    if (auto *I1 = dyn_cast<Instruction>(V->getValueOperand()))
-      if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
-        DomTreeNodeBase<llvm::BasicBlock> *NodeI1 =
-            DT->getNode(I1->getParent());
-        DomTreeNodeBase<llvm::BasicBlock> *NodeI2 =
-            DT->getNode(I2->getParent());
-        assert(NodeI1 && "Should only process reachable instructions");
-        assert(NodeI2 && "Should only process reachable instructions");
-        assert((NodeI1 == NodeI2) ==
-                   (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
-               "Different nodes should have different DFS numbers");
-        if (NodeI1 != NodeI2)
-          return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
-        return I1->getOpcode() < I2->getOpcode();
-      }
+    auto *I1 = dyn_cast<Instruction>(V->getValueOperand());
+    auto *I2 = dyn_cast<Instruction>(V2->getValueOperand());
+    if (I1 && I2) {
+      DomTreeNodeBase<llvm::BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
+      DomTreeNodeBase<llvm::BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
+      assert(NodeI1 && "Should only process reachable instructions");
+      assert(NodeI2 && "Should only process reachable instructions");
+      assert((NodeI1 == NodeI2) ==
+                 (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
+             "Different nodes should have different DFS numbers");
+      if (NodeI1 != NodeI2)
+        return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
+      return I1->getOpcode() < I2->getOpcode();
+    }
+    if (I1 && !I2)
+      return true;
+    if (!I1 && I2)
+      return false;
     return V->getValueOperand()->getValueID() <
            V2->getValueOperand()->getValueID();
   };
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll b/llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll
index 659a3b07bb938..efdbdb2ea867e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/copyable-non-inst-in-stores.ll
@@ -5,21 +5,10 @@ define void @test(i32 %a, ptr %out) {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: i32 [[A:%.*]], ptr [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    store i32 [[A]], ptr [[OUT]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    [[ARRAYIDX9_1:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i64 4
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[ARRAYIDX9_1]], align 4
-; CHECK-NEXT:    [[SHR_5:%.*]] = lshr i32 [[A]], 5
-; CHECK-NEXT:    [[ARRAYIDX9_5:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i64 20
-; CHECK-NEXT:    store i32 [[SHR_5]], ptr [[ARRAYIDX9_5]], align 4
-; CHECK-NEXT:    [[SHR_6:%.*]] = lshr i32 [[A]], 6
-; CHECK-NEXT:    [[ARRAYIDX9_6:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i64 24
-; CHECK-NEXT:    store i32 [[SHR_6]], ptr [[ARRAYIDX9_6]], align 4
-; CHECK-NEXT:    [[SHR_7:%.*]] = lshr i32 [[A]], 7
-; CHECK-NEXT:    [[ARRAYIDX9_7:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i64 28
-; CHECK-NEXT:    store i32 [[SHR_7]], ptr [[ARRAYIDX9_7]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i32> [[TMP1]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[OUT]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:

``````````

</details>


https://github.com/llvm/llvm-project/pull/174249


More information about the llvm-commits mailing list