[llvm] 1bfc610 - [SLP]Fix spill cost analysis for split vectorized nodes

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 12:46:04 PDT 2025


Author: Alexey Bataev
Date: 2025-03-28T12:45:53-07:00
New Revision: 1bfc61064ab46e44865453d2c2176f5e7f96987c

URL: https://github.com/llvm/llvm-project/commit/1bfc61064ab46e44865453d2c2176f5e7f96987c
DIFF: https://github.com/llvm/llvm-project/commit/1bfc61064ab46e44865453d2c2176f5e7f96987c.diff

LOG: [SLP]Fix spill cost analysis for split vectorized nodes

If the entry is a SplitVectorize node, it can be skipped in favor of its
operands, since the operands allow the spill costs to be detected
correctly.

Fixes #133288

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f29fb6780253b..850895895d44d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13347,7 +13347,8 @@ InstructionCost BoUpSLP::getSpillCost() {
     for (const TreeEntry *Op : Operands) {
       if (!Op->isGather())
         LiveEntries.push_back(Op);
-      if ((Entry->getOpcode() != Instruction::PHI && Op->isGather()) ||
+      if (Entry->State == TreeEntry::SplitVectorize ||
+          (Entry->getOpcode() != Instruction::PHI && Op->isGather()) ||
           (Op->isGather() && allConstant(Op->Scalars)))
         continue;
       Budget = 0;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll
new file mode 100644
index 0000000000000..5491e8ea7e0f8
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 -mattr=+sse4.1 < %s | FileCheck %s
+
+define void @test(i32 %arg) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(3) null, align 4
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr addrspace(3) null, align 4
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr addrspace(3) null, align 4
+; CHECK-NEXT:    [[LOAD3:%.*]] = load i32, ptr addrspace(3) null, align 4
+; CHECK-NEXT:    br label %[[BB4:.*]]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    switch i32 0, label %[[BB8:.*]] [
+; CHECK-NEXT:      i32 0, label %[[BB7:.*]]
+; CHECK-NEXT:      i32 1, label %[[BB21:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[BB5:.*:]]
+; CHECK-NEXT:    br label %[[BB21]]
+; CHECK:       [[BB6:.*]]:
+; CHECK-NEXT:    br label %[[BB12:.*]]
+; CHECK:       [[BB7]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ zeroinitializer, %[[BB4]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[LOAD]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LOAD1]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LOAD3]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[LOAD2]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP5]], <4 x i32> [[TMP4]], i64 4)
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi <8 x i32> [ [[TMP6]], %[[BB8]] ], [ poison, %[[BB6]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       [[BB21]]:
+; CHECK-NEXT:    ret void
+;
+bb:
+  %load = load i32, ptr addrspace(3) null, align 4
+  %load1 = load i32, ptr addrspace(3) null, align 4
+  %load2 = load i32, ptr addrspace(3) null, align 4
+  %load3 = load i32, ptr addrspace(3) null, align 4
+  br label %bb4
+
+bb4:
+  switch i32 0, label %bb8 [
+  i32 0, label %bb7
+  i32 1, label %bb21
+  ]
+
+bb5:
+  %srem = srem i32 0, 0
+  br label %bb21
+
+bb6:
+  br label %bb12
+
+bb7:
+  ret void
+
+bb8:
+  %phi = phi i32 [ 0, %bb4 ]
+  %phi9 = phi i32 [ 0, %bb4 ]
+  %phi10 = phi i32 [ 0, %bb4 ]
+  %phi11 = phi i32 [ 0, %bb4 ]
+  br label %bb12
+
+bb12:
+  %phi13 = phi i32 [ %load, %bb8 ], [ 0, %bb6 ]
+  %phi14 = phi i32 [ %load1, %bb8 ], [ 0, %bb6 ]
+  %phi15 = phi i32 [ %load2, %bb8 ], [ %arg, %bb6 ]
+  %phi16 = phi i32 [ %load3, %bb8 ], [ 0, %bb6 ]
+  %phi17 = phi i32 [ %phi, %bb8 ], [ %srem, %bb6 ]
+  %phi18 = phi i32 [ %phi11, %bb8 ], [ 0, %bb6 ]
+  %phi19 = phi i32 [ %phi9, %bb8 ], [ 0, %bb6 ]
+  %phi20 = phi i32 [ %phi10, %bb8 ], [ 0, %bb6 ]
+  ret void
+
+bb21:
+  ret void
+}


        


More information about the llvm-commits mailing list