[llvm] e6b0be3 - [SLP]Correctly calculate number of copyable operands

Sun Oct 19 12:19:40 PDT 2025

Author: Alexey Bataev
Date: 2025-10-19T12:14:39-07:00
New Revision: e6b0be376412bf0599f6e75aa5e67f95cd36b411

URL: https://github.com/llvm/llvm-project/commit/e6b0be376412bf0599f6e75aa5e67f95cd36b411
DIFF: https://github.com/llvm/llvm-project/commit/e6b0be376412bf0599f6e75aa5e67f95cd36b411.diff

LOG: [SLP]Correctly calculate number of copyable operands

The compiler must not check the copyable-operand counter for overflow
(the `OpCnt < NumOps` guard); otherwise, a non-copyable operand can be
counted as copyable, leading to a compiler crash.

Fixes #164164

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/num-uses-for-copyable-elements.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9cd52da1ce1cd..048a3e691fe55 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5343,7 +5343,7 @@ class BoUpSLP {
             unsigned &OpCnt =
                 OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
             EdgeInfo EI(TE, U.getOperandNo());
-            if (!getScheduleCopyableData(EI, Op) && OpCnt < NumOps)
+            if (!getScheduleCopyableData(EI, Op))
               continue;
             // Found copyable operand - continue.
             ++OpCnt;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/num-uses-for-copyable-elements.ll b/llvm/test/Transforms/SLPVectorizer/X86/num-uses-for-copyable-elements.ll
new file mode 100644
index 0000000000000..06ad332dcb34a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/num-uses-for-copyable-elements.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-apple-macosx15.0.0  -mcpu=skylake-avx512 -S < %s | FileCheck %s
+
+define void @test(ptr %output) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ARRAYIDX_2_I:%.*]] = getelementptr i8, ptr [[OUTPUT]], i64 8
+; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[OUTPUT]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> <i32 -1, i32 0>, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> splat (i32 2))
+; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[ARRAYIDX_2_I]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx.2.i = getelementptr i8, ptr %output, i64 8
+  %0 = load i32, ptr %output, align 4
+  %arrayidx.3.i = getelementptr i8, ptr %output, i64 12
+  %1 = load i32, ptr %arrayidx.3.i, align 4
+  %xor7 = xor i32 -1, %0
+  %or.i = tail call i32 @llvm.fshl.i32(i32 %xor7, i32 0, i32 2)
+  %or.i11 = tail call i32 @llvm.fshl.i32(i32 %1, i32 %1, i32 2)
+  store i32 %or.i, ptr %arrayidx.2.i, align 4
+  store i32 %or.i11, ptr %arrayidx.3.i, align 4
+  ret void
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)