[llvm] e6b0be3 - [SLP]Correctly calculate number of copyable operands
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 19 12:19:40 PDT 2025
Author: Alexey Bataev
Date: 2025-10-19T12:14:39-07:00
New Revision: e6b0be376412bf0599f6e75aa5e67f95cd36b411
URL: https://github.com/llvm/llvm-project/commit/e6b0be376412bf0599f6e75aa5e67f95cd36b411
DIFF: https://github.com/llvm/llvm-project/commit/e6b0be376412bf0599f6e75aa5e67f95cd36b411.diff
LOG: [SLP]Correctly calculate number of copyable operands
The compiler must not compare the copyable-operand counter against the
number of operands when deciding whether an operand is copyable; otherwise,
a non-copyable operand can be counted as copyable, leading to a compiler crash.
Fixes #164164
Added:
llvm/test/Transforms/SLPVectorizer/X86/num-uses-for-copyable-elements.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9cd52da1ce1cd..048a3e691fe55 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5343,7 +5343,7 @@ class BoUpSLP {
unsigned &OpCnt =
OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
EdgeInfo EI(TE, U.getOperandNo());
- if (!getScheduleCopyableData(EI, Op) && OpCnt < NumOps)
+ if (!getScheduleCopyableData(EI, Op))
continue;
// Found copyable operand - continue.
++OpCnt;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/num-uses-for-copyable-elements.ll b/llvm/test/Transforms/SLPVectorizer/X86/num-uses-for-copyable-elements.ll
new file mode 100644
index 0000000000000..06ad332dcb34a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/num-uses-for-copyable-elements.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-apple-macosx15.0.0 -mcpu=skylake-avx512 -S < %s | FileCheck %s
+
+define void @test(ptr %output) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARRAYIDX_2_I:%.*]] = getelementptr i8, ptr [[OUTPUT]], i64 8
+; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[OUTPUT]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> poison)
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> <i32 -1, i32 0>, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> splat (i32 2))
+; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[ARRAYIDX_2_I]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %arrayidx.2.i = getelementptr i8, ptr %output, i64 8
+ %0 = load i32, ptr %output, align 4
+ %arrayidx.3.i = getelementptr i8, ptr %output, i64 12
+ %1 = load i32, ptr %arrayidx.3.i, align 4
+ %xor7 = xor i32 -1, %0
+ %or.i = tail call i32 @llvm.fshl.i32(i32 %xor7, i32 0, i32 2)
+ %or.i11 = tail call i32 @llvm.fshl.i32(i32 %1, i32 %1, i32 2)
+ store i32 %or.i, ptr %arrayidx.2.i, align 4
+ store i32 %or.i11, ptr %arrayidx.3.i, align 4
+ ret void
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
More information about the llvm-commits mailing list