[llvm] 154138c - [SLP]Do not pack div-like copyable values
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 20 05:19:55 PDT 2025
Author: Alexey Bataev
Date: 2025-10-20T05:19:42-07:00
New Revision: 154138c25f358ed812eafc2880225c3d88221e8a
URL: https://github.com/llvm/llvm-project/commit/154138c25f358ed812eafc2880225c3d88221e8a
DIFF: https://github.com/llvm/llvm-project/commit/154138c25f358ed812eafc2880225c3d88221e8a.diff
LOG: [SLP]Do not pack div-like copyable values
If the main instruction of a bundle with copyable elements is a div-like
instruction, the compiler cannot pack duplicates by extending the bundle with
poisons: once vectorized, these instructions would result in undefined behavior.
Fixes #164185
Added:
llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 048a3e691fe55..3f18bd70539a0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10546,8 +10546,11 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
PoisonValue::get(UniqueValues.front()->getType()));
// Check that extended with poisons/copyable operations are still valid
// for vectorization (div/rem are not allowed).
- if (!S.areInstructionsWithCopyableElements() &&
- !getSameOpcode(PaddedUniqueValues, TLI).valid()) {
+ if ((!S.areInstructionsWithCopyableElements() &&
+ !getSameOpcode(PaddedUniqueValues, TLI).valid()) ||
+ (S.areInstructionsWithCopyableElements() && S.isMulDivLikeOp() &&
+ (S.getMainOp()->isIntDivRem() || S.getMainOp()->isFPDivRem() ||
+ isa<CallInst>(S.getMainOp())))) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
ReuseShuffleIndices.clear();
return false;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
new file mode 100644
index 0000000000000..d16843c81144d
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define ptr @test(ptr %d) {
+; CHECK-LABEL: define ptr @test(
+; CHECK-SAME: ptr [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1
+; CHECK-NEXT: [[CMP4_2:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, 0
+; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i64> poison, i64 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i64> [[TMP5]], i64 [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x i64> [[TMP6]], i64 [[TMP4]], i32 4
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <6 x i64> [[TMP7]], <6 x i64> poison, <6 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 1, i64 1, i64 1, i64 0>
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <6 x i64> [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <6 x i64> [[TMP9]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <6 x i64> [[TMP9]], i32 2
+; CHECK-NEXT: [[SCEVGEP42:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <6 x i64> [[TMP9]], i32 3
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <6 x i64> [[TMP9]], i32 4
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <6 x i64> [[TMP9]], i32 5
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP19]]
+; CHECK-NEXT: ret ptr [[TMP20]]
+;
+entry:
+ %0 = load i8, ptr null, align 1
+ %cmp4.2 = icmp eq i8 %0, 0
+ %1 = select i1 %cmp4.2, i64 0, i64 0
+ %2 = shl i64 %1, 1
+ %3 = getelementptr i8, ptr %d, i64 %2
+ %4 = xor i64 0, 0
+ %5 = udiv i64 %4, 0
+ %6 = mul i64 %5, 6
+ %7 = getelementptr i8, ptr %d, i64 %6
+ %8 = shl i64 %1, 0
+ %scevgep42 = getelementptr i8, ptr %d, i64 %8
+ %9 = mul i64 %5, 1
+ %10 = getelementptr i8, ptr %d, i64 %9
+ %11 = udiv i64 1, 0
+ %12 = mul i64 %11, 1
+ %13 = getelementptr i8, ptr %d, i64 %12
+ %14 = mul i64 %11, 0
+ %15 = getelementptr i8, ptr %d, i64 %14
+ ret ptr %15
+}
More information about the llvm-commits
mailing list