[llvm] 154138c - [SLP]Do not pack div-like copyable values

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 20 05:19:55 PDT 2025


Author: Alexey Bataev
Date: 2025-10-20T05:19:42-07:00
New Revision: 154138c25f358ed812eafc2880225c3d88221e8a

URL: https://github.com/llvm/llvm-project/commit/154138c25f358ed812eafc2880225c3d88221e8a
DIFF: https://github.com/llvm/llvm-project/commit/154138c25f358ed812eafc2880225c3d88221e8a.diff

LOG: [SLP]Do not pack div-like copyable values

If the main instruction in the copyables is a div-like instruction, the
compiler cannot pack duplicates by extending them with poisons: these
instructions, once vectorized, would result in undefined behavior.

Fixes #164185

Added: 
    llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 048a3e691fe55..3f18bd70539a0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10546,8 +10546,11 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
             PoisonValue::get(UniqueValues.front()->getType()));
         // Check that extended with poisons/copyable operations are still valid
         // for vectorization (div/rem are not allowed).
-        if (!S.areInstructionsWithCopyableElements() &&
-            !getSameOpcode(PaddedUniqueValues, TLI).valid()) {
+        if ((!S.areInstructionsWithCopyableElements() &&
+             !getSameOpcode(PaddedUniqueValues, TLI).valid()) ||
+            (S.areInstructionsWithCopyableElements() && S.isMulDivLikeOp() &&
+             (S.getMainOp()->isIntDivRem() || S.getMainOp()->isFPDivRem() ||
+              isa<CallInst>(S.getMainOp())))) {
           LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
           ReuseShuffleIndices.clear();
           return false;

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
new file mode 100644
index 0000000000000..d16843c81144d
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define ptr @test(ptr %d) {
+; CHECK-LABEL: define ptr @test(
+; CHECK-SAME: ptr [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr null, align 1
+; CHECK-NEXT:    [[CMP4_2:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 0, 0
+; CHECK-NEXT:    [[TMP3:%.*]] = udiv i64 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = udiv i64 1, 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <6 x i64> poison, i64 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <6 x i64> [[TMP5]], i64 [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <6 x i64> [[TMP6]], i64 [[TMP4]], i32 4
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <6 x i64> [[TMP7]], <6 x i64> poison, <6 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT:    [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 1, i64 1, i64 1, i64 0>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <6 x i64> [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <6 x i64> [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <6 x i64> [[TMP9]], i32 2
+; CHECK-NEXT:    [[SCEVGEP42:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <6 x i64> [[TMP9]], i32 3
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <6 x i64> [[TMP9]], i32 4
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <6 x i64> [[TMP9]], i32 5
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP19]]
+; CHECK-NEXT:    ret ptr [[TMP20]]
+;
+entry:
+  %0 = load i8, ptr null, align 1
+  %cmp4.2 = icmp eq i8 %0, 0
+  %1 = select i1 %cmp4.2, i64 0, i64 0
+  %2 = shl i64 %1, 1
+  %3 = getelementptr i8, ptr %d, i64 %2
+  %4 = xor i64 0, 0
+  %5 = udiv i64 %4, 0
+  %6 = mul i64 %5, 6
+  %7 = getelementptr i8, ptr %d, i64 %6
+  %8 = shl i64 %1, 0
+  %scevgep42 = getelementptr i8, ptr %d, i64 %8
+  %9 = mul i64 %5, 1
+  %10 = getelementptr i8, ptr %d, i64 %9
+  %11 = udiv i64 1, 0
+  %12 = mul i64 %11, 1
+  %13 = getelementptr i8, ptr %d, i64 %12
+  %14 = mul i64 %11, 0
+  %15 = getelementptr i8, ptr %d, i64 %14
+  ret ptr %15
+}


        


More information about the llvm-commits mailing list