[llvm] 8c41859 - [SLP]Clear the operands deps of non-schedulable nodes, if previously all operands were copyable
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 12:11:42 PDT 2025
Author: Alexey Bataev
Date: 2025-09-18T12:11:33-07:00
New Revision: 8c41859a21a4d0cfda164cc58f4a5336dbcd30d1
URL: https://github.com/llvm/llvm-project/commit/8c41859a21a4d0cfda164cc58f4a5336dbcd30d1
DIFF: https://github.com/llvm/llvm-project/commit/8c41859a21a4d0cfda164cc58f4a5336dbcd30d1.diff
LOG: [SLP]Clear the operands deps of non-schedulable nodes, if previously all operands were copyable
If all operands of a non-schedulable node were previously only
copyables, we need to clear the dependencies of the original schedule data
for such copyable operands and recalculate them to correctly handle
the number of dependencies.
Fixes #159406
Added:
llvm/test/Transforms/SLPVectorizer/X86/non-sched-inst-has-copyable-before.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0c94a1d593ce0..6ac9018df641e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20804,12 +20804,45 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const EdgeInfo &EI) {
// No need to schedule PHIs, insertelement, extractelement and extractvalue
// instructions.
- bool HasCopyables = S.areInstructionsWithCopyableElements();
if (isa<PHINode>(S.getMainOp()) ||
- isVectorLikeInstWithConstOps(S.getMainOp()) ||
- (!HasCopyables && doesNotNeedToSchedule(VL)) ||
- all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))
+ isVectorLikeInstWithConstOps(S.getMainOp()))
+ return nullptr;
+ bool HasCopyables = S.areInstructionsWithCopyableElements();
+ if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
+ all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
+ // If all operands were replaced by copyables, the operands of this node
+ // might be not, so need to recalculate dependencies for schedule data,
+ // replaced by copyable schedule data.
+ SmallVector<ScheduleData *> ControlDependentMembers;
+ for (Value *V : VL) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || (HasCopyables && S.isCopyableElement(V)))
+ continue;
+ SmallDenseMap<std::pair<Instruction *, Value *>, unsigned> UserOpToNumOps;
+ for (const Use &U : I->operands()) {
+ unsigned &NumOps =
+ UserOpToNumOps.try_emplace(std::make_pair(I, U.get()), 0)
+ .first->getSecond();
+ ++NumOps;
+ if (auto *Op = dyn_cast<Instruction>(U.get());
+ Op && areAllOperandsReplacedByCopyableData(I, Op, *SLP, NumOps)) {
+ if (ScheduleData *OpSD = getScheduleData(Op);
+ OpSD && OpSD->hasValidDependencies()) {
+ OpSD->clearDirectDependencies();
+ if (RegionHasStackSave ||
+ !isGuaranteedToTransferExecutionToSuccessor(OpSD->getInst()))
+ ControlDependentMembers.push_back(OpSD);
+ }
+ }
+ }
+ }
+ if (!ControlDependentMembers.empty()) {
+ ScheduleBundle Invalid = ScheduleBundle::invalid();
+ calculateDependencies(Invalid, /*InsertInReadyList=*/true, SLP,
+ ControlDependentMembers);
+ }
return nullptr;
+ }
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-sched-inst-has-copyable-before.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-sched-inst-has-copyable-before.ll
new file mode 100644
index 0000000000000..fe389ee78071a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-sched-inst-has-copyable-before.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-cros-linux-gnu < %s | FileCheck %s
+
+%struct.fe = type { [5 x i64] }
+
+define i32 @test(i64 %0, i128 %1, i1 %2) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i64 [[TMP0:%.*]], i128 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_FE:%.*]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 24
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 32
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 8
+; CHECK-NEXT: br label %[[BB9:.*]]
+; CHECK: [[BB9]]:
+; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ undef, [[TMP3:%.*]] ], [ [[TMP29:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP26:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP23:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP20:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP17:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[DOTSROA_14_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP52:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[DOTSROA_11_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP50:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[DOTSROA_8_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP57:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[DOTSROA_4_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP56:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[DOTSROA_0_0:%.*]] = phi i64 [ undef, [[TMP3]] ], [ [[TMP54:%.*]], %[[BB9]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[DOTSROA_0_0]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], [[TMP0]]
+; CHECK-NEXT: [[TMP17]] = xor i64 [[TMP16]], 1
+; CHECK-NEXT: store i64 [[TMP17]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[DOTSROA_4_0]], [[TMP13]]
+; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], [[TMP0]]
+; CHECK-NEXT: [[TMP20]] = xor i64 [[TMP19]], 1
+; CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP8]], align 8
+; CHECK-NEXT: [[TMP21:%.*]] = xor i64 [[DOTSROA_8_0]], [[TMP12]]
+; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], [[TMP0]]
+; CHECK-NEXT: [[TMP23]] = xor i64 [[TMP22]], 1
+; CHECK-NEXT: store i64 [[TMP23]], ptr [[TMP5]], align 8
+; CHECK-NEXT: [[TMP24:%.*]] = xor i64 [[DOTSROA_11_0]], [[TMP11]]
+; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], [[TMP0]]
+; CHECK-NEXT: [[TMP26]] = xor i64 [[TMP25]], 1
+; CHECK-NEXT: store i64 [[TMP26]], ptr [[TMP6]], align 8
+; CHECK-NEXT: [[TMP27:%.*]] = xor i64 [[DOTSROA_14_0]], [[TMP10]]
+; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], [[TMP0]]
+; CHECK-NEXT: [[TMP29]] = xor i64 [[TMP28]], 1
+; CHECK-NEXT: store i64 [[TMP29]], ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr null, align 4294967296
+; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP19]], 1
+; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP16]], 1
+; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[TMP17]], 1
+; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP29]], 19
+; CHECK-NEXT: [[TMP35:%.*]] = zext i64 [[TMP34]] to i128
+; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[TMP26]], 19
+; CHECK-NEXT: [[TMP37:%.*]] = zext i64 [[TMP36]] to i128
+; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[TMP23]], 19
+; CHECK-NEXT: [[TMP39:%.*]] = zext i64 [[TMP38]] to i128
+; CHECK-NEXT: [[TMP40:%.*]] = mul nuw nsw i128 [[TMP39]], 24
+; CHECK-NEXT: [[TMP41:%.*]] = zext i64 [[TMP32]] to i128
+; CHECK-NEXT: [[TMP42:%.*]] = mul nuw i128 [[TMP37]], [[TMP41]]
+; CHECK-NEXT: [[TMP43:%.*]] = zext i64 [[TMP31]] to i128
+; CHECK-NEXT: [[TMP44:%.*]] = mul nuw i128 [[TMP35]], [[TMP43]]
+; CHECK-NEXT: [[TMP45:%.*]] = zext i64 [[TMP33]] to i128
+; CHECK-NEXT: [[TMP46:%.*]] = mul i128 [[TMP1]], [[TMP45]]
+; CHECK-NEXT: [[TMP47:%.*]] = add i128 [[TMP40]], [[TMP46]]
+; CHECK-NEXT: [[TMP48:%.*]] = add i128 [[TMP47]], [[TMP42]]
+; CHECK-NEXT: [[TMP49:%.*]] = add i128 [[TMP48]], [[TMP44]]
+; CHECK-NEXT: [[TMP50]] = and i64 [[TMP29]], 1
+; CHECK-NEXT: [[TMP51:%.*]] = trunc i128 [[TMP49]] to i64
+; CHECK-NEXT: [[TMP52]] = and i64 [[TMP30]], 1
+; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[TMP51]], 1
+; CHECK-NEXT: [[TMP54]] = and i64 [[TMP53]], 1
+; CHECK-NEXT: [[TMP55:%.*]] = lshr i64 [[TMP53]], 1
+; CHECK-NEXT: [[TMP56]] = and i64 [[TMP19]], 1
+; CHECK-NEXT: [[TMP57]] = or i64 [[TMP55]], 1
+; CHECK-NEXT: br i1 [[TMP2]], label %[[BB58:.*]], label %[[BB9]]
+; CHECK: [[BB58]]:
+; CHECK-NEXT: call void @g(ptr nonnull [[TMP4]])
+; CHECK-NEXT: ret i32 0
+;
+ %4 = alloca %struct.fe, align 8
+ %5 = getelementptr inbounds nuw i8, ptr %4, i64 16
+ %6 = getelementptr inbounds nuw i8, ptr %4, i64 24
+ %7 = getelementptr inbounds nuw i8, ptr %4, i64 32
+ %8 = getelementptr inbounds nuw i8, ptr %4, i64 8
+ br label %9
+
+9:
+ %10 = phi i64 [ undef, %3 ], [ %29, %9 ]
+ %11 = phi i64 [ undef, %3 ], [ %26, %9 ]
+ %12 = phi i64 [ undef, %3 ], [ %23, %9 ]
+ %13 = phi i64 [ undef, %3 ], [ %20, %9 ]
+ %14 = phi i64 [ undef, %3 ], [ %17, %9 ]
+ %.sroa.14.0 = phi i64 [ undef, %3 ], [ %52, %9 ]
+ %.sroa.11.0 = phi i64 [ undef, %3 ], [ %50, %9 ]
+ %.sroa.8.0 = phi i64 [ undef, %3 ], [ %57, %9 ]
+ %.sroa.4.0 = phi i64 [ undef, %3 ], [ %56, %9 ]
+ %.sroa.0.0 = phi i64 [ undef, %3 ], [ %54, %9 ]
+ %15 = xor i64 %.sroa.0.0, %14
+ %16 = and i64 %15, %0
+ %17 = xor i64 %16, 1
+ store i64 %17, ptr %4, align 8
+ %18 = xor i64 %.sroa.4.0, %13
+ %19 = and i64 %18, %0
+ %20 = xor i64 %19, 1
+ store i64 %20, ptr %8, align 8
+ %21 = xor i64 %.sroa.8.0, %12
+ %22 = and i64 %21, %0
+ %23 = xor i64 %22, 1
+ store i64 %23, ptr %5, align 8
+ %24 = xor i64 %.sroa.11.0, %11
+ %25 = and i64 %24, %0
+ %26 = xor i64 %25, 1
+ store i64 %26, ptr %6, align 8
+ %27 = xor i64 %.sroa.14.0, %10
+ %28 = and i64 %27, %0
+ %29 = xor i64 %28, 1
+ store i64 %29, ptr %7, align 8
+ %30 = load i64, ptr null, align 4294967296
+ %31 = or i64 %19, 1
+ %32 = or i64 %16, 1
+ %33 = add i64 %17, 1
+ %34 = mul i64 %29, 19
+ %35 = zext i64 %34 to i128
+ %36 = mul i64 %26, 19
+ %37 = zext i64 %36 to i128
+ %38 = mul i64 %23, 19
+ %39 = zext i64 %38 to i128
+ %40 = mul nuw nsw i128 %39, 24
+ %41 = zext i64 %32 to i128
+ %42 = mul nuw i128 %37, %41
+ %43 = zext i64 %31 to i128
+ %44 = mul nuw i128 %35, %43
+ %45 = zext i64 %33 to i128
+ %46 = mul i128 %1, %45
+ %47 = add i128 %40, %46
+ %48 = add i128 %47, %42
+ %49 = add i128 %48, %44
+ %50 = and i64 %29, 1
+ %51 = trunc i128 %49 to i64
+ %52 = and i64 %30, 1
+ %53 = add i64 %51, 1
+ %54 = and i64 %53, 1
+ %55 = lshr i64 %53, 1
+ %56 = and i64 %19, 1
+ %57 = or i64 %55, 1
+ br i1 %2, label %58, label %9
+
+58:
+ call void @g(ptr nonnull %4)
+ ret i32 0
+}
+
+declare void @g(ptr)
+
More information about the llvm-commits
mailing list