[llvm] [SLP]Fix/improve getSpillCost analysis (PR #129258)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 09:53:30 PST 2025
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/129258
>From e0dfe4c41a30a9f36c768f0f2523425a55c5d7f1 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Fri, 28 Feb 2025 15:11:22 +0000
Subject: [PATCH 1/3] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 272 ++++++++++++------
.../SLPVectorizer/RISCV/math-function.ll | 16 +-
.../SLPVectorizer/RISCV/spillcost.ll | 10 +-
3 files changed, 201 insertions(+), 97 deletions(-)
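In rough outline, the rewritten analysis replaces the single linear walk over dominance-ordered bundles with per-pair checks: for each vectorized operand it asks whether a non-vectorized call can execute between the operand's last instruction and its user, scanning at most a fixed budget of instructions. The following standalone sketch (plain C++, illustrative names only, not the LLVM code itself) shows the shape of that budgeted same-block scan; the real CheckForNonVecCallsInSameBlock additionally memoizes results per last instruction so overlapping ranges are not rescanned.

#include <cstddef>
#include <vector>

// Standalone model, not LLVM code. "Instructions" are reduced to a single
// flag saying whether they would survive as a real call after vectorization.
struct ModelInst {
  bool IsNonVecCall;
};

// Scans Block[First+1 .. Last] backwards, counting visited instructions
// against BudgetLimit. Returns true only if no surviving call was found and
// the scan stayed within budget.
static bool rangeIsCallFree(const std::vector<ModelInst> &Block, size_t First,
                            size_t Last, size_t BudgetLimit) {
  if (Last <= First)
    return true;
  size_t Budget = 0;
  for (size_t I = Last; I > First && Budget <= BudgetLimit; --I, ++Budget)
    if (Block[I].IsNonVecCall)
      return false;
  return Budget <= BudgetLimit;
}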
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2d343ca31f07b..6c48ae7d645ab 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12448,109 +12448,201 @@ InstructionCost BoUpSLP::getSpillCost() {
// (for example, if spills and fills are required).
InstructionCost Cost = 0;
- SmallPtrSet<const TreeEntry *, 4> LiveEntries;
- const TreeEntry *Prev = nullptr;
-
- // The entries in VectorizableTree are not necessarily ordered by their
- // position in basic blocks. Collect them and order them by dominance so later
- // instructions are guaranteed to be visited first. For instructions in
- // different basic blocks, we only scan to the beginning of the block, so
- // their order does not matter, as long as all instructions in a basic block
- // are grouped together. Using dominance ensures a deterministic order.
- SmallVector<TreeEntry *, 16> OrderedEntries;
- for (const auto &TEPtr : VectorizableTree) {
- if (TEPtr->isGather())
- continue;
- OrderedEntries.push_back(TEPtr.get());
- }
- llvm::stable_sort(OrderedEntries, [&](const TreeEntry *TA,
- const TreeEntry *TB) {
- Instruction &A = getLastInstructionInBundle(TA);
- Instruction &B = getLastInstructionInBundle(TB);
- auto *NodeA = DT->getNode(A.getParent());
- auto *NodeB = DT->getNode(B.getParent());
- assert(NodeA && "Should only process reachable instructions");
- assert(NodeB && "Should only process reachable instructions");
- assert((NodeA == NodeB) == (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
- "Different nodes should have different DFS numbers");
- if (NodeA != NodeB)
- return NodeA->getDFSNumIn() > NodeB->getDFSNumIn();
- return B.comesBefore(&A);
- });
-
- for (const TreeEntry *TE : OrderedEntries) {
- if (!Prev) {
- Prev = TE;
- continue;
- }
+ const TreeEntry *Root = VectorizableTree.front().get();
+ if (Root->isGather())
+ return Cost;
- LiveEntries.erase(Prev);
- for (unsigned I : seq<unsigned>(Prev->getNumOperands())) {
- const TreeEntry *Op = getVectorizedOperand(Prev, I);
- if (!Op)
- continue;
- assert(!Op->isGather() && "Expected vectorized operand.");
- LiveEntries.insert(Op);
+ SmallDenseMap<const TreeEntry *, SmallVector<const TreeEntry *>>
+ EntriesToOperands;
+ SmallDenseMap<const TreeEntry *, Instruction *> EntriesToLastInstruction;
+ SmallPtrSet<const Instruction *, 8> LastInstructions;
+ for (const auto &TEPtr : VectorizableTree) {
+ if (!TEPtr->isGather()) {
+ Instruction *LastInst = &getLastInstructionInBundle(TEPtr.get());
+ EntriesToLastInstruction.try_emplace(TEPtr.get(), LastInst);
+ LastInstructions.insert(LastInst);
}
+ if (TEPtr->UserTreeIndex)
+ EntriesToOperands[TEPtr->UserTreeIndex.UserTE].push_back(TEPtr.get());
+ }
- LLVM_DEBUG({
- dbgs() << "SLP: #LV: " << LiveEntries.size();
- for (auto *X : LiveEntries)
- X->dump();
- dbgs() << ", Looking at ";
- TE->dump();
- });
-
- // Now find the sequence of instructions between PrevInst and Inst.
- unsigned NumCalls = 0;
- const Instruction *PrevInst = &getLastInstructionInBundle(Prev);
- BasicBlock::const_reverse_iterator
- InstIt = ++getLastInstructionInBundle(TE).getIterator().getReverse(),
- PrevInstIt = PrevInst->getIterator().getReverse();
- while (InstIt != PrevInstIt) {
- if (PrevInstIt == PrevInst->getParent()->rend()) {
- PrevInstIt = getLastInstructionInBundle(TE).getParent()->rbegin();
- continue;
- }
-
- auto NoCallIntrinsic = [this](const Instruction *I) {
- const auto *II = dyn_cast<IntrinsicInst>(I);
- if (!II)
- return false;
- if (II->isAssumeLikeIntrinsic())
- return true;
- IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II);
- InstructionCost IntrCost =
- TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
- InstructionCost CallCost =
- TTI->getCallInstrCost(nullptr, II->getType(), ICA.getArgTypes(),
- TTI::TCK_RecipThroughput);
- return IntrCost < CallCost;
- };
+ auto NoCallIntrinsic = [this](const Instruction *I) {
+ const auto *II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+ if (II->isAssumeLikeIntrinsic())
+ return true;
+ IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II);
+ InstructionCost IntrCost =
+ TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
+ InstructionCost CallCost = TTI->getCallInstrCost(
+ nullptr, II->getType(), ICA.getArgTypes(), TTI::TCK_RecipThroughput);
+ return IntrCost < CallCost;
+ };
+ SmallDenseMap<const Instruction *, PointerIntPair<const Instruction *, 1>>
+ CheckedInstructions;
+ unsigned Budget = 0;
+ const unsigned BudgetLimit =
+ ScheduleRegionSizeBudget / VectorizableTree.size();
+ auto CheckForNonVecCallsInSameBlock = [&](Instruction *First,
+ Instruction *Last) {
+ assert(First->getParent() == Last->getParent() &&
+ "Expected instructions in same block.");
+ if (Last == First || Last->comesBefore(First))
+ return true;
+ BasicBlock::const_reverse_iterator InstIt =
+ ++First->getIterator().getReverse(),
+ PrevInstIt =
+ Last->getIterator().getReverse();
+ auto It = CheckedInstructions.find(Last);
+ if (It != CheckedInstructions.end()) {
+ const Instruction *Checked = It->second.getPointer();
+ if (Checked == First || Checked->comesBefore(First))
+ return It->second.getInt() != 0;
+ PrevInstIt = Checked->getIterator().getReverse();
+ }
+ SmallVector<const Instruction *> LastInstsInRange(1, Last);
+ while (InstIt != PrevInstIt && Budget <= BudgetLimit) {
// Debug information does not impact spill cost.
// Vectorized calls, represented as vector intrinsics, do not impact spill
// cost.
if (const auto *CB = dyn_cast<CallBase>(&*PrevInstIt);
- CB && !NoCallIntrinsic(CB) && !isVectorized(CB))
- NumCalls++;
+ CB && !NoCallIntrinsic(CB) && !isVectorized(CB)) {
+ for (const Instruction *LastInst : LastInstsInRange)
+ CheckedInstructions.try_emplace(LastInst, &*PrevInstIt, 0);
+ return false;
+ }
+ if (LastInstructions.contains(&*PrevInstIt))
+ LastInstsInRange.push_back(&*PrevInstIt);
++PrevInstIt;
+ ++Budget;
}
-
- if (NumCalls) {
- SmallVector<Type *, 4> EntriesTypes;
- for (const TreeEntry *TE : LiveEntries) {
- auto *ScalarTy = TE->getMainOp()->getType();
- auto It = MinBWs.find(TE);
- if (It != MinBWs.end())
- ScalarTy = IntegerType::get(ScalarTy->getContext(), It->second.first);
- EntriesTypes.push_back(getWidenedType(ScalarTy, TE->getVectorFactor()));
+ for (const Instruction *LastInst : LastInstsInRange)
+ CheckedInstructions.try_emplace(
+ LastInst, PrevInstIt == InstIt ? First : &*PrevInstIt,
+ Budget <= BudgetLimit ? 1 : 0);
+ return Budget <= BudgetLimit;
+ };
+ auto AddCosts = [&](const TreeEntry *Op) {
+ Type *ScalarTy = Op->Scalars.front()->getType();
+ auto It = MinBWs.find(Op);
+ if (It != MinBWs.end())
+ ScalarTy = IntegerType::get(ScalarTy->getContext(), It->second.first);
+ auto *VecTy = getWidenedType(ScalarTy, Op->getVectorFactor());
+ Cost += TTI->getCostOfKeepingLiveOverCall(VecTy);
+ if (ScalarTy->isVectorTy()) {
+ // Handle revec dead vector instructions.
+ Cost -= Op->Scalars.size() * TTI->getCostOfKeepingLiveOverCall(ScalarTy);
+ }
+ };
+ SmallDenseMap<const BasicBlock *, bool> BlocksToCalls;
+ auto CheckPredecessors = [&](BasicBlock *Root, BasicBlock *Pred,
+ BasicBlock *OpParent) {
+ SmallVector<BasicBlock *> Worklist;
+ if (Pred)
+ Worklist.push_back(Pred);
+ else
+ Worklist.append(pred_begin(Root), pred_end(Root));
+ SmallPtrSet<const BasicBlock *, 16> Visited;
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (BB == OpParent || !Visited.insert(BB).second)
+ continue;
+ if (auto It = BlocksToCalls.find(BB); It != BlocksToCalls.end()) {
+ Worklist.append(pred_begin(BB), pred_end(BB));
+ if (!It->second)
+ return false;
+ continue;
+ }
+ BlocksToCalls[BB] = false;
+ if (BB->sizeWithoutDebug() > ScheduleRegionSizeBudget)
+ return false;
+ Budget += BB->sizeWithoutDebug();
+ if (Budget > BudgetLimit)
+ return false;
+ if (!CheckForNonVecCallsInSameBlock(&*BB->getFirstNonPHIOrDbgOrAlloca(),
+ BB->getTerminator()))
+ return false;
+ BlocksToCalls[BB] = true;
+ Worklist.append(pred_begin(BB), pred_end(BB));
+ }
+ return true;
+ };
+ SmallVector<const TreeEntry *> LiveEntries(1, Root);
+ while (!LiveEntries.empty()) {
+ const TreeEntry *Entry = LiveEntries.pop_back_val();
+ SmallVector<const TreeEntry *> Operands = EntriesToOperands.lookup(Entry);
+ if (Operands.empty())
+ continue;
+ Instruction *LastInst = EntriesToLastInstruction.at(Entry);
+ for (const TreeEntry *Op : Operands) {
+ if (!Op->isGather())
+ LiveEntries.push_back(Op);
+ BasicBlock *Parent = Entry->getMainOp()->getParent();
+ if ((Entry->getOpcode() != Instruction::PHI && Op->isGather()) ||
+ (Op->isGather() && allConstant(Op->Scalars)))
+ continue;
+ Budget = 0;
+ BasicBlock *Pred = Entry->getOpcode() == Instruction::PHI
+ ? cast<PHINode>(Entry->getMainOp())
+ ->getIncomingBlock(Op->UserTreeIndex.EdgeIdx)
+ : nullptr;
+ BasicBlock *OpParent;
+ Instruction *OpLastInst;
+ if (Op->isGather()) {
+ assert(Entry->getOpcode() == Instruction::PHI &&
+ "Expected phi node only.");
+ OpParent = cast<PHINode>(Entry->getMainOp())
+ ->getIncomingBlock(Op->UserTreeIndex.EdgeIdx);
+ OpLastInst = OpParent->getTerminator();
+ for (Value *V : Op->Scalars) {
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ continue;
+ if (isVectorized(V)) {
+ OpParent = Inst->getParent();
+ OpLastInst = Inst;
+ break;
+ }
+ }
+ } else {
+ OpLastInst = EntriesToLastInstruction.at(Op);
+ OpParent = Op->getMainOp()->getParent();
+ }
+      // Check the call instructions within the same basic block.
+ if (OpParent == Parent) {
+ if (Entry->getOpcode() == Instruction::PHI) {
+ if (!CheckForNonVecCallsInSameBlock(LastInst, OpLastInst))
+ AddCosts(Op);
+ continue;
+ }
+ if (!CheckForNonVecCallsInSameBlock(OpLastInst, LastInst))
+ AddCosts(Op);
+ continue;
+ }
+      // Check for call instructions between blocks.
+ // 1. Check entry's block to the head.
+ if (Entry->getOpcode() != Instruction::PHI &&
+ !CheckForNonVecCallsInSameBlock(
+ &*LastInst->getParent()->getFirstNonPHIOrDbgOrAlloca(),
+ LastInst)) {
+ AddCosts(Op);
+ continue;
+ }
+ // 2. Check op's block from the end.
+ if (!CheckForNonVecCallsInSameBlock(OpLastInst,
+ OpParent->getTerminator())) {
+ AddCosts(Op);
+ continue;
+ }
+ // 3. Check the predecessors of entry's block till op's block.
+ if (!CheckPredecessors(Parent, Pred, OpParent)) {
+ AddCosts(Op);
+ continue;
}
- Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(EntriesTypes);
}
-
- Prev = TE;
}
return Cost;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
index 8cb620f870331..fc71643f6a51d 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
@@ -1740,7 +1740,9 @@ entry:
define void @f(i1 %c, ptr %p, ptr %q, ptr %r) {
; CHECK-LABEL: define void @f
; CHECK-SAME: (i1 [[C:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P]], align 8
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT: [[P1:%.*]] = getelementptr i64, ptr [[P]], i64 1
+; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[P1]], align 8
; CHECK-NEXT: br i1 [[C]], label [[FOO:%.*]], label [[BAR:%.*]]
; CHECK: foo:
; CHECK-NEXT: [[Y0:%.*]] = load float, ptr [[R]], align 4
@@ -1751,12 +1753,16 @@ define void @f(i1 %c, ptr %p, ptr %q, ptr %r) {
; CHECK-NEXT: [[Z1:%.*]] = call float @fabsf(float [[Z0]])
; CHECK-NEXT: br label [[BAZ]]
; CHECK: baz:
-; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr [[Q]], align 8
+; CHECK-NEXT: store i64 [[X0]], ptr [[Q]], align 8
+; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[Q]], i64 1
+; CHECK-NEXT: store i64 [[X1]], ptr [[Q1]], align 8
; CHECK-NEXT: ret void
;
; DEFAULT-LABEL: define void @f
; DEFAULT-SAME: (i1 [[C:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) #[[ATTR1]] {
-; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P]], align 8
+; DEFAULT-NEXT: [[X0:%.*]] = load i64, ptr [[P]], align 8
+; DEFAULT-NEXT: [[P1:%.*]] = getelementptr i64, ptr [[P]], i64 1
+; DEFAULT-NEXT: [[X1:%.*]] = load i64, ptr [[P1]], align 8
; DEFAULT-NEXT: br i1 [[C]], label [[FOO:%.*]], label [[BAR:%.*]]
; DEFAULT: foo:
; DEFAULT-NEXT: [[Y0:%.*]] = load float, ptr [[R]], align 4
@@ -1767,7 +1773,9 @@ define void @f(i1 %c, ptr %p, ptr %q, ptr %r) {
; DEFAULT-NEXT: [[Z1:%.*]] = call float @fabsf(float [[Z0]])
; DEFAULT-NEXT: br label [[BAZ]]
; DEFAULT: baz:
-; DEFAULT-NEXT: store <2 x i64> [[TMP1]], ptr [[Q]], align 8
+; DEFAULT-NEXT: store i64 [[X0]], ptr [[Q]], align 8
+; DEFAULT-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[Q]], i64 1
+; DEFAULT-NEXT: store i64 [[X1]], ptr [[Q1]], align 8
; DEFAULT-NEXT: ret void
;
%x0 = load i64, ptr %p
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/spillcost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/spillcost.ll
index b0c25bc4cc1f2..55978b61d54e8 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/spillcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/spillcost.ll
@@ -7,7 +7,9 @@ declare void @g()
define void @f0(i1 %c, ptr %p, ptr %q) {
; CHECK-LABEL: define void @f0(
; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P]], align 8
+; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT: [[P1:%.*]] = getelementptr i64, ptr [[P]], i64 1
+; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[P1]], align 8
; CHECK-NEXT: br i1 [[C]], label %[[FOO:.*]], label %[[BAR:.*]]
; CHECK: [[FOO]]:
; CHECK-NEXT: call void @g()
@@ -20,7 +22,9 @@ define void @f0(i1 %c, ptr %p, ptr %q) {
; CHECK-NEXT: call void @g()
; CHECK-NEXT: br label %[[BAZ]]
; CHECK: [[BAZ]]:
-; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr [[Q]], align 8
+; CHECK-NEXT: store i64 [[X0]], ptr [[Q]], align 8
+; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[Q]], i64 1
+; CHECK-NEXT: store i64 [[X1]], ptr [[Q1]], align 8
; CHECK-NEXT: ret void
;
%x0 = load i64, ptr %p
@@ -45,7 +49,7 @@ baz:
ret void
}
-; Shouldn't be vectorized
+; Should be vectorized - just one spill of TMP0
define void @f1(i1 %c, ptr %p, ptr %q, ptr %r) {
; CHECK-LABEL: define void @f1(
; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) #[[ATTR0]] {
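When the entry and its operand live in different blocks, the patch walks the predecessors of the entry's block back towards the operand's block and caches a per-block call-free verdict (BlocksToCalls), so blocks shared by several entry/operand pairs are classified once. A simplified standalone model of that walk, with the budget capping omitted (plain C++, illustrative names only, not the LLVM code itself):

#include <unordered_map>
#include <unordered_set>
#include <vector>

// Blocks are indices; Preds gives the CFG predecessors of each block,
// BlockCallFree[B] says whether block B contains no non-vectorized calls,
// and Cache memoizes verdicts across queries.
static bool predecessorsAreCallFree(int Root, int OpParent,
                                    const std::vector<std::vector<int>> &Preds,
                                    const std::vector<bool> &BlockCallFree,
                                    std::unordered_map<int, bool> &Cache) {
  std::vector<int> Worklist(Preds[Root].begin(), Preds[Root].end());
  std::unordered_set<int> Visited;
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (BB == OpParent || !Visited.insert(BB).second)
      continue; // reached the operand's block, or block already handled
    auto It = Cache.find(BB);
    bool CallFree = It != Cache.end() ? It->second : BlockCallFree[BB];
    Cache[BB] = CallFree;
    if (!CallFree)
      return false; // a call on some path keeps the vector value live over it
    Worklist.insert(Worklist.end(), Preds[BB].begin(), Preds[BB].end());
  }
  return true;
}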
>From 18191178ff2b89dd99f85f050ec1f8f92da39639 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Fri, 28 Feb 2025 16:03:35 +0000
Subject: [PATCH 2/3] Fix op parent
Created using spr 1.3.5
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6c48ae7d645ab..f75d323a32a82 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12609,7 +12609,7 @@ InstructionCost BoUpSLP::getSpillCost() {
}
} else {
OpLastInst = EntriesToLastInstruction.at(Op);
- OpParent = Op->getMainOp()->getParent();
+ OpParent = OpLastInst->getParent();
}
// Check the call instructions within the same basic blocks.
if (OpParent == Parent) {
>From fb40c2f67d00b5466242fcd94ac5a55da9b699e4 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Fri, 28 Feb 2025 17:53:21 +0000
Subject: [PATCH 3/3] Skip unconditional first Last inst registering
Created using spr 1.3.5
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f75d323a32a82..8fc7e69273fe9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12502,7 +12502,7 @@ InstructionCost BoUpSLP::getSpillCost() {
return It->second.getInt() != 0;
PrevInstIt = Checked->getIterator().getReverse();
}
- SmallVector<const Instruction *> LastInstsInRange(1, Last);
+ SmallVector<const Instruction *> LastInstsInRange;
while (InstIt != PrevInstIt && Budget <= BudgetLimit) {
// Debug information does not impact spill cost.
// Vectorized calls, represented as vector intrinsics, do not impact spill
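Taken together, the three patches leave getSpillCost with the following overall shape: walk the vectorizable tree from the root and, for every vectorized operand whose value may be live across a non-vectorized call on the way to its user, charge the cost of keeping the widened value live over a call. A deliberately simplified standalone model (plain C++, illustrative names, no LLVM types; MayCrossCall stands in for the same-block and cross-block checks above):

#include <vector>

struct ModelEntry {
  std::vector<ModelEntry *> Operands; // vectorized operands only
  unsigned KeepLiveOverCallCost;      // stand-in for getCostOfKeepingLiveOverCall
};

// Visits the tree from the root and charges a spill/fill cost for every
// operand whose value may be live across a non-vectorized call.
template <typename PredT>
static unsigned modelSpillCost(ModelEntry *Root, PredT MayCrossCall) {
  unsigned Cost = 0;
  std::vector<ModelEntry *> Worklist{Root};
  while (!Worklist.empty()) {
    ModelEntry *Entry = Worklist.back();
    Worklist.pop_back();
    for (ModelEntry *Op : Entry->Operands) {
      Worklist.push_back(Op);
      if (MayCrossCall(Entry, Op)) // a call may execute between def and use
        Cost += Op->KeepLiveOverCallCost;
    }
  }
  return Cost;
}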