[llvm] [SelectionDAGBuilder] Fix non-determanism in `shouldKeepJumpConditionsTogether` (PR #83687)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 2 10:10:39 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: None (goldsteinn)
<details>
<summary>Changes</summary>
The issue was we where iteration on `SmallPtrSet` order which is based
on runtime address and thus can vary with the same input.
Add a `SmallVector` for iterating on `RhsDeps`. This has the benefit
of also dramatically simplifying the pruning code (we no longer are
removing from the same list we where iterating on).
---
Full diff: https://github.com/llvm/llvm-project/pull/83687.diff
1 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+28-33)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4f6263cc492fe3..3a007d61ec52a4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2450,11 +2450,11 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
// Collect dependencies on V recursively. This is used for the cost analysis in
// `shouldKeepJumpConditionsTogether`.
-static bool
-collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps,
- const Value *V,
- SmallPtrSet<const Instruction *, 8> *Necessary = nullptr,
- unsigned Depth = 0) {
+static bool collectInstructionDeps(
+ SmallPtrSetImpl<const Instruction *> *Deps, const Value *V,
+ SmallPtrSetImpl<const Instruction *> *Necessary = nullptr,
+ SmallVectorImpl<const Instruction *> *ItOrder = nullptr,
+ unsigned Depth = 0) {
// Return false if we have an incomplete count.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
@@ -2474,8 +2474,11 @@ collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps,
if (!Deps->insert(I).second)
return true;
+ if (ItOrder != nullptr)
+ ItOrder->push_back(I);
+
for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
- if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
+ if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary, ItOrder,
Depth + 1))
return false;
return true;
@@ -2527,16 +2530,20 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
// Collect "all" instructions that lhs condition is dependent on.
SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps;
+ SmallVector<const Instruction *> RhsDepsItOrder;
collectInstructionDeps(&LhsDeps, Lhs);
// Collect "all" instructions that rhs condition is dependent on AND are
// dependencies of lhs. This gives us an estimate on which instructions we
// stand to save by splitting the condition.
- if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
+ if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps, &RhsDepsItOrder))
return false;
// Add the compare instruction itself unless its a dependency on the LHS.
- if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
- if (!LhsDeps.contains(RhsI))
+ if (const auto *RhsI = dyn_cast<Instruction>(Rhs)) {
+ if (!LhsDeps.contains(RhsI)) {
RhsDeps.insert(RhsI);
+ RhsDepsItOrder.push_back(RhsI);
+ }
+ }
const auto &TLI = DAG.getTargetLoweringInfo();
const auto &TTI =
@@ -2555,31 +2562,19 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
return true;
};
- // Prune instructions from RHS Deps that are dependencies of unrelated
- // instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
- // arbitrary and just meant to cap the how much time we spend in the pruning
- // loop. Its highly unlikely to come into affect.
- const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
- // Stop after a certain point. No incorrectness from including too many
- // instructions.
- for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
- const Instruction *ToDrop = nullptr;
- for (const auto *Ins : RhsDeps) {
- if (!ShouldCountInsn(Ins)) {
- ToDrop = Ins;
- break;
- }
+ // Finally accumulate latency that we can only attribute to computing the
+ // RHS condition. Use latency because we are essentially trying to calculate
+ // the cost of the dependency chain.
+ // Possible TODO: We could try to estimate ILP and make this more precise.
+ // NB: This loop is capped by the number of rhs dep instructions we added
+ // which in turn is roughly limitted by `MaxRecursiveDepth`
+ for (const auto *Ins : RhsDepsItOrder) {
+ // Skip instructions that are dependencies of unrelated
+ // instructions (will need to the computed anyways).
+ if (!ShouldCountInsn(Ins)) {
+ RhsDeps.erase(Ins);
+ continue;
}
- if (ToDrop == nullptr)
- break;
- RhsDeps.erase(ToDrop);
- }
-
- for (const auto *Ins : RhsDeps) {
- // Finally accumulate latency that we can only attribute to computing the
- // RHS condition. Use latency because we are essentially trying to calculate
- // the cost of the dependency chain.
- // Possible TODO: We could try to estimate ILP and make this more precise.
CostOfIncluding +=
TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency);
``````````
</details>
https://github.com/llvm/llvm-project/pull/83687
More information about the llvm-commits
mailing list