[llvm] ConstraintElim: add dry-run routine to fail early (PR #99670)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 10:31:17 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
Add a dry-run routine that computes a conservative estimate of the number of rows and columns that the transform will require, and fail early if either estimate exceeds its upper bound. This patch adds a small overhead, but significantly improves compile time on one benchmark.
-- 8< --
Results from LLVM Compile Time Tracker: http://llvm-compile-time-tracker.com/compare.php?from=c5ee3c05ca61f3fae11337c5853aee7b450a9dc6&to=a4b9f80e85d0fdd0e0e6390eb4548c88aa3fd35e&stat=instructions:u.
---
Full diff: https://github.com/llvm/llvm-project/pull/99670.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/ConstraintElimination.cpp (+165-10)
- (renamed) llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll (+9-8)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index c31173879af1e..369a6daa4d970 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -40,7 +40,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <cmath>
#include <optional>
#include <string>
@@ -57,6 +56,10 @@ static cl::opt<unsigned>
MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden,
cl::desc("Maximum number of rows to keep in constraint system"));
+static cl::opt<unsigned> MaxColumns(
+ "constraint-elimination-max-cols", cl::init(50), cl::Hidden,
+ cl::desc("Maximum number of columns to keep in constraint system"));
+
static cl::opt<bool> DumpReproducers(
"constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden,
cl::desc("Dump IR to reproduce successful transformations."));
@@ -303,6 +306,7 @@ class ConstraintInfo {
void popLastNVariables(bool Signed, unsigned N) {
getCS(Signed).popLastNVariables(N);
}
+ const DataLayout &getDataLayout() const { return DL; }
bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const;
@@ -1491,7 +1495,7 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info,
/// Check if either the first condition of an AND or OR is implied by the
/// (negated in case of OR) second condition or vice versa.
static bool checkOrAndOpImpliedByOther(
- FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
+ const FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
SmallVectorImpl<StackEntry> &DFSInStack) {
@@ -1671,18 +1675,91 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
return Changed;
}
-static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
- ScalarEvolution &SE,
- OptimizationRemarkEmitter &ORE) {
- bool Changed = false;
+/// Performs a dry run of AddFact, computing a conservative estimate of the
+/// number of new variables introduced.
+static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
+ const ConstraintInfo &Info, unsigned &EstimatedRowsA,
+ unsigned &EstimatedRowsB,
+ unsigned &EstimatedColumns) {
+ auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB,
+ &EstimatedColumns](CmpInst::Predicate Pred, Value *A,
+ Value *B) {
+ SmallVector<Value *> NewVars;
+ auto R = Info.getConstraint(Pred, A, B, NewVars);
+
+ // We offset it by 1 due to logic in addFact.
+ unsigned NewEstimate =
+ count_if(R.Coefficients, [](int64_t C) { return C != 0; }) + 1;
+
+ EstimatedColumns = std::max(EstimatedColumns, NewEstimate);
+ if (R.IsSigned)
+ ++EstimatedRowsA;
+ else
+ ++EstimatedRowsB;
+ };
+
+ UpdateEstimate(Pred, A, B);
+
+ // What follows is a dry-run of transferToOtherSystem.
+ auto IsKnownNonNegative = [&Info](Value *V) {
+ return Info.doesHold(CmpInst::ICMP_SGE, V,
+ ConstantInt::get(V->getType(), 0)) ||
+ isKnownNonNegative(V, Info.getDataLayout(),
+ MaxAnalysisRecursionDepth - 1);
+ };
+
+ if (!A->getType()->isIntegerTy())
+ return;
+
+ switch (Pred) {
+ default:
+ break;
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ if (IsKnownNonNegative(B)) {
+ UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
+ UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+ }
+ break;
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_UGT:
+ if (IsKnownNonNegative(A)) {
+ UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
+ UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+ }
+ break;
+ case CmpInst::ICMP_SLT:
+ if (IsKnownNonNegative(A))
+ UpdateEstimate(CmpInst::ICMP_ULT, A, B);
+ break;
+ case CmpInst::ICMP_SGT:
+ if (Info.doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
+ UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
+ if (IsKnownNonNegative(B))
+ UpdateEstimate(CmpInst::ICMP_UGT, A, B);
+ break;
+ case CmpInst::ICMP_SGE:
+ if (IsKnownNonNegative(B))
+ UpdateEstimate(CmpInst::ICMP_UGE, A, B);
+ break;
+ }
+}
+
+/// Performs a dry run of the transform, computing a conservative estimate of
+/// the total number of columns we need in the underlying storage.
+static std::tuple<State, unsigned, unsigned>
+dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) {
DT.updateDFSNumbers();
SmallVector<Value *> FunctionArgs;
for (Value &Arg : F.args())
FunctionArgs.push_back(&Arg);
- ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
State S(DT, LI, SE);
- std::unique_ptr<Module> ReproducerModule(
- DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
+ unsigned EstimatedColumns = FunctionArgs.size() + 1;
+
+ // EstimatedRowsA corresponds to SignedCS, and EstimatedRowsB corresponds to
+ // UnsignedCS.
+ unsigned EstimatedRowsA = 0, EstimatedRowsB = 1;
+ ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
// First, collect conditions implied by branches and blocks with their
// Dominator DFS in and out numbers.
@@ -1725,12 +1802,90 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
return A.NumIn < B.NumIn;
});
+ for (const FactOrCheck &CB : S.WorkList) {
+ ICmpInst::Predicate Pred;
+ Value *A, *B;
+ if (CB.isCheck()) {
+ // What follows is a dry-run of checkOrAndOpImpliedByOther, without
+ // assuming that instructions have been simplified, as they would have
+ // during the course of normal operation.
+ auto *ContextInst = CB.getContextInst();
+ if (auto *Cmp =
+ dyn_cast_or_null<ICmpInst>(CB.getInstructionToSimplify())) {
+ unsigned OtherOpIdx = ContextInst->getOperand(0) == Cmp ? 1 : 0;
+ if (match(ContextInst, m_LogicalOp()) &&
+ match(ContextInst->getOperand(OtherOpIdx),
+ m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+ if (match(ContextInst, m_LogicalOr()))
+ Pred = CmpInst::getInversePredicate(Pred);
+ dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB,
+ EstimatedColumns);
+ }
+ }
+ continue;
+ }
+ if (!CB.isConditionFact()) {
+ Value *X;
+ if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
+ if (cast<ConstantInt>(CB.Inst->getOperand(1))->isOne())
+ dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst,
+ ConstantInt::get(CB.Inst->getType(), 0), Info,
+ EstimatedRowsA, EstimatedRowsB, EstimatedColumns);
+ dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst, X, Info, EstimatedRowsA,
+ EstimatedRowsB, EstimatedColumns);
+ continue;
+ }
+
+ if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
+ Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
+ dryRunAddFact(Pred, MinMax, MinMax->getLHS(), Info, EstimatedRowsA,
+ EstimatedRowsB, EstimatedColumns);
+ dryRunAddFact(Pred, MinMax, MinMax->getRHS(), Info, EstimatedRowsA,
+ EstimatedRowsB, EstimatedColumns);
+ continue;
+ }
+ }
+
+ if (CB.isConditionFact()) {
+ Pred = CB.Cond.Pred;
+ A = CB.Cond.Op0;
+ B = CB.Cond.Op1;
+ } else {
+ bool Matched = match(CB.Inst, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_Value(A), m_Value(B))));
+ (void)Matched;
+ assert(Matched && "Must have an assume intrinsic with a icmp operand");
+ }
+ dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB,
+ EstimatedColumns);
+ }
+ return {S, std::max(EstimatedRowsA, EstimatedRowsB), EstimatedColumns};
+}
+
+static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE,
+ OptimizationRemarkEmitter &ORE) {
+ bool Changed = false;
+ const auto &[S, EstimatedRows, EstimatedColumns] = dryRun(F, DT, LI, SE);
+
+ // Fail early if estimates exceed limits. Row estimate could be off by up to
+ // 40%.
+ if (EstimatedRows > 1.4 * MaxRows || EstimatedColumns > MaxColumns)
+ return false;
+
+ SmallVector<Value *> FunctionArgs;
+ for (Value &Arg : F.args())
+ FunctionArgs.push_back(&Arg);
+ ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
+ std::unique_ptr<Module> ReproducerModule(
+ DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
+
SmallVector<Instruction *> ToRemove;
// Finally, process ordered worklist and eliminate implied conditions.
SmallVector<StackEntry, 16> DFSInStack;
SmallVector<ReproducerEntry> ReproducerCondStack;
- for (FactOrCheck &CB : S.WorkList) {
+ for (const FactOrCheck &CB : S.WorkList) {
// First, pop entries from the stack that are out-of-scope for CB. Remove
// the corresponding entry from the constraint system.
while (!DFSInStack.empty()) {
diff --git a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll
similarity index 81%
rename from llvm/test/Transforms/ConstraintElimination/max-row-limit.ll
rename to llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll
index 0e078109ed663..2f3b62dc5dab7 100644
--- a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll
+++ b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=COMMON,SIMP %s
-; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=9 -S %s | FileCheck --check-prefixes=COMMON,SIMP %s
-; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=COMMON,NOSIMP %s
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=SIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=SIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=6 -S %s | FileCheck --check-prefixes=SIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=7 -S %s | FileCheck --check-prefixes=NOSIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=5 -S %s | FileCheck --check-prefixes=NOSIMP %s
define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
@@ -22,7 +24,8 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
; SIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100
; SIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]]
; SIMP: bb5:
-; SIMP-NEXT: ret i1 true
+; SIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4:%.*]], 100
+; SIMP-NEXT: ret i1 [[C5]]
; SIMP: exit:
; SIMP-NEXT: ret i1 false
;
@@ -43,7 +46,7 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
; NOSIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100
; NOSIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]]
; NOSIMP: bb5:
-; NOSIMP-NEXT: [[C5:%.*]] = icmp uge i32 [[L4]], 100
+; NOSIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4]], 100
; NOSIMP-NEXT: ret i1 [[C5]]
; NOSIMP: exit:
; NOSIMP-NEXT: ret i1 false
@@ -69,11 +72,9 @@ bb4:
br i1 %c4, label %bb5, label %exit
bb5:
- %c5 = icmp uge i32 %l4, 100
+ %c5 = icmp sge i32 %l4, 100
ret i1 %c5
exit:
ret i1 false
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; COMMON: {{.*}}
``````````
</details>
https://github.com/llvm/llvm-project/pull/99670
More information about the llvm-commits mailing list