[llvm] ConstraintElim: add dry-run routine to fail early (PR #99670)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 13:15:27 PDT 2024
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/99670
>From a4b9f80e85d0fdd0e0e6390eb4548c88aa3fd35e Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 16 Jul 2024 19:54:11 +0100
Subject: [PATCH 1/2] ConstraintElim: add dry-run routine to fail early
Add a dry-run routine that computes a conservative estimate of the
number of rows and columns that the transform will require, and fail
early if the estimates exceed the upper bounds. This patch has a small
overhead, but improves compile-time on one benchmark significantly. The
overhead will be compensated for in a follow-up patch, where
ConstraintSystem is ported to use a Matrix data structure, performing
the full allocation ahead-of-time using these estimates.
---
.../Scalar/ConstraintElimination.cpp | 175 +++++++++++++++++-
...x-row-limit.ll => max-row-column-limit.ll} | 17 +-
2 files changed, 174 insertions(+), 18 deletions(-)
rename llvm/test/Transforms/ConstraintElimination/{max-row-limit.ll => max-row-column-limit.ll} (81%)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index c31173879af1e..369a6daa4d970 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -40,7 +40,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <cmath>
#include <optional>
#include <string>
@@ -57,6 +56,10 @@ static cl::opt<unsigned>
MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden,
cl::desc("Maximum number of rows to keep in constraint system"));
+static cl::opt<unsigned> MaxColumns(
+ "constraint-elimination-max-cols", cl::init(50), cl::Hidden,
+ cl::desc("Maximum number of columns to keep in constraint system"));
+
static cl::opt<bool> DumpReproducers(
"constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden,
cl::desc("Dump IR to reproduce successful transformations."));
@@ -303,6 +306,7 @@ class ConstraintInfo {
void popLastNVariables(bool Signed, unsigned N) {
getCS(Signed).popLastNVariables(N);
}
+ const DataLayout &getDataLayout() const { return DL; }
bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const;
@@ -1491,7 +1495,7 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info,
/// Check if either the first condition of an AND or OR is implied by the
/// (negated in case of OR) second condition or vice versa.
static bool checkOrAndOpImpliedByOther(
- FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
+ const FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
SmallVectorImpl<StackEntry> &DFSInStack) {
@@ -1671,18 +1675,91 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
return Changed;
}
-static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
- ScalarEvolution &SE,
- OptimizationRemarkEmitter &ORE) {
- bool Changed = false;
+/// Performs a dry run of AddFact, conservatively updating the estimated row
+/// counts of both constraint systems and the estimated column count.
+static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
+ const ConstraintInfo &Info, unsigned &EstimatedRowsA,
+ unsigned &EstimatedRowsB,
+ unsigned &EstimatedColumns) {
+ auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB,
+ &EstimatedColumns](CmpInst::Predicate Pred, Value *A,
+ Value *B) {
+ SmallVector<Value *> NewVars;
+ auto R = Info.getConstraint(Pred, A, B, NewVars);
+
+ // We offset it by 1 due to logic in addFact.
+ unsigned NewEstimate =
+ count_if(R.Coefficients, [](int64_t C) { return C != 0; }) + 1;
+
+ EstimatedColumns = std::max(EstimatedColumns, NewEstimate);
+ if (R.IsSigned)
+ ++EstimatedRowsA; // One more row for the signed system.
+ else
+ ++EstimatedRowsB; // One more row for the unsigned system.
+ };
+
+ UpdateEstimate(Pred, A, B);
+
+ // What follows is a dry-run of transferToOtherSystem.
+ auto IsKnownNonNegative = [&Info](Value *V) {
+ return Info.doesHold(CmpInst::ICMP_SGE, V,
+ ConstantInt::get(V->getType(), 0)) ||
+ isKnownNonNegative(V, Info.getDataLayout(),
+ MaxAnalysisRecursionDepth - 1);
+ };
+
+ if (!A->getType()->isIntegerTy())
+ return;
+
+ switch (Pred) {
+ default:
+ break;
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ if (IsKnownNonNegative(B)) {
+ UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
+ UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+ }
+ break;
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_UGT:
+ if (IsKnownNonNegative(A)) {
+ UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
+ UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+ }
+ break;
+ case CmpInst::ICMP_SLT:
+ if (IsKnownNonNegative(A))
+ UpdateEstimate(CmpInst::ICMP_ULT, A, B);
+ break;
+ case CmpInst::ICMP_SGT:
+ if (Info.doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
+ UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
+ if (IsKnownNonNegative(B))
+ UpdateEstimate(CmpInst::ICMP_UGT, A, B);
+ break;
+ case CmpInst::ICMP_SGE:
+ if (IsKnownNonNegative(B))
+ UpdateEstimate(CmpInst::ICMP_UGE, A, B);
+ break;
+ }
+}
+
+/// Performs a dry run of the transform, computing a conservative estimate of
+/// the total number of rows and columns we need in the underlying storage.
+static std::tuple<State, unsigned, unsigned>
+dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) {
DT.updateDFSNumbers();
SmallVector<Value *> FunctionArgs;
for (Value &Arg : F.args())
FunctionArgs.push_back(&Arg);
- ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
State S(DT, LI, SE);
- std::unique_ptr<Module> ReproducerModule(
- DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
+ unsigned EstimatedColumns = FunctionArgs.size() + 1;
+
+ // EstimatedRowsA corresponds to SignedCS, and EstimatedRowsB corresponds to
+ // UnsignedCS.
+ unsigned EstimatedRowsA = 0, EstimatedRowsB = 1;
+ ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
// First, collect conditions implied by branches and blocks with their
// Dominator DFS in and out numbers.
@@ -1725,12 +1802,90 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
return A.NumIn < B.NumIn;
});
+ for (const FactOrCheck &CB : S.WorkList) {
+ ICmpInst::Predicate Pred;
+ Value *A, *B;
+ if (CB.isCheck()) {
+ // What follows is a dry-run of checkOrAndOpImpliedByOther, without
+ // assuming that instructions have been simplified, as they would have
+ // during the course of normal operation.
+ auto *ContextInst = CB.getContextInst();
+ if (auto *Cmp =
+ dyn_cast_or_null<ICmpInst>(CB.getInstructionToSimplify())) {
+ unsigned OtherOpIdx = ContextInst->getOperand(0) == Cmp ? 1 : 0;
+ if (match(ContextInst, m_LogicalOp()) &&
+ match(ContextInst->getOperand(OtherOpIdx),
+ m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+ if (match(ContextInst, m_LogicalOr()))
+ Pred = CmpInst::getInversePredicate(Pred);
+ dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB,
+ EstimatedColumns);
+ }
+ }
+ continue;
+ }
+ if (!CB.isConditionFact()) {
+ Value *X;
+ if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
+ if (cast<ConstantInt>(CB.Inst->getOperand(1))->isOne())
+ dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst,
+ ConstantInt::get(CB.Inst->getType(), 0), Info,
+ EstimatedRowsA, EstimatedRowsB, EstimatedColumns);
+ dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst, X, Info, EstimatedRowsA,
+ EstimatedRowsB, EstimatedColumns);
+ continue;
+ }
+
+ if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
+ Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
+ dryRunAddFact(Pred, MinMax, MinMax->getLHS(), Info, EstimatedRowsA,
+ EstimatedRowsB, EstimatedColumns);
+ dryRunAddFact(Pred, MinMax, MinMax->getRHS(), Info, EstimatedRowsA,
+ EstimatedRowsB, EstimatedColumns);
+ continue;
+ }
+ }
+
+ if (CB.isConditionFact()) {
+ Pred = CB.Cond.Pred;
+ A = CB.Cond.Op0;
+ B = CB.Cond.Op1;
+ } else {
+ bool Matched = match(CB.Inst, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_Value(A), m_Value(B))));
+ (void)Matched;
+ assert(Matched && "Must have an assume intrinsic with a icmp operand");
+ }
+ dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB,
+ EstimatedColumns);
+ }
+ return {S, std::max(EstimatedRowsA, EstimatedRowsB), EstimatedColumns};
+}
+
+static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE,
+ OptimizationRemarkEmitter &ORE) {
+ bool Changed = false;
+ const auto &[S, EstimatedRows, EstimatedColumns] = dryRun(F, DT, LI, SE);
+
+ // Fail early if estimates exceed limits. Row estimate could be off by up to
+ // 40%.
+ if (EstimatedRows > 1.4 * MaxRows || EstimatedColumns > MaxColumns)
+ return false;
+
+ SmallVector<Value *> FunctionArgs;
+ for (Value &Arg : F.args())
+ FunctionArgs.push_back(&Arg);
+ ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
+ std::unique_ptr<Module> ReproducerModule(
+ DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
+
SmallVector<Instruction *> ToRemove;
// Finally, process ordered worklist and eliminate implied conditions.
SmallVector<StackEntry, 16> DFSInStack;
SmallVector<ReproducerEntry> ReproducerCondStack;
- for (FactOrCheck &CB : S.WorkList) {
+ for (const FactOrCheck &CB : S.WorkList) {
// First, pop entries from the stack that are out-of-scope for CB. Remove
// the corresponding entry from the constraint system.
while (!DFSInStack.empty()) {
diff --git a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll
similarity index 81%
rename from llvm/test/Transforms/ConstraintElimination/max-row-limit.ll
rename to llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll
index 0e078109ed663..2f3b62dc5dab7 100644
--- a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll
+++ b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=COMMON,SIMP %s
-; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=9 -S %s | FileCheck --check-prefixes=COMMON,SIMP %s
-; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=COMMON,NOSIMP %s
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=SIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=SIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=6 -S %s | FileCheck --check-prefixes=SIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=7 -S %s | FileCheck --check-prefixes=NOSIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=5 -S %s | FileCheck --check-prefixes=NOSIMP %s
define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
@@ -22,7 +24,8 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
; SIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100
; SIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]]
; SIMP: bb5:
-; SIMP-NEXT: ret i1 true
+; SIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4:%.*]], 100
+; SIMP-NEXT: ret i1 [[C5]]
; SIMP: exit:
; SIMP-NEXT: ret i1 false
;
@@ -43,7 +46,7 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
; NOSIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100
; NOSIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]]
; NOSIMP: bb5:
-; NOSIMP-NEXT: [[C5:%.*]] = icmp uge i32 [[L4]], 100
+; NOSIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4]], 100
; NOSIMP-NEXT: ret i1 [[C5]]
; NOSIMP: exit:
; NOSIMP-NEXT: ret i1 false
@@ -69,11 +72,9 @@ bb4:
br i1 %c4, label %bb5, label %exit
bb5:
- %c5 = icmp uge i32 %l4, 100
+ %c5 = icmp sge i32 %l4, 100
ret i1 %c5
exit:
ret i1 false
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; COMMON: {{.*}}
>From 7d9137338683127dcf147f052fa040d6b3a3a5dc Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 19 Jul 2024 19:59:40 +0100
Subject: [PATCH 2/2] ConstraintElim: attempt to make estimation cheaper
---
.../Scalar/ConstraintElimination.cpp | 75 ++++++-------------
1 file changed, 21 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 369a6daa4d970..84c7cdabd1d25 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1682,67 +1682,34 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
unsigned &EstimatedRowsB,
unsigned &EstimatedColumns) {
auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB,
- &EstimatedColumns](CmpInst::Predicate Pred, Value *A,
- Value *B) {
- SmallVector<Value *> NewVars;
- auto R = Info.getConstraint(Pred, A, B, NewVars);
-
+ &EstimatedColumns](bool IsSigned, Value *A, Value *B) {
+ SmallVector<ConditionTy> Preconditions;
+ unsigned NewVars = 0; // Variables not yet indexed in the chosen system.
+ auto ADec = decompose(A->stripPointerCastsSameRepresentation(),
+ Preconditions, IsSigned, Info.getDataLayout());
+ auto BDec = decompose(B->stripPointerCastsSameRepresentation(),
+ Preconditions, IsSigned, Info.getDataLayout());
+ for (auto &KV : concat<DecompEntry>(ADec.Vars, BDec.Vars)) {
+ if (!Info.getValue2Index(IsSigned).contains(KV.Variable)) {
+ ++NewVars;
+ if (IsSigned)
+ ++EstimatedRowsA;
+ else
+ ++EstimatedRowsB;
+ }
+ }
// We offset it by 1 due to logic in addFact.
- unsigned NewEstimate =
- count_if(R.Coefficients, [](int64_t C) { return C != 0; }) + 1;
-
- EstimatedColumns = std::max(EstimatedColumns, NewEstimate);
- if (R.IsSigned)
- ++EstimatedRowsA;
- else
- ++EstimatedRowsB;
+ EstimatedColumns = std::max(EstimatedColumns, NewVars + 1);
};
- UpdateEstimate(Pred, A, B);
-
- // What follows is a dry-run of transferToOtherSystem.
- auto IsKnownNonNegative = [&Info](Value *V) {
- return Info.doesHold(CmpInst::ICMP_SGE, V,
- ConstantInt::get(V->getType(), 0)) ||
- isKnownNonNegative(V, Info.getDataLayout(),
- MaxAnalysisRecursionDepth - 1);
- };
+ bool IsSigned = CmpInst::isSigned(Pred);
+ UpdateEstimate(IsSigned, A, B);
if (!A->getType()->isIntegerTy())
return;
- switch (Pred) {
- default:
- break;
- case CmpInst::ICMP_ULT:
- case CmpInst::ICMP_ULE:
- if (IsKnownNonNegative(B)) {
- UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
- UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
- }
- break;
- case CmpInst::ICMP_UGE:
- case CmpInst::ICMP_UGT:
- if (IsKnownNonNegative(A)) {
- UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
- UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
- }
- break;
- case CmpInst::ICMP_SLT:
- if (IsKnownNonNegative(A))
- UpdateEstimate(CmpInst::ICMP_ULT, A, B);
- break;
- case CmpInst::ICMP_SGT:
- if (Info.doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
- UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
- if (IsKnownNonNegative(B))
- UpdateEstimate(CmpInst::ICMP_UGT, A, B);
- break;
- case CmpInst::ICMP_SGE:
- if (IsKnownNonNegative(B))
- UpdateEstimate(CmpInst::ICMP_UGE, A, B);
- break;
- }
+ UpdateEstimate(!IsSigned, A, ConstantInt::get(B->getType(), 0));
+ UpdateEstimate(!IsSigned, A, B);
}
/// Performs a dry run of the transform, computing a conservative estimate of
More information about the llvm-commits
mailing list