[llvm] ConstraintElim: add dry-run routine to fail early (PR #99670)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 22 10:23:54 PDT 2024
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/99670
>From a4b9f80e85d0fdd0e0e6390eb4548c88aa3fd35e Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 16 Jul 2024 19:54:11 +0100
Subject: [PATCH 1/4] ConstraintElim: add dry-run routine to fail early
Add a dry-run routine that computes a conservative estimate of the
number of rows and columns that the transform will require, and fail
early if the estimates exceed the upper bounds. This patch has a small
overhead, but improves compile-time on one benchmark significantly. The
overhead will be compensated for in a follow-up patch, where
ConstraintSystem is ported to use a Matrix data structure, performing
the full allocation ahead-of-time using these estimates.
---
.../Scalar/ConstraintElimination.cpp | 175 +++++++++++++++++-
...x-row-limit.ll => max-row-column-limit.ll} | 17 +-
2 files changed, 174 insertions(+), 18 deletions(-)
rename llvm/test/Transforms/ConstraintElimination/{max-row-limit.ll => max-row-column-limit.ll} (81%)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index c31173879af1e..369a6daa4d970 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -40,7 +40,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <cmath>
#include <optional>
#include <string>
@@ -57,6 +56,10 @@ static cl::opt<unsigned>
MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden,
cl::desc("Maximum number of rows to keep in constraint system"));
+static cl::opt<unsigned> MaxColumns(
+ "constraint-elimination-max-cols", cl::init(50), cl::Hidden,
+ cl::desc("Maximum number of columns to keep in constraint system"));
+
static cl::opt<bool> DumpReproducers(
"constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden,
cl::desc("Dump IR to reproduce successful transformations."));
@@ -303,6 +306,7 @@ class ConstraintInfo {
void popLastNVariables(bool Signed, unsigned N) {
getCS(Signed).popLastNVariables(N);
}
+ const DataLayout &getDataLayout() const { return DL; }
bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const;
@@ -1491,7 +1495,7 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info,
/// Check if either the first condition of an AND or OR is implied by the
/// (negated in case of OR) second condition or vice versa.
static bool checkOrAndOpImpliedByOther(
- FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
+ const FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
SmallVectorImpl<StackEntry> &DFSInStack) {
@@ -1671,18 +1675,91 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
return Changed;
}
-static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
- ScalarEvolution &SE,
- OptimizationRemarkEmitter &ORE) {
- bool Changed = false;
+/// Performs a dry run of AddFact, computing a conservative estimate of the
+/// number of new variables introduced.
+static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
+ const ConstraintInfo &Info, unsigned &EstimatedRowsA,
+ unsigned &EstimatedRowsB,
+ unsigned &EstimatedColumns) {
+ auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB,
+ &EstimatedColumns](CmpInst::Predicate Pred, Value *A,
+ Value *B) {
+ SmallVector<Value *> NewVars;
+ auto R = Info.getConstraint(Pred, A, B, NewVars);
+
+ // We offset it by 1 due to logic in addFact.
+ unsigned NewEstimate =
+ count_if(R.Coefficients, [](int64_t C) { return C != 0; }) + 1;
+
+ EstimatedColumns = std::max(EstimatedColumns, NewEstimate);
+ if (R.IsSigned)
+ ++EstimatedRowsA;
+ else
+ ++EstimatedRowsB;
+ };
+
+ UpdateEstimate(Pred, A, B);
+
+ // What follows is a dry-run of transferToOtherSystem.
+ auto IsKnownNonNegative = [&Info](Value *V) {
+ return Info.doesHold(CmpInst::ICMP_SGE, V,
+ ConstantInt::get(V->getType(), 0)) ||
+ isKnownNonNegative(V, Info.getDataLayout(),
+ MaxAnalysisRecursionDepth - 1);
+ };
+
+ if (!A->getType()->isIntegerTy())
+ return;
+
+ switch (Pred) {
+ default:
+ break;
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ if (IsKnownNonNegative(B)) {
+ UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
+ UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+ }
+ break;
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_UGT:
+ if (IsKnownNonNegative(A)) {
+ UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
+ UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+ }
+ break;
+ case CmpInst::ICMP_SLT:
+ if (IsKnownNonNegative(A))
+ UpdateEstimate(CmpInst::ICMP_ULT, A, B);
+ break;
+ case CmpInst::ICMP_SGT:
+ if (Info.doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
+ UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
+ if (IsKnownNonNegative(B))
+ UpdateEstimate(CmpInst::ICMP_UGT, A, B);
+ break;
+ case CmpInst::ICMP_SGE:
+ if (IsKnownNonNegative(B))
+ UpdateEstimate(CmpInst::ICMP_UGE, A, B);
+ break;
+ }
+}
+
+/// Performs a dry run of the transform, computing a conservative estimate of
+/// the total number of columns we need in the underlying storage.
+static std::tuple<State, unsigned, unsigned>
+dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) {
DT.updateDFSNumbers();
SmallVector<Value *> FunctionArgs;
for (Value &Arg : F.args())
FunctionArgs.push_back(&Arg);
- ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
State S(DT, LI, SE);
- std::unique_ptr<Module> ReproducerModule(
- DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
+ unsigned EstimatedColumns = FunctionArgs.size() + 1;
+
+ // EstimatedRowsA corresponds to SignedCS, and EstimatedRowsB corresponds to
+ // UnsignedCS.
+ unsigned EstimatedRowsA = 0, EstimatedRowsB = 1;
+ ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
// First, collect conditions implied by branches and blocks with their
// Dominator DFS in and out numbers.
@@ -1725,12 +1802,90 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
return A.NumIn < B.NumIn;
});
+ for (const FactOrCheck &CB : S.WorkList) {
+ ICmpInst::Predicate Pred;
+ Value *A, *B;
+ if (CB.isCheck()) {
+ // What follows is a dry-run of checkOrAndOpImpliedByOther, without
+ // assuming that instructions have been simplified, as they would have
+ // during the course of normal operation.
+ auto *ContextInst = CB.getContextInst();
+ if (auto *Cmp =
+ dyn_cast_or_null<ICmpInst>(CB.getInstructionToSimplify())) {
+ unsigned OtherOpIdx = ContextInst->getOperand(0) == Cmp ? 1 : 0;
+ if (match(ContextInst, m_LogicalOp()) &&
+ match(ContextInst->getOperand(OtherOpIdx),
+ m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+ if (match(ContextInst, m_LogicalOr()))
+ Pred = CmpInst::getInversePredicate(Pred);
+ dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB,
+ EstimatedColumns);
+ }
+ }
+ continue;
+ }
+ if (!CB.isConditionFact()) {
+ Value *X;
+ if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
+ if (cast<ConstantInt>(CB.Inst->getOperand(1))->isOne())
+ dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst,
+ ConstantInt::get(CB.Inst->getType(), 0), Info,
+ EstimatedRowsA, EstimatedRowsB, EstimatedColumns);
+ dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst, X, Info, EstimatedRowsA,
+ EstimatedRowsB, EstimatedColumns);
+ continue;
+ }
+
+ if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
+ Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
+ dryRunAddFact(Pred, MinMax, MinMax->getLHS(), Info, EstimatedRowsA,
+ EstimatedRowsB, EstimatedColumns);
+ dryRunAddFact(Pred, MinMax, MinMax->getRHS(), Info, EstimatedRowsA,
+ EstimatedRowsB, EstimatedColumns);
+ continue;
+ }
+ }
+
+ if (CB.isConditionFact()) {
+ Pred = CB.Cond.Pred;
+ A = CB.Cond.Op0;
+ B = CB.Cond.Op1;
+ } else {
+ bool Matched = match(CB.Inst, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_Value(A), m_Value(B))));
+ (void)Matched;
+ assert(Matched && "Must have an assume intrinsic with a icmp operand");
+ }
+ dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB,
+ EstimatedColumns);
+ }
+ return {S, std::max(EstimatedRowsA, EstimatedRowsB), EstimatedColumns};
+}
+
+static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE,
+ OptimizationRemarkEmitter &ORE) {
+ bool Changed = false;
+ const auto &[S, EstimatedRows, EstimatedColumns] = dryRun(F, DT, LI, SE);
+
+ // Fail early if estimates exceed limits. Row estimate could be off by up to
+ // 40%.
+ if (EstimatedRows > 1.4 * MaxRows || EstimatedColumns > MaxColumns)
+ return false;
+
+ SmallVector<Value *> FunctionArgs;
+ for (Value &Arg : F.args())
+ FunctionArgs.push_back(&Arg);
+ ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
+ std::unique_ptr<Module> ReproducerModule(
+ DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
+
SmallVector<Instruction *> ToRemove;
// Finally, process ordered worklist and eliminate implied conditions.
SmallVector<StackEntry, 16> DFSInStack;
SmallVector<ReproducerEntry> ReproducerCondStack;
- for (FactOrCheck &CB : S.WorkList) {
+ for (const FactOrCheck &CB : S.WorkList) {
// First, pop entries from the stack that are out-of-scope for CB. Remove
// the corresponding entry from the constraint system.
while (!DFSInStack.empty()) {
diff --git a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll
similarity index 81%
rename from llvm/test/Transforms/ConstraintElimination/max-row-limit.ll
rename to llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll
index 0e078109ed663..2f3b62dc5dab7 100644
--- a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll
+++ b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=COMMON,SIMP %s
-; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=9 -S %s | FileCheck --check-prefixes=COMMON,SIMP %s
-; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=COMMON,NOSIMP %s
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=SIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=SIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=6 -S %s | FileCheck --check-prefixes=SIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=7 -S %s | FileCheck --check-prefixes=NOSIMP %s
+; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=5 -S %s | FileCheck --check-prefixes=NOSIMP %s
define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
@@ -22,7 +24,8 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
; SIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100
; SIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]]
; SIMP: bb5:
-; SIMP-NEXT: ret i1 true
+; SIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4:%.*]], 100
+; SIMP-NEXT: ret i1 [[C5]]
; SIMP: exit:
; SIMP-NEXT: ret i1 false
;
@@ -43,7 +46,7 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
; NOSIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100
; NOSIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]]
; NOSIMP: bb5:
-; NOSIMP-NEXT: [[C5:%.*]] = icmp uge i32 [[L4]], 100
+; NOSIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4]], 100
; NOSIMP-NEXT: ret i1 [[C5]]
; NOSIMP: exit:
; NOSIMP-NEXT: ret i1 false
@@ -69,11 +72,9 @@ bb4:
br i1 %c4, label %bb5, label %exit
bb5:
- %c5 = icmp uge i32 %l4, 100
+ %c5 = icmp sge i32 %l4, 100
ret i1 %c5
exit:
ret i1 false
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; COMMON: {{.*}}
>From 50c2e95b9a66395d5f6b62a99e67fec1a874f8f5 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sat, 20 Jul 2024 13:32:16 +0100
Subject: [PATCH 2/4] ConstraintElim: attempt to make estimation cheaper
---
.../Scalar/ConstraintElimination.cpp | 70 ++++++++++---------
1 file changed, 38 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 369a6daa4d970..2c12c4fafd84a 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1684,30 +1684,47 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB,
&EstimatedColumns](CmpInst::Predicate Pred, Value *A,
Value *B) {
- SmallVector<Value *> NewVars;
- auto R = Info.getConstraint(Pred, A, B, NewVars);
+ // What follows is a simplified dry-run of Info.getConstraint and addFact.
+ unsigned NumNewVars = 0;
+ bool IsSigned = false;
+
+ switch (Pred) {
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_EQ:
+ break;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SGE:
+ IsSigned = true;
+ break;
+ default:
+ return;
+ }
- // We offset it by 1 due to logic in addFact.
- unsigned NewEstimate =
- count_if(R.Coefficients, [](int64_t C) { return C != 0; }) + 1;
+ SmallVector<ConditionTy, 4> Preconditions;
+ auto &Value2Index = Info.getValue2Index(IsSigned);
+ auto ADec = decompose(A->stripPointerCastsSameRepresentation(),
+ Preconditions, IsSigned, Info.getDataLayout());
+ auto BDec = decompose(B->stripPointerCastsSameRepresentation(),
+ Preconditions, IsSigned, Info.getDataLayout());
+ for (const auto &KV : concat<DecompEntry>(ADec.Vars, BDec.Vars)) {
+ if (!Value2Index.contains(KV.Variable))
+ ++NumNewVars;
+ }
- EstimatedColumns = std::max(EstimatedColumns, NewEstimate);
- if (R.IsSigned)
+ if (IsSigned)
++EstimatedRowsA;
else
++EstimatedRowsB;
+
+ EstimatedColumns =
+ std::max(EstimatedColumns, NumNewVars + Value2Index.size() + 2);
};
UpdateEstimate(Pred, A, B);
- // What follows is a dry-run of transferToOtherSystem.
- auto IsKnownNonNegative = [&Info](Value *V) {
- return Info.doesHold(CmpInst::ICMP_SGE, V,
- ConstantInt::get(V->getType(), 0)) ||
- isKnownNonNegative(V, Info.getDataLayout(),
- MaxAnalysisRecursionDepth - 1);
- };
-
+ // What follows is a simplified dry-run of transferToOtherSystem.
if (!A->getType()->isIntegerTy())
return;
@@ -1716,31 +1733,20 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
break;
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_ULE:
- if (IsKnownNonNegative(B)) {
- UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
- UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
- }
- break;
case CmpInst::ICMP_UGE:
case CmpInst::ICMP_UGT:
- if (IsKnownNonNegative(A)) {
- UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
- UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
- }
+ UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
+ UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
break;
case CmpInst::ICMP_SLT:
- if (IsKnownNonNegative(A))
- UpdateEstimate(CmpInst::ICMP_ULT, A, B);
+ UpdateEstimate(CmpInst::ICMP_ULT, A, B);
break;
case CmpInst::ICMP_SGT:
- if (Info.doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
- UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
- if (IsKnownNonNegative(B))
- UpdateEstimate(CmpInst::ICMP_UGT, A, B);
+ UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
+ UpdateEstimate(CmpInst::ICMP_UGT, A, B);
break;
case CmpInst::ICMP_SGE:
- if (IsKnownNonNegative(B))
- UpdateEstimate(CmpInst::ICMP_UGE, A, B);
+ UpdateEstimate(CmpInst::ICMP_UGE, A, B);
break;
}
}
>From 11b306fae6c365a152d8fb9b61846ca733ac87c1 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sun, 21 Jul 2024 14:38:05 +0100
Subject: [PATCH 3/4] ConstraintElim: avoid running decompose multiple times
---
.../Scalar/ConstraintElimination.cpp | 37 ++++++++++++-------
1 file changed, 23 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 2c12c4fafd84a..334fb8e5d6752 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -1678,7 +1679,7 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
/// Performs a dry run of AddFact, computing a conservative estimate of the
/// number of new variables introduced.
static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
- const ConstraintInfo &Info, unsigned &EstimatedRowsA,
+ ConstraintInfo &Info, unsigned &EstimatedRowsA,
unsigned &EstimatedRowsB,
unsigned &EstimatedColumns) {
auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB,
@@ -1704,12 +1705,24 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
SmallVector<ConditionTy, 4> Preconditions;
auto &Value2Index = Info.getValue2Index(IsSigned);
- auto ADec = decompose(A->stripPointerCastsSameRepresentation(),
- Preconditions, IsSigned, Info.getDataLayout());
- auto BDec = decompose(B->stripPointerCastsSameRepresentation(),
- Preconditions, IsSigned, Info.getDataLayout());
- for (const auto &KV : concat<DecompEntry>(ADec.Vars, BDec.Vars)) {
- if (!Value2Index.contains(KV.Variable))
+ Value *AStrip = A->stripPointerCastsSameRepresentation();
+ Value *BStrip = B->stripPointerCastsSameRepresentation();
+ SmallVector<DecompEntry> AVars, BVars;
+
+ if (!Value2Index.contains(AStrip)) {
+ AVars =
+ decompose(AStrip, Preconditions, IsSigned, Info.getDataLayout()).Vars;
+ Value2Index.insert({AStrip, Value2Index.size() + 1});
+ }
+ if (!Value2Index.contains(BStrip)) {
+ BVars =
+ decompose(BStrip, Preconditions, IsSigned, Info.getDataLayout()).Vars;
+ Value2Index.insert({BStrip, Value2Index.size() + 1});
+ }
+
+ for (const auto &KV : concat<DecompEntry>(AVars, BVars)) {
+ if (KV.Variable == AStrip || KV.Variable == BStrip ||
+ !Value2Index.contains(KV.Variable))
++NumNewVars;
}
@@ -1718,8 +1731,7 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
else
++EstimatedRowsB;
- EstimatedColumns =
- std::max(EstimatedColumns, NumNewVars + Value2Index.size() + 2);
+ EstimatedColumns = std::max(EstimatedColumns, NumNewVars + 2);
};
UpdateEstimate(Pred, A, B);
@@ -1756,16 +1768,13 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
static std::tuple<State, unsigned, unsigned>
dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) {
DT.updateDFSNumbers();
- SmallVector<Value *> FunctionArgs;
- for (Value &Arg : F.args())
- FunctionArgs.push_back(&Arg);
State S(DT, LI, SE);
- unsigned EstimatedColumns = FunctionArgs.size() + 1;
// EstimatedRowsA corresponds to SignedCS, and EstimatedRowsB corresponds to
// UnsignedCS.
unsigned EstimatedRowsA = 0, EstimatedRowsB = 1;
- ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
+ unsigned EstimatedColumns = 1;
+ ConstraintInfo Info(F.getDataLayout(), {});
// First, collect conditions implied by branches and blocks with their
// Dominator DFS in and out numbers.
>From f0d674c9f5aa601c0e5472f208ff96f3d855d9b6 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 22 Jul 2024 16:47:42 +0100
Subject: [PATCH 4/4] ConstraintElim: idea for optimizing dry-run further
---
.../Scalar/ConstraintElimination.cpp | 56 ++++++++++---------
1 file changed, 30 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 334fb8e5d6752..fdd463273edd1 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1682,13 +1682,11 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
ConstraintInfo &Info, unsigned &EstimatedRowsA,
unsigned &EstimatedRowsB,
unsigned &EstimatedColumns) {
+ // What follows is a simplified dry-run of Info.getConstraint and addFact.
auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB,
&EstimatedColumns](CmpInst::Predicate Pred, Value *A,
Value *B) {
- // What follows is a simplified dry-run of Info.getConstraint and addFact.
- unsigned NumNewVars = 0;
bool IsSigned = false;
-
switch (Pred) {
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_UGE:
@@ -1703,35 +1701,41 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
return;
}
- SmallVector<ConditionTy, 4> Preconditions;
- auto &Value2Index = Info.getValue2Index(IsSigned);
- Value *AStrip = A->stripPointerCastsSameRepresentation();
- Value *BStrip = B->stripPointerCastsSameRepresentation();
- SmallVector<DecompEntry> AVars, BVars;
-
- if (!Value2Index.contains(AStrip)) {
- AVars =
- decompose(AStrip, Preconditions, IsSigned, Info.getDataLayout()).Vars;
- Value2Index.insert({AStrip, Value2Index.size() + 1});
- }
- if (!Value2Index.contains(BStrip)) {
- BVars =
- decompose(BStrip, Preconditions, IsSigned, Info.getDataLayout()).Vars;
- Value2Index.insert({BStrip, Value2Index.size() + 1});
- }
+ // We abuse the Value2Index map by storing a map of Value -> size of
+ // decompose(Value).Vars. We do this to avoid wastefully calling decompose
+ // on chains we have already seen.
+ auto &DecompSizes = Info.getValue2Index(IsSigned);
+ const auto &DL = Info.getDataLayout();
+
+ auto GetDecompSize = [&DL, &DecompSizes, IsSigned](Value *V) {
+ if (!DecompSizes.contains(V)) {
+ SmallVector<ConditionTy, 4> Preconditions;
+ auto Vars = decompose(V, Preconditions, IsSigned, DL).Vars;
+ for (const auto &[Idx, KV] : enumerate(Vars))
+ // decompose matches the passed Value against some pattern, and
+ // recursively calls itself on the matched inner values, merging
+ // results into the final chain. We conservatively estimate that any
+ // sub-chain in the decompose chain will have the length of the chain
+ // minus the index: the results of two sub-chains could be merged,
+ // leading to a conservative estimate. This is however, not a problem
+ // in practice, as these conservative estimates are anyway less than
+ // the the size of the total chain, and std::max on the estimated
+ // columns will ignore these smaller estiamtes.
+ DecompSizes.insert({KV.Variable, Vars.size() - Idx});
+ DecompSizes.insert({V, Vars.size()});
+ }
+ return DecompSizes.at(V);
+ };
- for (const auto &KV : concat<DecompEntry>(AVars, BVars)) {
- if (KV.Variable == AStrip || KV.Variable == BStrip ||
- !Value2Index.contains(KV.Variable))
- ++NumNewVars;
- }
+ unsigned ASize = GetDecompSize(A->stripPointerCastsSameRepresentation());
+ unsigned BSize = GetDecompSize(B->stripPointerCastsSameRepresentation());
if (IsSigned)
++EstimatedRowsA;
else
++EstimatedRowsB;
- EstimatedColumns = std::max(EstimatedColumns, NumNewVars + 2);
+ EstimatedColumns = std::max(EstimatedColumns, ASize + BSize + 2);
};
UpdateEstimate(Pred, A, B);
@@ -1764,7 +1768,7 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
}
/// Performs a dry run of the transform, computing a conservative estimate of
-/// the total number of columns we need in the underlying storage.
+/// the total number of rows and columns we need in the underlying storage.
static std::tuple<State, unsigned, unsigned>
dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) {
DT.updateDFSNumbers();
More information about the llvm-commits
mailing list