[llvm] r295704 - The patch introduces new way of narrowing complex (>UINT16 variants) solutions.
Evgeny Stupachenko via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 20 23:34:41 PST 2017
Author: evstupac
Date: Tue Feb 21 01:34:40 2017
New Revision: 295704
URL: http://llvm.org/viewvc/llvm-project?rev=295704&view=rev
Log:
The patch introduces new way of narrowing complex (>UINT16 variants) solutions.
The new method is introduced under the "-lsr-exp-narrow" option (currently set to true).
Summary:
The method is based on the mathematical expectation of the number of registers
and should generally be closer to the optimal solution.
Please see details in comments to
"LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas()" function
(in lib/Transforms/Scalar/LoopStrengthReduce.cpp).
Reviewers: qcolombet
Differential Revision: http://reviews.llvm.org/D29862
From: Evgeny Stupachenko <evstupac at gmail.com>
Modified:
llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
llvm/trunk/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=295704&r1=295703&r2=295704&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Tue Feb 21 01:34:40 2017
@@ -134,6 +134,12 @@ static cl::opt<bool> InsnsCost(
"lsr-insns-cost", cl::Hidden, cl::init(false),
cl::desc("Add instruction count to a LSR cost model"));
+// Flag to choose how to narrow complex lsr solution
+static cl::opt<bool> LSRExpNarrow(
+ "lsr-exp-narrow", cl::Hidden, cl::init(true),
+ cl::desc("Narrow LSR complex solution using"
+ " expectation of registers number"));
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -1095,6 +1101,7 @@ public:
}
bool HasFormulaWithSameRegs(const Formula &F) const;
+ float getNotSelectedProbability(const SCEV *Reg) const;
bool InsertFormula(const Formula &F);
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
@@ -1373,6 +1380,15 @@ bool LSRUse::HasFormulaWithSameRegs(cons
return Uniquifier.count(Key);
}
+/// The function returns a probability of selecting formula without Reg.
+float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
+ unsigned FNum = 0;
+ for (const Formula &F : Formulae)
+ if (F.referencesReg(Reg))
+ FNum++;
+ return ((float)(Formulae.size() - FNum)) / Formulae.size();
+}
+
/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise. The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F) {
@@ -1846,6 +1862,7 @@ class LSRInstance {
void NarrowSearchSpaceByDetectingSupersets();
void NarrowSearchSpaceByCollapsingUnrolledCode();
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByDeletingCostlyFormulas();
void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
@@ -4247,6 +4264,144 @@ void LSRInstance::NarrowSearchSpaceByRef
}
}
+/// The function deletes formulas with a high expected number of registers.
+/// Assuming we don't know the value of each formula (the inefficient ones
+/// have already been deleted), generate the probability of not being
+/// selected for each register.
+/// For example,
+/// Use1:
+/// reg(a) + reg({0,+,1})
+/// reg(a) + reg({-1,+,1}) + 1
+/// reg({a,+,1})
+/// Use2:
+/// reg(b) + reg({0,+,1})
+/// reg(b) + reg({-1,+,1}) + 1
+/// reg({b,+,1})
+/// Use3:
+/// reg(c) + reg(b) + reg({0,+,1})
+/// reg(c) + reg({b,+,1})
+///
+/// Probability of not selecting
+///                 Use1   Use2    Use3
+/// reg(a)         (1/3) *   1   *   1
+/// reg(b)           1   * (1/3) * (1/2)
+/// reg({0,+,1})   (2/3) * (2/3) * (1/2)
+/// reg({-1,+,1})  (2/3) * (2/3) *   1
+/// reg({a,+,1})   (2/3) *   1   *   1
+/// reg({b,+,1})     1   * (2/3) * (1/2)
+/// reg(c)           1   *   1   *   0
+///
+/// Now compute the mathematical expectation of the number of registers for
+/// each formula. Note that for each use we exclude that use's own probability
+/// of not selecting. For example, for Use1 the probability for reg(a) would be
+/// just 1 * 1 (excluding the probability 1/3 of not selecting for Use1).
+/// Use1:
+///  reg(a) + reg({0,+,1})          1 + 1/3       -- to be deleted
+///  reg(a) + reg({-1,+,1}) + 1     1 + 2/3       -- to be deleted
+///  reg({a,+,1})                   1
+/// Use2:
+///  reg(b) + reg({0,+,1})          1/2 + 1/3     -- to be deleted
+///  reg(b) + reg({-1,+,1}) + 1     1/2 + 2/3     -- to be deleted
+///  reg({b,+,1})                   1/2
+/// Use3:
+///  reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
+///  reg(c) + reg({b,+,1})          1 + 2/3
+
+void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ return;
+ // Ok, we have too many of formulae on our hands to conveniently handle.
+ // Use a rough heuristic to thin out the list.
+
+ // Set of Regs which will be 100% used in final solution.
+ // Used in each formula of a solution (in example above this is reg(c)).
+ // We can skip them in calculations.
+ SmallPtrSet<const SCEV *, 4> UniqRegs;
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ // Map each register to probability of not selecting
+ DenseMap <const SCEV *, float> RegNumMap;
+ for (const SCEV *Reg : RegUses) {
+ if (UniqRegs.count(Reg))
+ continue;
+ float PNotSel = 1;
+ for (const LSRUse &LU : Uses) {
+ if (!LU.Regs.count(Reg))
+ continue;
+ float P = LU.getNotSelectedProbability(Reg);
+ if (P != 0.0)
+ PNotSel *= P;
+ else
+ UniqRegs.insert(Reg);
+ }
+ RegNumMap.insert(std::make_pair(Reg, PNotSel));
+ }
+
+ DEBUG(dbgs() << "Narrowing the search space by deleting costly formulas\n");
+
+ // Delete formulas where registers number expectation is high.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ // If nothing to delete - continue.
+ if (LU.Formulae.size() < 2)
+ continue;
+ // This is a temporary solution to test performance. Float should be
+ // replaced with a rounding-independent type (based on integers) to avoid
+ // different results for different target builds.
+ float FMinRegNum = LU.Formulae[0].getNumRegs();
+ float FMinARegNum = LU.Formulae[0].getNumRegs();
+ size_t MinIdx = 0;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ float FRegNum = 0;
+ float FARegNum = 0;
+ for (const SCEV *BaseReg : F.BaseRegs) {
+ if (UniqRegs.count(BaseReg))
+ continue;
+ FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
+ if (isa<SCEVAddRecExpr>(BaseReg))
+ FARegNum +=
+ RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
+ }
+ if (const SCEV *ScaledReg = F.ScaledReg) {
+ if (!UniqRegs.count(ScaledReg)) {
+ FRegNum +=
+ RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
+ if (isa<SCEVAddRecExpr>(ScaledReg))
+ FARegNum +=
+ RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
+ }
+ }
+ if (FMinRegNum > FRegNum ||
+ (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
+ FMinRegNum = FRegNum;
+ FMinARegNum = FARegNum;
+ MinIdx = i;
+ }
+ }
+ DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
+ dbgs() << " with min reg num " << FMinRegNum << '\n');
+ if (MinIdx != 0)
+ std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
+ while (LU.Formulae.size() != 1) {
+ DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
+ dbgs() << '\n');
+ LU.Formulae.pop_back();
+ }
+ LU.RecomputeRegs(LUIdx, RegUses);
+ assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
+ Formula &F = LU.Formulae[0];
+ DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
+ // When we choose the formula, the regs become unique.
+ UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ if (F.ScaledReg)
+ UniqRegs.insert(F.ScaledReg);
+ }
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+}
+
+
/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
@@ -4319,7 +4474,10 @@ void LSRInstance::NarrowSearchSpaceUsing
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
- NarrowSearchSpaceByPickingWinnerRegs();
+ if (LSRExpNarrow)
+ NarrowSearchSpaceByDeletingCostlyFormulas();
+ else
+ NarrowSearchSpaceByPickingWinnerRegs();
}
/// This is the recursive solver.
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll?rev=295704&r1=295703&r2=295704&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll Tue Feb 21 01:34:40 2017
@@ -14,7 +14,7 @@ target datalayout = "e-p:64:64:64-i1:8:8
; current LSR cost model.
; CHECK-NOT: = ptrtoint i8* undef to i64
; CHECK: .lr.ph
-; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp5, 1
+; CHECK: [[TMP:%[^ ]+]] = add i64 %4, 1
; CHECK: sub i64 [[TMP]], %tmp6
; CHECK: ret void
define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll?rev=295704&r1=295703&r2=295704&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll Tue Feb 21 01:34:40 2017
@@ -139,7 +139,7 @@ for.end:
; Consequently, we should *not* form any chains.
;
; A9: foldedidx:
-; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
+; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #403]
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
entry:
br label %for.body
More information about the llvm-commits
mailing list