[polly] r311553 - Add more statistics.
Michael Kruse via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 23 06:50:31 PDT 2017
Author: meinersbur
Date: Wed Aug 23 06:50:30 2017
New Revision: 311553
URL: http://llvm.org/viewvc/llvm-project?rev=311553&view=rev
Log:
Add more statistics.
Add statistics about
- Which optimizations are applied
- Number of loops in Scops at various stages
- Number of scalar/singleton writes at various stages (representative
of scalar false dependencies)
- Number of parallel loops
These will be useful for finding regressions caused by moving Polly further
down LLVM's pass pipeline.
Differential Revision: https://reviews.llvm.org/D37049
Modified:
polly/trunk/include/polly/ScopInfo.h
polly/trunk/include/polly/Simplify.h
polly/trunk/include/polly/Support/SCEVAffinator.h
polly/trunk/lib/Analysis/PruneUnprofitable.cpp
polly/trunk/lib/Analysis/ScopInfo.cpp
polly/trunk/lib/CodeGen/CodeGeneration.cpp
polly/trunk/lib/CodeGen/IslAst.cpp
polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
polly/trunk/lib/Support/RegisterPasses.cpp
polly/trunk/lib/Transform/DeLICM.cpp
polly/trunk/lib/Transform/ForwardOpTree.cpp
polly/trunk/lib/Transform/ScheduleOptimizer.cpp
polly/trunk/lib/Transform/Simplify.cpp
Modified: polly/trunk/include/polly/ScopInfo.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/ScopInfo.h?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/include/polly/ScopInfo.h (original)
+++ polly/trunk/include/polly/ScopInfo.h Wed Aug 23 06:50:30 2017
@@ -1936,6 +1936,10 @@ private:
/// Scop constructor; invoked from ScopBuilder::buildScop.
Scop(Region &R, ScalarEvolution &SE, LoopInfo &LI,
ScopDetection::DetectionContext &DC, OptimizationRemarkEmitter &ORE);
+
+ /// Return the LoopInfo used for this Scop.
+ LoopInfo *getLI() const { return Affinator.getLI(); }
+
//@}
/// Initialize this ScopBuilder.
@@ -3018,6 +3022,25 @@ public:
/// Return whether @p Inst has a use outside of this SCoP.
bool isEscaping(Instruction *Inst);
+
+ struct ScopStatistics {
+ int NumAffineLoops = 0;
+ int NumBoxedLoops = 0;
+
+ int NumValueWrites = 0;
+ int NumValueWritesInLoops = 0;
+ int NumPHIWrites = 0;
+ int NumPHIWritesInLoops = 0;
+ int NumSingletonWrites = 0;
+ int NumSingletonWritesInLoops = 0;
+ };
+
+ /// Collect statistics about this SCoP.
+ ///
+ /// These are most commonly used for LLVM's static counters (Statistic.h) in
+ /// various places. If statistics are disabled, only zeros are returned to
+ /// avoid the overhead.
+ ScopStatistics getStatistics() const;
};
/// Print Scop scop to raw_ostream OS.
Modified: polly/trunk/include/polly/Simplify.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/Simplify.h?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/include/polly/Simplify.h (original)
+++ polly/trunk/include/polly/Simplify.h Wed Aug 23 06:50:30 2017
@@ -41,7 +41,16 @@ class ScopStmt;
/// The order in which implicit writes are executed relative to each other is
/// undefined.
llvm::SmallVector<MemoryAccess *, 32> getAccessesInOrder(ScopStmt &Stmt);
-llvm::Pass *createSimplifyPass();
+
+/// Create a Simplify pass
+///
+/// @param CallNo Disambiguates this instance for when there are multiple
+/// instances of this pass in the pass manager. It is used only to
+/// keep the statistics apart and has no influence on the
+/// simplification itself.
+///
+/// @return The Simplify pass.
+llvm::Pass *createSimplifyPass(int CallNo = 0);
} // namespace polly
namespace llvm {
Modified: polly/trunk/include/polly/Support/SCEVAffinator.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/Support/SCEVAffinator.h?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/include/polly/Support/SCEVAffinator.h (original)
+++ polly/trunk/include/polly/Support/SCEVAffinator.h Wed Aug 23 06:50:30 2017
@@ -73,6 +73,9 @@ public:
/// Check an <nsw> AddRec for the loop @p L is cached.
bool hasNSWAddRecForLoop(llvm::Loop *L) const;
+ /// Return the LoopInfo used by this object.
+ llvm::LoopInfo *getLI() const { return &LI; }
+
private:
/// Key to identify cached expressions.
using CacheKey = std::pair<const llvm::SCEV *, llvm::BasicBlock *>;
Modified: polly/trunk/lib/Analysis/PruneUnprofitable.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/PruneUnprofitable.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/PruneUnprofitable.cpp (original)
+++ polly/trunk/lib/Analysis/PruneUnprofitable.cpp Wed Aug 23 06:50:30 2017
@@ -25,12 +25,36 @@ STATISTIC(ScopsProcessed,
"Number of SCoPs considered for unprofitability pruning");
STATISTIC(ScopsPruned, "Number of pruned SCoPs because it they cannot be "
"optimized in a significant way");
+STATISTIC(ScopsSurvived, "Number of SCoPs after pruning");
+
+STATISTIC(NumPrunedLoops, "Number of pruned loops");
+STATISTIC(NumPrunedBoxedLoops, "Number of pruned boxed loops");
+STATISTIC(NumPrunedAffineLoops, "Number of pruned affine loops");
+
+STATISTIC(NumLoopsInScop, "Number of loops in scops after pruning");
+STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after pruning");
+STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after pruning");
class PruneUnprofitable : public ScopPass {
private:
PruneUnprofitable(const PruneUnprofitable &) = delete;
const PruneUnprofitable &operator=(const PruneUnprofitable &) = delete;
+ void updateStatistics(Scop &S, bool Pruned) {
+ auto ScopStats = S.getStatistics();
+ if (Pruned) {
+ ScopsPruned++;
+ NumPrunedLoops += ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops;
+ NumPrunedBoxedLoops += ScopStats.NumBoxedLoops;
+ NumPrunedAffineLoops += ScopStats.NumAffineLoops;
+ } else {
+ ScopsSurvived++;
+ NumLoopsInScop += ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops;
+ NumBoxedLoops += ScopStats.NumBoxedLoops;
+ NumAffineLoops += ScopStats.NumAffineLoops;
+ }
+ }
+
public:
static char ID;
explicit PruneUnprofitable() : ScopPass(ID) {}
@@ -52,8 +76,10 @@ public:
if (!S.isProfitable(true)) {
DEBUG(dbgs() << "SCoP pruned because it probably cannot be optimized in "
"a significant way\n");
- ScopsPruned++;
S.invalidate(PROFITABLE, DebugLoc());
+ updateStatistics(S, true);
+ } else {
+ updateStatistics(S, false);
}
return false;
Modified: polly/trunk/lib/Analysis/ScopInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp (original)
+++ polly/trunk/lib/Analysis/ScopInfo.cpp Wed Aug 23 06:50:30 2017
@@ -121,7 +121,11 @@ STATISTIC(AssumptionsInvariantLoad,
STATISTIC(AssumptionsDelinearization,
"Number of delinearization assumptions taken.");
+STATISTIC(NumScops, "Number of feasible SCoPs after ScopInfo");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
+STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after ScopInfo");
+STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after ScopInfo");
+
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
@@ -131,6 +135,17 @@ STATISTIC(NumScopsDepthLarger,
"Number of scops with maximal loop depth 6 and larger");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
+STATISTIC(NumValueWrites, "Number of scalar value writes after ScopInfo");
+STATISTIC(
+ NumValueWritesInLoops,
+ "Number of scalar value writes nested in affine loops after ScopInfo");
+STATISTIC(NumPHIWrites, "Number of scalar phi writes after ScopInfo");
+STATISTIC(NumPHIWritesInLoops,
+ "Number of scalar phi writes nested in affine loops after ScopInfo");
+STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo");
+STATISTIC(NumSingletonWritesInLoops,
+ "Number of singleton writes nested in affine loops after ScopInfo");
+
// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code
@@ -5160,6 +5175,47 @@ bool Scop::isEscaping(Instruction *Inst)
return false;
}
+Scop::ScopStatistics Scop::getStatistics() const {
+ ScopStatistics Result;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+ auto LoopStat = ScopDetection::countBeneficialLoops(&R, *SE, *getLI(), 0);
+
+ int NumTotalLoops = LoopStat.NumLoops;
+ Result.NumBoxedLoops = getBoxedLoops().size();
+ Result.NumAffineLoops = NumTotalLoops - Result.NumBoxedLoops;
+
+ for (const ScopStmt &Stmt : *this) {
+ isl::set Domain = Stmt.getDomain().intersect_params(getContext());
+ bool IsInLoop = Stmt.getNumIterators() >= 1;
+ for (MemoryAccess *MA : Stmt) {
+ if (!MA->isWrite())
+ continue;
+
+ if (MA->isLatestValueKind()) {
+ Result.NumValueWrites += 1;
+ if (IsInLoop)
+ Result.NumValueWritesInLoops += 1;
+ }
+
+ if (MA->isLatestAnyPHIKind()) {
+ Result.NumPHIWrites += 1;
+ if (IsInLoop)
+ Result.NumPHIWritesInLoops += 1;
+ }
+
+ isl::set AccSet =
+ MA->getAccessRelation().intersect_domain(Domain).range();
+ if (AccSet.is_singleton()) {
+ Result.NumSingletonWrites += 1;
+ if (IsInLoop)
+ Result.NumSingletonWritesInLoops += 1;
+ }
+ }
+ }
+#endif
+ return Result;
+}
+
raw_ostream &polly::operator<<(raw_ostream &OS, const Scop &scop) {
scop.print(OS, PollyPrintInstructions);
return OS;
@@ -5177,7 +5233,11 @@ void ScopInfoRegionPass::getAnalysisUsag
AU.setPreservesAll();
}
-void updateLoopCountStatistic(ScopDetection::LoopStats Stats) {
+void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
+ Scop::ScopStatistics ScopStats) {
+ assert(Stats.NumLoops == ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops);
+
+ NumScops++;
NumLoopsInScop += Stats.NumLoops;
MaxNumLoopsInScop =
std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
@@ -5194,6 +5254,16 @@ void updateLoopCountStatistic(ScopDetect
NumScopsDepthFive++;
else
NumScopsDepthLarger++;
+
+ NumAffineLoops += ScopStats.NumAffineLoops;
+ NumBoxedLoops += ScopStats.NumBoxedLoops;
+
+ NumValueWrites += ScopStats.NumValueWrites;
+ NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
+ NumPHIWrites += ScopStats.NumPHIWrites;
+ NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
+ NumSingletonWrites += ScopStats.NumSingletonWrites;
+ NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
}
bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
@@ -5213,11 +5283,13 @@ bool ScopInfoRegionPass::runOnRegion(Reg
ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE);
S = SB.getScop(); // take ownership of scop object
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
if (S) {
ScopDetection::LoopStats Stats =
ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
- updateLoopCountStatistic(Stats);
+ updateLoopCountStatistic(Stats, S->getStatistics());
}
+#endif
return false;
}
@@ -5268,9 +5340,11 @@ void ScopInfo::recompute() {
std::unique_ptr<Scop> S = SB.getScop();
if (!S)
continue;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
ScopDetection::LoopStats Stats =
ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
- updateLoopCountStatistic(Stats);
+ updateLoopCountStatistic(Stats, S->getStatistics());
+#endif
bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second;
assert(Inserted && "Building Scop for the same region twice!");
(void)Inserted;
Modified: polly/trunk/lib/CodeGen/CodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp Wed Aug 23 06:50:30 2017
@@ -56,6 +56,13 @@ static cl::opt<bool, true>
cl::location(polly::PerfMonitoring), cl::init(false),
cl::ZeroOrMore, cl::cat(PollyCategory));
+STATISTIC(ScopsProcessed, "Number of SCoP processed");
+STATISTIC(CodegenedScops, "Number of successfully generated SCoPs");
+STATISTIC(CodegenedAffineLoops,
+ "Number of original affine loops in SCoPs that have been generated");
+STATISTIC(CodegenedBoxedLoops,
+ "Number of original boxed loops in SCoPs that have been generated");
+
namespace polly {
/// Mark a basic block unreachable.
///
@@ -162,6 +169,11 @@ static bool CodeGen(Scop &S, IslAstInfo
if (!AstRoot)
return false;
+ // Collect statistics. Do it before we modify the IR to avoid it having any
+ // influence on the result.
+ auto ScopStats = S.getStatistics();
+ ScopsProcessed++;
+
auto &DL = S.getFunction().getParent()->getDataLayout();
Region *R = &S.getRegion();
assert(!R->isTopLevelRegion() && "Top level regions are not supported");
@@ -249,6 +261,10 @@ static bool CodeGen(Scop &S, IslAstInfo
NodeBuilder.create(AstRoot);
NodeBuilder.finalize();
fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI);
+
+ CodegenedScops++;
+ CodegenedAffineLoops += ScopStats.NumAffineLoops;
+ CodegenedBoxedLoops += ScopStats.NumBoxedLoops;
}
Function *F = EnteringBB->getParent();
Modified: polly/trunk/lib/CodeGen/IslAst.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslAst.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslAst.cpp (original)
+++ polly/trunk/lib/CodeGen/IslAst.cpp Wed Aug 23 06:50:30 2017
@@ -78,6 +78,19 @@ static cl::opt<bool> DetectParallel("pol
cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
+STATISTIC(ScopsProcessed, "Number of SCoPs processed");
+STATISTIC(ScopsBeneficial, "Number of beneficial SCoPs");
+STATISTIC(BeneficialAffineLoops, "Number of beneficial affine loops");
+STATISTIC(BeneficialBoxedLoops, "Number of beneficial boxed loops");
+
+STATISTIC(NumForLoops, "Number of for-loops");
+STATISTIC(NumParallel, "Number of parallel for-loops");
+STATISTIC(NumInnermostParallel, "Number of innermost parallel for-loops");
+STATISTIC(NumOutermostParallel, "Number of outermost parallel for-loops");
+STATISTIC(NumReductionParallel, "Number of reduction-parallel for-loops");
+STATISTIC(NumExecutedInParallel, "Number of for-loops executed in parallel");
+STATISTIC(NumIfConditions, "Number of if-conditions");
+
namespace polly {
/// Temporary information used when building the ast.
struct AstBuildUserInfo {
@@ -401,6 +414,41 @@ static bool benefitsFromPolly(Scop &Scop
return true;
}
+/// Collect statistics for the syntax tree rooted at @p Ast.
+static void walkAstForStatistics(__isl_keep isl_ast_node *Ast) {
+ assert(Ast);
+ isl_ast_node_foreach_descendant_top_down(
+ Ast,
+ [](__isl_keep isl_ast_node *Node, void *User) -> isl_bool {
+ switch (isl_ast_node_get_type(Node)) {
+ case isl_ast_node_for:
+ NumForLoops++;
+ if (IslAstInfo::isParallel(Node))
+ NumParallel++;
+ if (IslAstInfo::isInnermostParallel(Node))
+ NumInnermostParallel++;
+ if (IslAstInfo::isOutermostParallel(Node))
+ NumOutermostParallel++;
+ if (IslAstInfo::isReductionParallel(Node))
+ NumReductionParallel++;
+ if (IslAstInfo::isExecutedInParallel(Node))
+ NumExecutedInParallel++;
+ break;
+
+ case isl_ast_node_if:
+ NumIfConditions++;
+ break;
+
+ default:
+ break;
+ }
+
+ // Continue traversing subtrees.
+ return isl_bool_true;
+ },
+ nullptr);
+}
+
IslAst::IslAst(Scop &Scop)
: S(Scop), Root(nullptr), RunCondition(nullptr),
Ctx(Scop.getSharedIslCtx()) {}
@@ -421,6 +469,11 @@ void IslAst::init(const Dependences &D)
if (!benefitsFromPolly(S, PerformParallelTest))
return;
+ auto ScopStats = S.getStatistics();
+ ScopsBeneficial++;
+ BeneficialAffineLoops += ScopStats.NumAffineLoops;
+ BeneficialBoxedLoops += ScopStats.NumBoxedLoops;
+
isl_ctx *Ctx = S.getIslCtx();
isl_options_set_ast_build_atomic_upper_bound(Ctx, true);
isl_options_set_ast_build_detect_min_max(Ctx, true);
@@ -454,6 +507,7 @@ void IslAst::init(const Dependences &D)
RunCondition = buildRunCondition(S, Build);
Root = isl_ast_build_node_from_schedule(Build, S.getScheduleTree().release());
+ walkAstForStatistics(Root);
isl_ast_build_free(Build);
}
@@ -692,6 +746,8 @@ bool IslAstInfoWrapperPass::runOnScop(Sc
if (Scop.isToBeSkipped())
return false;
+ ScopsProcessed++;
+
const Dependences &D =
getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement);
Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Wed Aug 23 06:50:30 2017
@@ -53,6 +53,11 @@ using namespace llvm;
STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");
+STATISTIC(SequentialLoops, "Number of generated sequential for-loops");
+STATISTIC(ParallelLoops, "Number of generated parallel for-loops");
+STATISTIC(VectorLoops, "Number of generated vector for-loops");
+STATISTIC(IfConditions, "Number of generated if-conditions");
+
static cl::opt<bool> PollyGenerateRTCPrint(
"polly-codegen-emit-rtc-print",
cl::desc("Emit code that prints the runtime check result dynamically."),
@@ -480,6 +485,8 @@ void IslNodeBuilder::createForVector(__i
isl_ast_node_free(For);
isl_ast_expr_free(Iterator);
+
+ VectorLoops++;
}
namespace {
@@ -571,6 +578,8 @@ void IslNodeBuilder::createForSequential
isl_ast_node_free(For);
isl_ast_expr_free(Iterator);
isl_id_free(IteratorID);
+
+ SequentialLoops++;
}
/// Remove the BBs contained in a (sub)function from the dominator tree.
@@ -720,6 +729,8 @@ void IslNodeBuilder::createForParallel(_
isl_ast_node_free(For);
isl_ast_expr_free(Iterator);
isl_id_free(IteratorID);
+
+ ParallelLoops++;
}
/// Return whether any of @p Node's statements contain partial accesses.
@@ -813,6 +824,8 @@ void IslNodeBuilder::createIf(__isl_take
Builder.SetInsertPoint(&MergeBB->front());
isl_ast_node_free(If);
+
+ IfConditions++;
}
__isl_give isl_id_to_ast_expr *
Modified: polly/trunk/lib/Support/RegisterPasses.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Support/RegisterPasses.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Support/RegisterPasses.cpp (original)
+++ polly/trunk/lib/Support/RegisterPasses.cpp Wed Aug 23 06:50:30 2017
@@ -328,13 +328,13 @@ void registerPollyPasses(llvm::legacy::P
PM.add(polly::createPolyhedralInfoPass());
if (EnableSimplify)
- PM.add(polly::createSimplifyPass());
+ PM.add(polly::createSimplifyPass(0));
if (EnableForwardOpTree)
PM.add(polly::createForwardOpTreePass());
if (EnableDeLICM)
PM.add(polly::createDeLICMPass());
if (EnableSimplify)
- PM.add(polly::createSimplifyPass());
+ PM.add(polly::createSimplifyPass(1));
if (ImportJScop)
PM.add(polly::createJSONImporterPass());
Modified: polly/trunk/lib/Transform/DeLICM.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/DeLICM.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Transform/DeLICM.cpp (original)
+++ polly/trunk/lib/Transform/DeLICM.cpp Wed Aug 23 06:50:30 2017
@@ -61,6 +61,16 @@ STATISTIC(MappedPHIScalars, "Number of m
STATISTIC(TargetsMapped, "Number of stores used for at least one mapping");
STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized");
+STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM");
+STATISTIC(NumValueWritesInLoops,
+ "Number of scalar value writes nested in affine loops after DeLICM");
+STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM");
+STATISTIC(NumPHIWritesInLoops,
+ "Number of scalar phi writes nested in affine loops after DeLICM");
+STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM");
+STATISTIC(NumSingletonWritesInLoops,
+ "Number of singleton writes nested in affine loops after DeLICM");
+
isl::union_map computeReachingOverwrite(isl::union_map Schedule,
isl::union_map Writes,
bool InclPrevWrite,
@@ -1402,6 +1412,14 @@ public:
collapseToUnused(S);
+ auto ScopStats = S.getStatistics();
+ NumValueWrites += ScopStats.NumValueWrites;
+ NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
+ NumPHIWrites += ScopStats.NumPHIWrites;
+ NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
+ NumSingletonWrites += ScopStats.NumSingletonWrites;
+ NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
+
return false;
}
Modified: polly/trunk/lib/Transform/ForwardOpTree.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ForwardOpTree.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ForwardOpTree.cpp (original)
+++ polly/trunk/lib/Transform/ForwardOpTree.cpp Wed Aug 23 06:50:30 2017
@@ -56,6 +56,16 @@ STATISTIC(TotalModifiedStmts,
STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
+STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
+STATISTIC(NumValueWritesInLoops,
+ "Number of scalar value writes nested in affine loops after OpTree");
+STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
+STATISTIC(NumPHIWritesInLoops,
+ "Number of scalar phi writes nested in affine loops after OpTree");
+STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
+STATISTIC(NumSingletonWritesInLoops,
+ "Number of singleton writes nested in affine loops after OpTree");
+
namespace {
/// The state of whether an operand tree was/can be forwarded.
@@ -844,6 +854,15 @@ public:
DEBUG(dbgs() << "\nFinal Scop:\n");
DEBUG(dbgs() << S);
+ // Update statistics
+ auto ScopStats = S.getStatistics();
+ NumValueWrites += ScopStats.NumValueWrites;
+ NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
+ NumPHIWrites += ScopStats.NumPHIWrites;
+ NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
+ NumSingletonWrites += ScopStats.NumSingletonWrites;
+ NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
+
return false;
}
Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Wed Aug 23 06:50:30 2017
@@ -237,6 +237,33 @@ static cl::opt<bool> OptimizedScops(
"transformations is applied on the schedule tree"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+STATISTIC(ScopsProcessed, "Number of scops processed");
+STATISTIC(ScopsRescheduled, "Number of scops rescheduled");
+STATISTIC(ScopsOptimized, "Number of scops optimized");
+
+STATISTIC(NumAffineLoopsOptimized, "Number of affine loops optimized");
+STATISTIC(NumBoxedLoopsOptimized, "Number of boxed loops optimized");
+
+#define THREE_STATISTICS(VARNAME, DESC) \
+ static llvm::Statistic VARNAME[3] = { \
+ {DEBUG_TYPE, #VARNAME "0", DESC " (original)", {0}, false}, \
+ {DEBUG_TYPE, #VARNAME "1", DESC " (after scheduler)", {0}, false}, \
+ {DEBUG_TYPE, #VARNAME "2", DESC " (after optimizer)", {0}, false}}
+
+THREE_STATISTICS(NumBands, "Number of bands");
+THREE_STATISTICS(NumBandMembers, "Number of band members");
+THREE_STATISTICS(NumCoincident, "Number of coincident band members");
+THREE_STATISTICS(NumPermutable, "Number of permutable bands");
+THREE_STATISTICS(NumFilters, "Number of filter nodes");
+THREE_STATISTICS(NumExtension, "Number of extension nodes");
+
+STATISTIC(FirstLevelTileOpts, "Number of first level tiling applied");
+STATISTIC(SecondLevelTileOpts, "Number of second level tiling applied");
+STATISTIC(RegisterTileOpts, "Number of register tiling applied");
+STATISTIC(PrevectOpts, "Number of strip-mining for prevectorization applied");
+STATISTIC(MatMulOpts,
+ "Number of matrix multiplication patterns detected and optimized");
+
/// Create an isl::union_set, which describes the isolate option based on
/// IsolateDomain.
///
@@ -368,6 +395,7 @@ isl::schedule_node ScheduleTreeOptimizer
if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf)
Node = Node.parent();
auto LoopMarker = isl::id::alloc(Node.get_ctx(), "SIMD", nullptr);
+ PrevectOpts++;
return Node.insert_mark(LoopMarker);
}
@@ -456,17 +484,23 @@ bool ScheduleTreeOptimizer::isTileableBa
__isl_give isl::schedule_node
ScheduleTreeOptimizer::standardBandOpts(isl::schedule_node Node, void *User) {
- if (FirstLevelTiling)
+ if (FirstLevelTiling) {
Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes,
FirstLevelDefaultTileSize);
+ FirstLevelTileOpts++;
+ }
- if (SecondLevelTiling)
+ if (SecondLevelTiling) {
Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes,
SecondLevelDefaultTileSize);
+ SecondLevelTileOpts++;
+ }
- if (RegisterTiling)
+ if (RegisterTiling) {
Node =
applyRegisterTiling(Node, RegisterTileSizes, RegisterDefaultTileSize);
+ RegisterTileOpts++;
+ }
if (PollyVectorizerChoice == VECTORIZER_NONE)
return Node;
@@ -1235,6 +1269,7 @@ ScheduleTreeOptimizer::optimizeBand(__is
isMatrMultPattern(isl::manage(isl_schedule_node_copy(Node)), OAI->D,
MMI)) {
DEBUG(dbgs() << "The matrix multiplication pattern was detected\n");
+ MatMulOpts++;
return optimizeMatMulPattern(isl::manage(Node), OAI->TTI, MMI).release();
}
@@ -1308,6 +1343,52 @@ private:
char IslScheduleOptimizer::ID = 0;
+/// Collect statistics for the schedule tree.
+///
+/// @param Schedule The schedule tree to analyze. If not a schedule tree it is
+/// ignored.
+/// @param Version The version of the schedule tree that is analyzed.
+/// 0 for the original schedule tree before any transformation.
+/// 1 for the schedule tree after isl's rescheduling.
+/// 2 for the schedule tree after optimizations are applied
+/// (tiling, pattern matching)
+static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) {
+ auto Root = Schedule.get_root();
+ if (!Root)
+ return;
+
+ Root.foreach_ancestor_top_down([Version](
+ isl::schedule_node Node) -> isl::stat {
+ switch (isl_schedule_node_get_type(Node.get())) {
+ case isl_schedule_node_band: {
+ NumBands[Version]++;
+ if (isl_schedule_node_band_get_permutable(Node.get()) == isl_bool_true)
+ NumPermutable[Version]++;
+
+ int CountMembers = isl_schedule_node_band_n_member(Node.get());
+ NumBandMembers[Version] += CountMembers;
+ for (int i = 0; i < CountMembers; i += 1) {
+ if (Node.band_member_get_coincident(i))
+ NumCoincident[Version]++;
+ }
+ } break;
+
+ case isl_schedule_node_filter:
+ NumFilters[Version]++;
+ break;
+
+ case isl_schedule_node_extension:
+ NumExtension[Version]++;
+ break;
+
+ default:
+ break;
+ }
+
+ return isl::stat::ok;
+ });
+}
+
bool IslScheduleOptimizer::runOnScop(Scop &S) {
// Skip SCoPs in case they're already optimised by PPCGCodeGeneration
@@ -1352,6 +1433,9 @@ bool IslScheduleOptimizer::runOnScop(Sco
if (!Domain)
return false;
+ ScopsProcessed++;
+ walkScheduleTreeForStatistics(S.getScheduleTree(), 0);
+
isl::union_map Validity = give(D.getDependences(ValidityKinds));
isl::union_map Proximity = give(D.getDependences(ProximityKinds));
@@ -1432,11 +1516,15 @@ bool IslScheduleOptimizer::runOnScop(Sco
auto Schedule = SC.compute_schedule();
isl_options_set_on_error(Ctx, OnErrorStatus);
+ walkScheduleTreeForStatistics(Schedule, 1);
+
// In cases the scheduler is not able to optimize the code, we just do not
// touch the schedule.
if (!Schedule)
return false;
+ ScopsRescheduled++;
+
DEBUG({
auto *P = isl_printer_to_str(Ctx);
P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
@@ -1451,10 +1539,16 @@ bool IslScheduleOptimizer::runOnScop(Sco
auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
const OptimizerAdditionalInfoTy OAI = {TTI, const_cast<Dependences *>(&D)};
auto NewSchedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
+ walkScheduleTreeForStatistics(NewSchedule, 1);
if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewSchedule))
return false;
+ auto ScopStats = S.getStatistics();
+ ScopsOptimized++;
+ NumAffineLoopsOptimized += ScopStats.NumAffineLoops;
+ NumBoxedLoopsOptimized += ScopStats.NumBoxedLoops;
+
S.setScheduleTree(NewSchedule.release());
S.markAsOptimized();
Modified: polly/trunk/lib/Transform/Simplify.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/Simplify.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Transform/Simplify.cpp (original)
+++ polly/trunk/lib/Transform/Simplify.cpp Wed Aug 23 06:50:30 2017
@@ -27,25 +27,44 @@ using namespace polly;
namespace {
+#define TWO_STATISTICS(VARNAME, DESC) \
+ static llvm::Statistic VARNAME[2] = { \
+ {DEBUG_TYPE, #VARNAME "0", DESC " (first)", {0}, false}, \
+ {DEBUG_TYPE, #VARNAME "1", DESC " (second)", {0}, false}}
+
/// Number of max disjuncts we allow in removeOverwrites(). This is to avoid
/// that the analysis of accesses in a statement is becoming too complex. Chosen
/// to be relatively small because all the common cases should access only few
/// array elements per statement.
static int const SimplifyMaxDisjuncts = 4;
-STATISTIC(ScopsProcessed, "Number of SCoPs processed");
-STATISTIC(ScopsModified, "Number of SCoPs simplified");
+TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed");
+TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified");
-STATISTIC(TotalOverwritesRemoved, "Number of removed overwritten writes");
-STATISTIC(TotalWritesCoalesced, "Number of writes coalesced with another");
-STATISTIC(TotalRedundantWritesRemoved,
- "Number of writes of same value removed in any SCoP");
-STATISTIC(TotalEmptyPartialAccessesRemoved,
- "Number of empty partial accesses removed");
-STATISTIC(TotalDeadAccessesRemoved, "Number of dead accesses removed");
-STATISTIC(TotalDeadInstructionsRemoved,
- "Number of unused instructions removed");
-STATISTIC(TotalStmtsRemoved, "Number of statements removed in any SCoP");
+TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes");
+TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another");
+TWO_STATISTICS(TotalRedundantWritesRemoved,
+ "Number of writes of same value removed in any SCoP");
+TWO_STATISTICS(TotalEmptyPartialAccessesRemoved,
+ "Number of empty partial accesses removed");
+TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed");
+TWO_STATISTICS(TotalDeadInstructionsRemoved,
+ "Number of unused instructions removed");
+TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP");
+
+TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify");
+TWO_STATISTICS(
+ NumValueWritesInLoops,
+ "Number of scalar value writes nested in affine loops after Simplify");
+TWO_STATISTICS(NumPHIWrites,
+ "Number of scalar phi writes after the first simplification");
+TWO_STATISTICS(
+ NumPHIWritesInLoops,
+ "Number of scalar phi writes nested in affine loops after Simplify");
+TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify");
+TWO_STATISTICS(
+ NumSingletonWritesInLoops,
+ "Number of singleton writes nested in affine loops after Simplify");
static bool isImplicitRead(MemoryAccess *MA) {
return MA->isRead() && MA->isOriginalScalarKind();
@@ -100,6 +119,10 @@ static isl::union_map underapproximatedA
class Simplify : public ScopPass {
private:
+ /// The invocation id (if there are multiple instances in the pass manager's
+ /// pipeline) to determine which statistics to update.
+ int CallNo;
+
/// The last/current SCoP that is/has been processed.
Scop *S;
@@ -176,7 +199,7 @@ private:
Stmt.removeSingleMemoryAccess(MA);
OverwritesRemoved++;
- TotalOverwritesRemoved++;
+ TotalOverwritesRemoved[CallNo]++;
}
// Unconditional writes overwrite other values.
@@ -315,7 +338,7 @@ private:
// We removed MA, OtherMA takes its role.
MA = OtherMA;
- TotalWritesCoalesced++;
+ TotalWritesCoalesced[CallNo]++;
WritesCoalesced++;
// Don't look for more candidates.
@@ -437,7 +460,7 @@ private:
Stmt.removeSingleMemoryAccess(MA);
RedundantWritesRemoved++;
- TotalRedundantWritesRemoved++;
+ TotalRedundantWritesRemoved[CallNo]++;
}
}
}
@@ -476,7 +499,7 @@ private:
StmtsRemoved = NumStmtsBefore - S->getSize();
DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore
<< ") statements\n");
- TotalStmtsRemoved += StmtsRemoved;
+ TotalStmtsRemoved[CallNo] += StmtsRemoved;
}
/// Remove accesses that have an empty domain.
@@ -501,7 +524,7 @@ private:
for (MemoryAccess *MA : DeferredRemove) {
Stmt.removeSingleMemoryAccess(MA);
EmptyPartialAccessesRemoved++;
- TotalEmptyPartialAccessesRemoved++;
+ TotalEmptyPartialAccessesRemoved[CallNo]++;
}
}
}
@@ -530,7 +553,7 @@ private:
Stmt->removeSingleMemoryAccess(MA);
DeadAccessesRemoved++;
- TotalDeadAccessesRemoved++;
+ TotalDeadAccessesRemoved[CallNo]++;
}
// Remove all non-reachable instructions.
@@ -548,7 +571,7 @@ private:
DEBUG(dbgs() << "Removing "; Inst->print(dbgs());
dbgs() << " because it is not used\n");
DeadInstructionsRemoved++;
- TotalDeadInstructionsRemoved++;
+ TotalDeadInstructionsRemoved[CallNo]++;
continue;
}
@@ -595,7 +618,7 @@ private:
public:
static char ID;
- explicit Simplify() : ScopPass(ID) {}
+ explicit Simplify(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredTransitive<ScopInfoRegionPass>();
@@ -610,7 +633,7 @@ public:
// Prepare processing of this SCoP.
this->S = &S;
- ScopsProcessed++;
+ ScopsProcessed[CallNo]++;
DEBUG(dbgs() << "Removing partial writes that never happen...\n");
removeEmptyPartialAccesses();
@@ -632,10 +655,18 @@ public:
removeUnnecessaryStmts();
if (isModified())
- ScopsModified++;
+ ScopsModified[CallNo]++;
DEBUG(dbgs() << "\nFinal Scop:\n");
DEBUG(dbgs() << S);
+ auto ScopStats = S.getStatistics();
+ NumValueWrites[CallNo] += ScopStats.NumValueWrites;
+ NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops;
+ NumPHIWrites[CallNo] += ScopStats.NumPHIWrites;
+ NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops;
+ NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites;
+ NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops;
+
return false;
}
@@ -688,7 +719,7 @@ SmallVector<MemoryAccess *, 32> getAcces
}
} // namespace polly
-Pass *polly::createSimplifyPass() { return new Simplify(); }
+Pass *polly::createSimplifyPass(int CallNo) { return new Simplify(CallNo); }
INITIALIZE_PASS_BEGIN(Simplify, "polly-simplify", "Polly - Simplify", false,
false)
More information about the llvm-commits
mailing list