[polly] r311553 - Add more statistics.

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 23 06:50:31 PDT 2017


Author: meinersbur
Date: Wed Aug 23 06:50:30 2017
New Revision: 311553

URL: http://llvm.org/viewvc/llvm-project?rev=311553&view=rev
Log:
Add more statistics.

Add statistics about
- Which optimizations are applied
- Number of loops in Scops at various stages
- Number of scalar/singleton writes at various stages representative
  for scalar false dependencies
- Number of parallel loops

These will be useful to find regressions due to moving Polly further
down of LLVM's pass pipeline.

Differential Revision: https://reviews.llvm.org/D37049

Modified:
    polly/trunk/include/polly/ScopInfo.h
    polly/trunk/include/polly/Simplify.h
    polly/trunk/include/polly/Support/SCEVAffinator.h
    polly/trunk/lib/Analysis/PruneUnprofitable.cpp
    polly/trunk/lib/Analysis/ScopInfo.cpp
    polly/trunk/lib/CodeGen/CodeGeneration.cpp
    polly/trunk/lib/CodeGen/IslAst.cpp
    polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
    polly/trunk/lib/Support/RegisterPasses.cpp
    polly/trunk/lib/Transform/DeLICM.cpp
    polly/trunk/lib/Transform/ForwardOpTree.cpp
    polly/trunk/lib/Transform/ScheduleOptimizer.cpp
    polly/trunk/lib/Transform/Simplify.cpp

Modified: polly/trunk/include/polly/ScopInfo.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/ScopInfo.h?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/include/polly/ScopInfo.h (original)
+++ polly/trunk/include/polly/ScopInfo.h Wed Aug 23 06:50:30 2017
@@ -1936,6 +1936,10 @@ private:
   /// Scop constructor; invoked from ScopBuilder::buildScop.
   Scop(Region &R, ScalarEvolution &SE, LoopInfo &LI,
        ScopDetection::DetectionContext &DC, OptimizationRemarkEmitter &ORE);
+
+  /// Return the LoopInfo used for this Scop.
+  LoopInfo *getLI() const { return Affinator.getLI(); }
+
   //@}
 
   /// Initialize this ScopBuilder.
@@ -3018,6 +3022,25 @@ public:
 
   /// Return whether @p Inst has a use outside of this SCoP.
   bool isEscaping(Instruction *Inst);
+
+  struct ScopStatistics {
+    int NumAffineLoops = 0;
+    int NumBoxedLoops = 0;
+
+    int NumValueWrites = 0;
+    int NumValueWritesInLoops = 0;
+    int NumPHIWrites = 0;
+    int NumPHIWritesInLoops = 0;
+    int NumSingletonWrites = 0;
+    int NumSingletonWritesInLoops = 0;
+  };
+
+  /// Collect statistic about this SCoP.
+  ///
+  /// These are most commonly used for LLVM's static counters (Statistic.h) in
+  /// various places. If statistics are disabled, only zeros are returned to
+  /// avoid the overhead.
+  ScopStatistics getStatistics() const;
 };
 
 /// Print Scop scop to raw_ostream OS.

Modified: polly/trunk/include/polly/Simplify.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/Simplify.h?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/include/polly/Simplify.h (original)
+++ polly/trunk/include/polly/Simplify.h Wed Aug 23 06:50:30 2017
@@ -41,7 +41,16 @@ class ScopStmt;
 ///   The order in which implicit writes are executed relative to each other is
 ///   undefined.
 llvm::SmallVector<MemoryAccess *, 32> getAccessesInOrder(ScopStmt &Stmt);
-llvm::Pass *createSimplifyPass();
+
+/// Create a Simplify pass
+///
+/// @param CallNo Disambiguates this instance for when there are multiple
+///               instances of this pass in the pass manager. It is used only to
+///               keep the statistics apart and has no influence on the
+///               simplification itself.
+///
+/// @return The Simplify pass.
+llvm::Pass *createSimplifyPass(int CallNo = 0);
 } // namespace polly
 
 namespace llvm {

Modified: polly/trunk/include/polly/Support/SCEVAffinator.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/Support/SCEVAffinator.h?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/include/polly/Support/SCEVAffinator.h (original)
+++ polly/trunk/include/polly/Support/SCEVAffinator.h Wed Aug 23 06:50:30 2017
@@ -73,6 +73,9 @@ public:
   /// Check an <nsw> AddRec for the loop @p L is cached.
   bool hasNSWAddRecForLoop(llvm::Loop *L) const;
 
+  /// Return the LoopInfo used by thi object.
+  llvm::LoopInfo *getLI() const { return &LI; }
+
 private:
   /// Key to identify cached expressions.
   using CacheKey = std::pair<const llvm::SCEV *, llvm::BasicBlock *>;

Modified: polly/trunk/lib/Analysis/PruneUnprofitable.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/PruneUnprofitable.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/PruneUnprofitable.cpp (original)
+++ polly/trunk/lib/Analysis/PruneUnprofitable.cpp Wed Aug 23 06:50:30 2017
@@ -25,12 +25,36 @@ STATISTIC(ScopsProcessed,
           "Number of SCoPs considered for unprofitability pruning");
 STATISTIC(ScopsPruned, "Number of pruned SCoPs because it they cannot be "
                        "optimized in a significant way");
+STATISTIC(ScopsSurvived, "Number of SCoPs after pruning");
+
+STATISTIC(NumPrunedLoops, "Number of pruned loops");
+STATISTIC(NumPrunedBoxedLoops, "Number of pruned boxed loops");
+STATISTIC(NumPrunedAffineLoops, "Number of pruned affine loops");
+
+STATISTIC(NumLoopsInScop, "Number of loops in scops after pruning");
+STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after pruning");
+STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after pruning");
 
 class PruneUnprofitable : public ScopPass {
 private:
   PruneUnprofitable(const PruneUnprofitable &) = delete;
   const PruneUnprofitable &operator=(const PruneUnprofitable &) = delete;
 
+  void updateStatistics(Scop &S, bool Pruned) {
+    auto ScopStats = S.getStatistics();
+    if (Pruned) {
+      ScopsPruned++;
+      NumPrunedLoops += ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops;
+      NumPrunedBoxedLoops += ScopStats.NumBoxedLoops;
+      NumPrunedAffineLoops += ScopStats.NumAffineLoops;
+    } else {
+      ScopsSurvived++;
+      NumLoopsInScop += ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops;
+      NumBoxedLoops += ScopStats.NumBoxedLoops;
+      NumAffineLoops += ScopStats.NumAffineLoops;
+    }
+  }
+
 public:
   static char ID;
   explicit PruneUnprofitable() : ScopPass(ID) {}
@@ -52,8 +76,10 @@ public:
     if (!S.isProfitable(true)) {
       DEBUG(dbgs() << "SCoP pruned because it probably cannot be optimized in "
                       "a significant way\n");
-      ScopsPruned++;
       S.invalidate(PROFITABLE, DebugLoc());
+      updateStatistics(S, true);
+    } else {
+      updateStatistics(S, false);
     }
 
     return false;

Modified: polly/trunk/lib/Analysis/ScopInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp (original)
+++ polly/trunk/lib/Analysis/ScopInfo.cpp Wed Aug 23 06:50:30 2017
@@ -121,7 +121,11 @@ STATISTIC(AssumptionsInvariantLoad,
 STATISTIC(AssumptionsDelinearization,
           "Number of delinearization assumptions taken.");
 
+STATISTIC(NumScops, "Number of feasible SCoPs after ScopInfo");
 STATISTIC(NumLoopsInScop, "Number of loops in scops");
+STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after ScopInfo");
+STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after ScopInfo");
+
 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
@@ -131,6 +135,17 @@ STATISTIC(NumScopsDepthLarger,
           "Number of scops with maximal loop depth 6 and larger");
 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
 
+STATISTIC(NumValueWrites, "Number of scalar value writes after ScopInfo");
+STATISTIC(
+    NumValueWritesInLoops,
+    "Number of scalar value writes nested in affine loops after ScopInfo");
+STATISTIC(NumPHIWrites, "Number of scalar phi writes after ScopInfo");
+STATISTIC(NumPHIWritesInLoops,
+          "Number of scalar phi writes nested in affine loops after ScopInfo");
+STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo");
+STATISTIC(NumSingletonWritesInLoops,
+          "Number of singleton writes nested in affine loops after ScopInfo");
+
 // The maximal number of basic sets we allow during domain construction to
 // be created. More complex scops will result in very high compile time and
 // are also unlikely to result in good code
@@ -5160,6 +5175,47 @@ bool Scop::isEscaping(Instruction *Inst)
   return false;
 }
 
+Scop::ScopStatistics Scop::getStatistics() const {
+  ScopStatistics Result;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+  auto LoopStat = ScopDetection::countBeneficialLoops(&R, *SE, *getLI(), 0);
+
+  int NumTotalLoops = LoopStat.NumLoops;
+  Result.NumBoxedLoops = getBoxedLoops().size();
+  Result.NumAffineLoops = NumTotalLoops - Result.NumBoxedLoops;
+
+  for (const ScopStmt &Stmt : *this) {
+    isl::set Domain = Stmt.getDomain().intersect_params(getContext());
+    bool IsInLoop = Stmt.getNumIterators() >= 1;
+    for (MemoryAccess *MA : Stmt) {
+      if (!MA->isWrite())
+        continue;
+
+      if (MA->isLatestValueKind()) {
+        Result.NumValueWrites += 1;
+        if (IsInLoop)
+          Result.NumValueWritesInLoops += 1;
+      }
+
+      if (MA->isLatestAnyPHIKind()) {
+        Result.NumPHIWrites += 1;
+        if (IsInLoop)
+          Result.NumPHIWritesInLoops += 1;
+      }
+
+      isl::set AccSet =
+          MA->getAccessRelation().intersect_domain(Domain).range();
+      if (AccSet.is_singleton()) {
+        Result.NumSingletonWrites += 1;
+        if (IsInLoop)
+          Result.NumSingletonWritesInLoops += 1;
+      }
+    }
+  }
+#endif
+  return Result;
+}
+
 raw_ostream &polly::operator<<(raw_ostream &OS, const Scop &scop) {
   scop.print(OS, PollyPrintInstructions);
   return OS;
@@ -5177,7 +5233,11 @@ void ScopInfoRegionPass::getAnalysisUsag
   AU.setPreservesAll();
 }
 
-void updateLoopCountStatistic(ScopDetection::LoopStats Stats) {
+void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
+                              Scop::ScopStatistics ScopStats) {
+  assert(Stats.NumLoops == ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops);
+
+  NumScops++;
   NumLoopsInScop += Stats.NumLoops;
   MaxNumLoopsInScop =
       std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
@@ -5194,6 +5254,16 @@ void updateLoopCountStatistic(ScopDetect
     NumScopsDepthFive++;
   else
     NumScopsDepthLarger++;
+
+  NumAffineLoops += ScopStats.NumAffineLoops;
+  NumBoxedLoops += ScopStats.NumBoxedLoops;
+
+  NumValueWrites += ScopStats.NumValueWrites;
+  NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
+  NumPHIWrites += ScopStats.NumPHIWrites;
+  NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
+  NumSingletonWrites += ScopStats.NumSingletonWrites;
+  NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
 }
 
 bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
@@ -5213,11 +5283,13 @@ bool ScopInfoRegionPass::runOnRegion(Reg
   ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE);
   S = SB.getScop(); // take ownership of scop object
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
   if (S) {
     ScopDetection::LoopStats Stats =
         ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
-    updateLoopCountStatistic(Stats);
+    updateLoopCountStatistic(Stats, S->getStatistics());
   }
+#endif
 
   return false;
 }
@@ -5268,9 +5340,11 @@ void ScopInfo::recompute() {
     std::unique_ptr<Scop> S = SB.getScop();
     if (!S)
       continue;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
     ScopDetection::LoopStats Stats =
         ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
-    updateLoopCountStatistic(Stats);
+    updateLoopCountStatistic(Stats, S->getStatistics());
+#endif
     bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second;
     assert(Inserted && "Building Scop for the same region twice!");
     (void)Inserted;

Modified: polly/trunk/lib/CodeGen/CodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp Wed Aug 23 06:50:30 2017
@@ -56,6 +56,13 @@ static cl::opt<bool, true>
                     cl::location(polly::PerfMonitoring), cl::init(false),
                     cl::ZeroOrMore, cl::cat(PollyCategory));
 
+STATISTIC(ScopsProcessed, "Number of SCoP processed");
+STATISTIC(CodegenedScops, "Number of successfully generated SCoPs");
+STATISTIC(CodegenedAffineLoops,
+          "Number of original affine loops in SCoPs that have been generated");
+STATISTIC(CodegenedBoxedLoops,
+          "Number of original boxed loops in SCoPs that have been generated");
+
 namespace polly {
 /// Mark a basic block unreachable.
 ///
@@ -162,6 +169,11 @@ static bool CodeGen(Scop &S, IslAstInfo
   if (!AstRoot)
     return false;
 
+  // Collect statistics. Do it before we modify the IR to avoid having it any
+  // influence on the result.
+  auto ScopStats = S.getStatistics();
+  ScopsProcessed++;
+
   auto &DL = S.getFunction().getParent()->getDataLayout();
   Region *R = &S.getRegion();
   assert(!R->isTopLevelRegion() && "Top level regions are not supported");
@@ -249,6 +261,10 @@ static bool CodeGen(Scop &S, IslAstInfo
     NodeBuilder.create(AstRoot);
     NodeBuilder.finalize();
     fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI);
+
+    CodegenedScops++;
+    CodegenedAffineLoops += ScopStats.NumAffineLoops;
+    CodegenedBoxedLoops += ScopStats.NumBoxedLoops;
   }
 
   Function *F = EnteringBB->getParent();

Modified: polly/trunk/lib/CodeGen/IslAst.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslAst.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslAst.cpp (original)
+++ polly/trunk/lib/CodeGen/IslAst.cpp Wed Aug 23 06:50:30 2017
@@ -78,6 +78,19 @@ static cl::opt<bool> DetectParallel("pol
                                     cl::init(false), cl::ZeroOrMore,
                                     cl::cat(PollyCategory));
 
+STATISTIC(ScopsProcessed, "Number of SCoPs processed");
+STATISTIC(ScopsBeneficial, "Number of beneficial SCoPs");
+STATISTIC(BeneficialAffineLoops, "Number of beneficial affine loops");
+STATISTIC(BeneficialBoxedLoops, "Number of beneficial boxed loops");
+
+STATISTIC(NumForLoops, "Number of for-loops");
+STATISTIC(NumParallel, "Number of parallel for-loops");
+STATISTIC(NumInnermostParallel, "Number of innermost parallel for-loops");
+STATISTIC(NumOutermostParallel, "Number of outermost parallel for-loops");
+STATISTIC(NumReductionParallel, "Number of reduction-parallel for-loops");
+STATISTIC(NumExecutedInParallel, "Number of for-loops executed in parallel");
+STATISTIC(NumIfConditions, "Number of if-conditions");
+
 namespace polly {
 /// Temporary information used when building the ast.
 struct AstBuildUserInfo {
@@ -401,6 +414,41 @@ static bool benefitsFromPolly(Scop &Scop
   return true;
 }
 
+/// Collect statistics for the syntax tree rooted at @p Ast.
+static void walkAstForStatistics(__isl_keep isl_ast_node *Ast) {
+  assert(Ast);
+  isl_ast_node_foreach_descendant_top_down(
+      Ast,
+      [](__isl_keep isl_ast_node *Node, void *User) -> isl_bool {
+        switch (isl_ast_node_get_type(Node)) {
+        case isl_ast_node_for:
+          NumForLoops++;
+          if (IslAstInfo::isParallel(Node))
+            NumParallel++;
+          if (IslAstInfo::isInnermostParallel(Node))
+            NumInnermostParallel++;
+          if (IslAstInfo::isOutermostParallel(Node))
+            NumOutermostParallel++;
+          if (IslAstInfo::isReductionParallel(Node))
+            NumReductionParallel++;
+          if (IslAstInfo::isExecutedInParallel(Node))
+            NumExecutedInParallel++;
+          break;
+
+        case isl_ast_node_if:
+          NumIfConditions++;
+          break;
+
+        default:
+          break;
+        }
+
+        // Continue traversing subtrees.
+        return isl_bool_true;
+      },
+      nullptr);
+}
+
 IslAst::IslAst(Scop &Scop)
     : S(Scop), Root(nullptr), RunCondition(nullptr),
       Ctx(Scop.getSharedIslCtx()) {}
@@ -421,6 +469,11 @@ void IslAst::init(const Dependences &D)
   if (!benefitsFromPolly(S, PerformParallelTest))
     return;
 
+  auto ScopStats = S.getStatistics();
+  ScopsBeneficial++;
+  BeneficialAffineLoops += ScopStats.NumAffineLoops;
+  BeneficialBoxedLoops += ScopStats.NumBoxedLoops;
+
   isl_ctx *Ctx = S.getIslCtx();
   isl_options_set_ast_build_atomic_upper_bound(Ctx, true);
   isl_options_set_ast_build_detect_min_max(Ctx, true);
@@ -454,6 +507,7 @@ void IslAst::init(const Dependences &D)
   RunCondition = buildRunCondition(S, Build);
 
   Root = isl_ast_build_node_from_schedule(Build, S.getScheduleTree().release());
+  walkAstForStatistics(Root);
 
   isl_ast_build_free(Build);
 }
@@ -692,6 +746,8 @@ bool IslAstInfoWrapperPass::runOnScop(Sc
   if (Scop.isToBeSkipped())
     return false;
 
+  ScopsProcessed++;
+
   const Dependences &D =
       getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement);
 

Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Wed Aug 23 06:50:30 2017
@@ -53,6 +53,11 @@ using namespace llvm;
 
 STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");
 
+STATISTIC(SequentialLoops, "Number of generated sequential for-loops");
+STATISTIC(ParallelLoops, "Number of generated parallel for-loops");
+STATISTIC(VectorLoops, "Number of generated vector for-loops");
+STATISTIC(IfConditions, "Number of generated if-conditions");
+
 static cl::opt<bool> PollyGenerateRTCPrint(
     "polly-codegen-emit-rtc-print",
     cl::desc("Emit code that prints the runtime check result dynamically."),
@@ -480,6 +485,8 @@ void IslNodeBuilder::createForVector(__i
 
   isl_ast_node_free(For);
   isl_ast_expr_free(Iterator);
+
+  VectorLoops++;
 }
 
 namespace {
@@ -571,6 +578,8 @@ void IslNodeBuilder::createForSequential
   isl_ast_node_free(For);
   isl_ast_expr_free(Iterator);
   isl_id_free(IteratorID);
+
+  SequentialLoops++;
 }
 
 /// Remove the BBs contained in a (sub)function from the dominator tree.
@@ -720,6 +729,8 @@ void IslNodeBuilder::createForParallel(_
   isl_ast_node_free(For);
   isl_ast_expr_free(Iterator);
   isl_id_free(IteratorID);
+
+  ParallelLoops++;
 }
 
 /// Return whether any of @p Node's statements contain partial accesses.
@@ -813,6 +824,8 @@ void IslNodeBuilder::createIf(__isl_take
   Builder.SetInsertPoint(&MergeBB->front());
 
   isl_ast_node_free(If);
+
+  IfConditions++;
 }
 
 __isl_give isl_id_to_ast_expr *

Modified: polly/trunk/lib/Support/RegisterPasses.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Support/RegisterPasses.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Support/RegisterPasses.cpp (original)
+++ polly/trunk/lib/Support/RegisterPasses.cpp Wed Aug 23 06:50:30 2017
@@ -328,13 +328,13 @@ void registerPollyPasses(llvm::legacy::P
     PM.add(polly::createPolyhedralInfoPass());
 
   if (EnableSimplify)
-    PM.add(polly::createSimplifyPass());
+    PM.add(polly::createSimplifyPass(0));
   if (EnableForwardOpTree)
     PM.add(polly::createForwardOpTreePass());
   if (EnableDeLICM)
     PM.add(polly::createDeLICMPass());
   if (EnableSimplify)
-    PM.add(polly::createSimplifyPass());
+    PM.add(polly::createSimplifyPass(1));
 
   if (ImportJScop)
     PM.add(polly::createJSONImporterPass());

Modified: polly/trunk/lib/Transform/DeLICM.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/DeLICM.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Transform/DeLICM.cpp (original)
+++ polly/trunk/lib/Transform/DeLICM.cpp Wed Aug 23 06:50:30 2017
@@ -61,6 +61,16 @@ STATISTIC(MappedPHIScalars, "Number of m
 STATISTIC(TargetsMapped, "Number of stores used for at least one mapping");
 STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized");
 
+STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM");
+STATISTIC(NumValueWritesInLoops,
+          "Number of scalar value writes nested in affine loops after DeLICM");
+STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM");
+STATISTIC(NumPHIWritesInLoops,
+          "Number of scalar phi writes nested in affine loops after DeLICM");
+STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM");
+STATISTIC(NumSingletonWritesInLoops,
+          "Number of singleton writes nested in affine loops after DeLICM");
+
 isl::union_map computeReachingOverwrite(isl::union_map Schedule,
                                         isl::union_map Writes,
                                         bool InclPrevWrite,
@@ -1402,6 +1412,14 @@ public:
 
     collapseToUnused(S);
 
+    auto ScopStats = S.getStatistics();
+    NumValueWrites += ScopStats.NumValueWrites;
+    NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
+    NumPHIWrites += ScopStats.NumPHIWrites;
+    NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
+    NumSingletonWrites += ScopStats.NumSingletonWrites;
+    NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
+
     return false;
   }
 

Modified: polly/trunk/lib/Transform/ForwardOpTree.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ForwardOpTree.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ForwardOpTree.cpp (original)
+++ polly/trunk/lib/Transform/ForwardOpTree.cpp Wed Aug 23 06:50:30 2017
@@ -56,6 +56,16 @@ STATISTIC(TotalModifiedStmts,
 
 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
 
+STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
+STATISTIC(NumValueWritesInLoops,
+          "Number of scalar value writes nested in affine loops after OpTree");
+STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
+STATISTIC(NumPHIWritesInLoops,
+          "Number of scalar phi writes nested in affine loops after OpTree");
+STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
+STATISTIC(NumSingletonWritesInLoops,
+          "Number of singleton writes nested in affine loops after OpTree");
+
 namespace {
 
 /// The state of whether an operand tree was/can be forwarded.
@@ -844,6 +854,15 @@ public:
     DEBUG(dbgs() << "\nFinal Scop:\n");
     DEBUG(dbgs() << S);
 
+    // Update statistics
+    auto ScopStats = S.getStatistics();
+    NumValueWrites += ScopStats.NumValueWrites;
+    NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
+    NumPHIWrites += ScopStats.NumPHIWrites;
+    NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
+    NumSingletonWrites += ScopStats.NumSingletonWrites;
+    NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
+
     return false;
   }
 

Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Wed Aug 23 06:50:30 2017
@@ -237,6 +237,33 @@ static cl::opt<bool> OptimizedScops(
              "transformations is applied on the schedule tree"),
     cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
 
+STATISTIC(ScopsProcessed, "Number of scops processed");
+STATISTIC(ScopsRescheduled, "Number of scops rescheduled");
+STATISTIC(ScopsOptimized, "Number of scops optimized");
+
+STATISTIC(NumAffineLoopsOptimized, "Number of affine loops optimized");
+STATISTIC(NumBoxedLoopsOptimized, "Number of boxed loops optimized");
+
+#define THREE_STATISTICS(VARNAME, DESC)                                        \
+  static llvm::Statistic VARNAME[3] = {                                        \
+      {DEBUG_TYPE, #VARNAME "0", DESC " (original)", {0}, false},              \
+      {DEBUG_TYPE, #VARNAME "1", DESC " (after scheduler)", {0}, false},       \
+      {DEBUG_TYPE, #VARNAME "2", DESC " (after optimizer)", {0}, false}}
+
+THREE_STATISTICS(NumBands, "Number of bands");
+THREE_STATISTICS(NumBandMembers, "Number of band members");
+THREE_STATISTICS(NumCoincident, "Number of coincident band members");
+THREE_STATISTICS(NumPermutable, "Number of permutable bands");
+THREE_STATISTICS(NumFilters, "Number of filter nodes");
+THREE_STATISTICS(NumExtension, "Number of extension nodes");
+
+STATISTIC(FirstLevelTileOpts, "Number of first level tiling applied");
+STATISTIC(SecondLevelTileOpts, "Number of second level tiling applied");
+STATISTIC(RegisterTileOpts, "Number of register tiling applied");
+STATISTIC(PrevectOpts, "Number of strip-mining for prevectorization applied");
+STATISTIC(MatMulOpts,
+          "Number of matrix multiplication patterns detected and optimized");
+
 /// Create an isl::union_set, which describes the isolate option based on
 /// IsolateDomain.
 ///
@@ -368,6 +395,7 @@ isl::schedule_node ScheduleTreeOptimizer
   if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf)
     Node = Node.parent();
   auto LoopMarker = isl::id::alloc(Node.get_ctx(), "SIMD", nullptr);
+  PrevectOpts++;
   return Node.insert_mark(LoopMarker);
 }
 
@@ -456,17 +484,23 @@ bool ScheduleTreeOptimizer::isTileableBa
 
 __isl_give isl::schedule_node
 ScheduleTreeOptimizer::standardBandOpts(isl::schedule_node Node, void *User) {
-  if (FirstLevelTiling)
+  if (FirstLevelTiling) {
     Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes,
                     FirstLevelDefaultTileSize);
+    FirstLevelTileOpts++;
+  }
 
-  if (SecondLevelTiling)
+  if (SecondLevelTiling) {
     Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes,
                     SecondLevelDefaultTileSize);
+    SecondLevelTileOpts++;
+  }
 
-  if (RegisterTiling)
+  if (RegisterTiling) {
     Node =
         applyRegisterTiling(Node, RegisterTileSizes, RegisterDefaultTileSize);
+    RegisterTileOpts++;
+  }
 
   if (PollyVectorizerChoice == VECTORIZER_NONE)
     return Node;
@@ -1235,6 +1269,7 @@ ScheduleTreeOptimizer::optimizeBand(__is
       isMatrMultPattern(isl::manage(isl_schedule_node_copy(Node)), OAI->D,
                         MMI)) {
     DEBUG(dbgs() << "The matrix multiplication pattern was detected\n");
+    MatMulOpts++;
     return optimizeMatMulPattern(isl::manage(Node), OAI->TTI, MMI).release();
   }
 
@@ -1308,6 +1343,52 @@ private:
 
 char IslScheduleOptimizer::ID = 0;
 
+/// Collect statistics for the schedule tree.
+///
+/// @param Schedule The schedule tree to analyze. If not a schedule tree it is
+/// ignored.
+/// @param Version  The version of the schedule tree that is analyzed.
+///                 0 for the original schedule tree before any transformation.
+///                 1 for the schedule tree after isl's rescheduling.
+///                 2 for the schedule tree after optimizations are applied
+///                 (tiling, pattern matching)
+static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) {
+  auto Root = Schedule.get_root();
+  if (!Root)
+    return;
+
+  Root.foreach_ancestor_top_down([Version](
+                                     isl::schedule_node Node) -> isl::stat {
+    switch (isl_schedule_node_get_type(Node.get())) {
+    case isl_schedule_node_band: {
+      NumBands[Version]++;
+      if (isl_schedule_node_band_get_permutable(Node.get()) == isl_bool_true)
+        NumPermutable[Version]++;
+
+      int CountMembers = isl_schedule_node_band_n_member(Node.get());
+      NumBandMembers[Version] += CountMembers;
+      for (int i = 0; i < CountMembers; i += 1) {
+        if (Node.band_member_get_coincident(i))
+          NumCoincident[Version]++;
+      }
+    } break;
+
+    case isl_schedule_node_filter:
+      NumFilters[Version]++;
+      break;
+
+    case isl_schedule_node_extension:
+      NumExtension[Version]++;
+      break;
+
+    default:
+      break;
+    }
+
+    return isl::stat::ok;
+  });
+}
+
 bool IslScheduleOptimizer::runOnScop(Scop &S) {
 
   // Skip SCoPs in case they're already optimised by PPCGCodeGeneration
@@ -1352,6 +1433,9 @@ bool IslScheduleOptimizer::runOnScop(Sco
   if (!Domain)
     return false;
 
+  ScopsProcessed++;
+  walkScheduleTreeForStatistics(S.getScheduleTree(), 0);
+
   isl::union_map Validity = give(D.getDependences(ValidityKinds));
   isl::union_map Proximity = give(D.getDependences(ProximityKinds));
 
@@ -1432,11 +1516,15 @@ bool IslScheduleOptimizer::runOnScop(Sco
   auto Schedule = SC.compute_schedule();
   isl_options_set_on_error(Ctx, OnErrorStatus);
 
+  walkScheduleTreeForStatistics(Schedule, 1);
+
   // In cases the scheduler is not able to optimize the code, we just do not
   // touch the schedule.
   if (!Schedule)
     return false;
 
+  ScopsRescheduled++;
+
   DEBUG({
     auto *P = isl_printer_to_str(Ctx);
     P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
@@ -1451,10 +1539,16 @@ bool IslScheduleOptimizer::runOnScop(Sco
   auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   const OptimizerAdditionalInfoTy OAI = {TTI, const_cast<Dependences *>(&D)};
   auto NewSchedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
+  walkScheduleTreeForStatistics(NewSchedule, 1);
 
   if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewSchedule))
     return false;
 
+  auto ScopStats = S.getStatistics();
+  ScopsOptimized++;
+  NumAffineLoopsOptimized += ScopStats.NumAffineLoops;
+  NumBoxedLoopsOptimized += ScopStats.NumBoxedLoops;
+
   S.setScheduleTree(NewSchedule.release());
   S.markAsOptimized();
 

Modified: polly/trunk/lib/Transform/Simplify.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/Simplify.cpp?rev=311553&r1=311552&r2=311553&view=diff
==============================================================================
--- polly/trunk/lib/Transform/Simplify.cpp (original)
+++ polly/trunk/lib/Transform/Simplify.cpp Wed Aug 23 06:50:30 2017
@@ -27,25 +27,44 @@ using namespace polly;
 
 namespace {
 
+#define TWO_STATISTICS(VARNAME, DESC)                                          \
+  static llvm::Statistic VARNAME[2] = {                                        \
+      {DEBUG_TYPE, #VARNAME "0", DESC " (first)", {0}, false},                 \
+      {DEBUG_TYPE, #VARNAME "1", DESC " (second)", {0}, false}}
+
 /// Number of max disjuncts we allow in removeOverwrites(). This is to avoid
 /// that the analysis of accesses in a statement is becoming too complex. Chosen
 /// to be relatively small because all the common cases should access only few
 /// array elements per statement.
 static int const SimplifyMaxDisjuncts = 4;
 
-STATISTIC(ScopsProcessed, "Number of SCoPs processed");
-STATISTIC(ScopsModified, "Number of SCoPs simplified");
+TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed");
+TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified");
 
-STATISTIC(TotalOverwritesRemoved, "Number of removed overwritten writes");
-STATISTIC(TotalWritesCoalesced, "Number of writes coalesced with another");
-STATISTIC(TotalRedundantWritesRemoved,
-          "Number of writes of same value removed in any SCoP");
-STATISTIC(TotalEmptyPartialAccessesRemoved,
-          "Number of empty partial accesses removed");
-STATISTIC(TotalDeadAccessesRemoved, "Number of dead accesses removed");
-STATISTIC(TotalDeadInstructionsRemoved,
-          "Number of unused instructions removed");
-STATISTIC(TotalStmtsRemoved, "Number of statements removed in any SCoP");
+TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes");
+TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another");
+TWO_STATISTICS(TotalRedundantWritesRemoved,
+               "Number of writes of same value removed in any SCoP");
+TWO_STATISTICS(TotalEmptyPartialAccessesRemoved,
+               "Number of empty partial accesses removed");
+TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed");
+TWO_STATISTICS(TotalDeadInstructionsRemoved,
+               "Number of unused instructions removed");
+TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP");
+
+TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify");
+TWO_STATISTICS(
+    NumValueWritesInLoops,
+    "Number of scalar value writes nested in affine loops after Simplify");
+TWO_STATISTICS(NumPHIWrites,
+               "Number of scalar phi writes after the first simplification");
+TWO_STATISTICS(
+    NumPHIWritesInLoops,
+    "Number of scalar phi writes nested in affine loops after Simplify");
+TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify");
+TWO_STATISTICS(
+    NumSingletonWritesInLoops,
+    "Number of singleton writes nested in affine loops after Simplify");
 
 static bool isImplicitRead(MemoryAccess *MA) {
   return MA->isRead() && MA->isOriginalScalarKind();
@@ -100,6 +119,10 @@ static isl::union_map underapproximatedA
 
 class Simplify : public ScopPass {
 private:
+  /// The invocation id (if there are multiple instances in the pass manager's
+  /// pipeline) to determine which statistics to update.
+  int CallNo;
+
   /// The last/current SCoP that is/has been processed.
   Scop *S;
 
@@ -176,7 +199,7 @@ private:
 
           Stmt.removeSingleMemoryAccess(MA);
           OverwritesRemoved++;
-          TotalOverwritesRemoved++;
+          TotalOverwritesRemoved[CallNo]++;
         }
 
         // Unconditional writes overwrite other values.
@@ -315,7 +338,7 @@ private:
             // We removed MA, OtherMA takes its role.
             MA = OtherMA;
 
-            TotalWritesCoalesced++;
+            TotalWritesCoalesced[CallNo]++;
             WritesCoalesced++;
 
             // Don't look for more candidates.
@@ -437,7 +460,7 @@ private:
               Stmt.removeSingleMemoryAccess(MA);
 
               RedundantWritesRemoved++;
-              TotalRedundantWritesRemoved++;
+              TotalRedundantWritesRemoved[CallNo]++;
             }
           }
         }
@@ -476,7 +499,7 @@ private:
     StmtsRemoved = NumStmtsBefore - S->getSize();
     DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore
                  << ") statements\n");
-    TotalStmtsRemoved += StmtsRemoved;
+    TotalStmtsRemoved[CallNo] += StmtsRemoved;
   }
 
   /// Remove accesses that have an empty domain.
@@ -501,7 +524,7 @@ private:
       for (MemoryAccess *MA : DeferredRemove) {
         Stmt.removeSingleMemoryAccess(MA);
         EmptyPartialAccessesRemoved++;
-        TotalEmptyPartialAccessesRemoved++;
+        TotalEmptyPartialAccessesRemoved[CallNo]++;
       }
     }
   }
@@ -530,7 +553,7 @@ private:
       Stmt->removeSingleMemoryAccess(MA);
 
       DeadAccessesRemoved++;
-      TotalDeadAccessesRemoved++;
+      TotalDeadAccessesRemoved[CallNo]++;
     }
 
     // Remove all non-reachable instructions.
@@ -548,7 +571,7 @@ private:
           DEBUG(dbgs() << "Removing "; Inst->print(dbgs());
                 dbgs() << " because it is not used\n");
           DeadInstructionsRemoved++;
-          TotalDeadInstructionsRemoved++;
+          TotalDeadInstructionsRemoved[CallNo]++;
           continue;
         }
 
@@ -595,7 +618,7 @@ private:
 
 public:
   static char ID;
-  explicit Simplify() : ScopPass(ID) {}
+  explicit Simplify(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {}
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequiredTransitive<ScopInfoRegionPass>();
@@ -610,7 +633,7 @@ public:
 
     // Prepare processing of this SCoP.
     this->S = &S;
-    ScopsProcessed++;
+    ScopsProcessed[CallNo]++;
 
     DEBUG(dbgs() << "Removing partial writes that never happen...\n");
     removeEmptyPartialAccesses();
@@ -632,10 +655,18 @@ public:
     removeUnnecessaryStmts();
 
     if (isModified())
-      ScopsModified++;
+      ScopsModified[CallNo]++;
     DEBUG(dbgs() << "\nFinal Scop:\n");
     DEBUG(dbgs() << S);
 
+    auto ScopStats = S.getStatistics();
+    NumValueWrites[CallNo] += ScopStats.NumValueWrites;
+    NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops;
+    NumPHIWrites[CallNo] += ScopStats.NumPHIWrites;
+    NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops;
+    NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites;
+    NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops;
+
     return false;
   }
 
@@ -688,7 +719,7 @@ SmallVector<MemoryAccess *, 32> getAcces
 }
 } // namespace polly
 
-Pass *polly::createSimplifyPass() { return new Simplify(); }
+Pass *polly::createSimplifyPass(int CallNo) { return new Simplify(CallNo); }
 
 INITIALIZE_PASS_BEGIN(Simplify, "polly-simplify", "Polly - Simplify", false,
                       false)




More information about the llvm-commits mailing list