[llvm] r336522 - [PGOMemOPSize] Preserve the DominatorTree

Chijun Sima via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 9 01:07:21 PDT 2018


Author: sima
Date: Mon Jul  9 01:07:21 2018
New Revision: 336522

URL: http://llvm.org/viewvc/llvm-project?rev=336522&view=rev
Log:
[PGOMemOPSize] Preserve the DominatorTree

Summary:
PGOMemOPSize only modifies the CFG in a couple of places; thus, we can preserve the DominatorTree with little effort.
When optimizing SQLite with -O3, this patch reduces the number of nodes traversed by DFS by 3.8% and the number of times DominatorTreeBase::recalculation is called by 5.7%.

Reviewers: kuhar, davide, dmgreen

Reviewed By: dmgreen

Subscribers: mzolotukhin, vsk, llvm-commits

Differential Revision: https://reviews.llvm.org/D48914

Modified:
    llvm/trunk/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
    llvm/trunk/test/Other/opt-O2-pipeline.ll
    llvm/trunk/test/Other/opt-O3-pipeline.ll
    llvm/trunk/test/Transforms/PGOProfile/memop_clone.ll
    llvm/trunk/test/Transforms/PGOProfile/memop_size_opt.ll
    llvm/trunk/test/Transforms/PGOProfile/memop_size_opt_zero.ll

Modified: llvm/trunk/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp?rev=336522&r1=336521&r2=336522&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp Mon Jul  9 01:07:21 2018
@@ -25,6 +25,8 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
@@ -112,6 +114,7 @@ private:
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
   }
 };
 } // end anonymous namespace
@@ -133,8 +136,8 @@ namespace {
 class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
 public:
   MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
-               OptimizationRemarkEmitter &ORE)
-      : Func(Func), BFI(BFI), ORE(ORE), Changed(false) {
+               OptimizationRemarkEmitter &ORE, DominatorTree *DT)
+      : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) {
     ValueDataArray =
         llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
     // Get the MemOPSize range information from option MemOPSizeRange,
@@ -170,6 +173,7 @@ private:
   Function &Func;
   BlockFrequencyInfo &BFI;
   OptimizationRemarkEmitter &ORE;
+  DominatorTree *DT;
   bool Changed;
   std::vector<MemIntrinsic *> WorkList;
   // Start of the previse range.
@@ -336,15 +340,16 @@ bool MemOPSizeOpt::perform(MemIntrinsic
   LLVM_DEBUG(dbgs() << *BB << "\n");
   auto OrigBBFreq = BFI.getBlockFreq(BB);
 
-  BasicBlock *DefaultBB = SplitBlock(BB, MI);
+  BasicBlock *DefaultBB = SplitBlock(BB, MI, DT);
   BasicBlock::iterator It(*MI);
   ++It;
   assert(It != DefaultBB->end());
-  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
+  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
   MergeBB->setName("MemOP.Merge");
   BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
   DefaultBB->setName("MemOP.Default");
 
+  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
   auto &Ctx = Func.getContext();
   IRBuilder<> IRB(BB);
   BB->getTerminator()->eraseFromParent();
@@ -361,6 +366,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic
 
   LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
 
+  std::vector<DominatorTree::UpdateType> Updates;
+  if (DT)
+    Updates.reserve(2 * SizeIds.size());
+
   for (uint64_t SizeId : SizeIds) {
     BasicBlock *CaseBB = BasicBlock::Create(
         Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
@@ -375,8 +384,15 @@ bool MemOPSizeOpt::perform(MemIntrinsic
     IRBuilder<> IRBCase(CaseBB);
     IRBCase.CreateBr(MergeBB);
     SI->addCase(CaseSizeId, CaseBB);
+    if (DT) {
+      Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
+      Updates.push_back({DominatorTree::Insert, BB, CaseBB});
+    }
     LLVM_DEBUG(dbgs() << *CaseBB << "\n");
   }
+  DTU.applyUpdates(Updates);
+  Updates.clear();
+
   setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
 
   LLVM_DEBUG(dbgs() << *BB << "\n");
@@ -397,13 +413,14 @@ bool MemOPSizeOpt::perform(MemIntrinsic
 } // namespace
 
 static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
-                                OptimizationRemarkEmitter &ORE) {
+                                OptimizationRemarkEmitter &ORE,
+                                DominatorTree *DT) {
   if (DisableMemOPOPT)
     return false;
 
   if (F.hasFnAttribute(Attribute::OptimizeForSize))
     return false;
-  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE);
+  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT);
   MemOPSizeOpt.perform();
   return MemOPSizeOpt.isChanged();
 }
@@ -412,7 +429,9 @@ bool PGOMemOPSizeOptLegacyPass::runOnFun
   BlockFrequencyInfo &BFI =
       getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-  return PGOMemOPSizeOptImpl(F, BFI, ORE);
+  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+  return PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
 }
 
 namespace llvm {
@@ -422,11 +441,13 @@ PreservedAnalyses PGOMemOPSizeOpt::run(F
                                        FunctionAnalysisManager &FAM) {
   auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE);
+  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
   if (!Changed)
     return PreservedAnalyses::all();
   auto PA = PreservedAnalyses();
   PA.preserve<GlobalsAA>();
+  PA.preserve<DominatorTreeAnalysis>();
   return PA;
 }
 } // namespace llvm

Modified: llvm/trunk/test/Other/opt-O2-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/opt-O2-pipeline.ll?rev=336522&r1=336521&r2=336522&view=diff
==============================================================================
--- llvm/trunk/test/Other/opt-O2-pipeline.ll (original)
+++ llvm/trunk/test/Other/opt-O2-pipeline.ll Mon Jul  9 01:07:21 2018
@@ -80,7 +80,6 @@
 ; CHECK-NEXT:         Lazy Block Frequency Analysis
 ; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         PGOMemOPSize
-; CHECK-NEXT:         Dominator Tree Construction
 ; CHECK-NEXT:         Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:         Function Alias Analysis Results
 ; CHECK-NEXT:         Natural Loop Information

Modified: llvm/trunk/test/Other/opt-O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/opt-O3-pipeline.ll?rev=336522&r1=336521&r2=336522&view=diff
==============================================================================
--- llvm/trunk/test/Other/opt-O3-pipeline.ll (original)
+++ llvm/trunk/test/Other/opt-O3-pipeline.ll Mon Jul  9 01:07:21 2018
@@ -84,7 +84,6 @@
 ; CHECK-NEXT:         Lazy Block Frequency Analysis
 ; CHECK-NEXT:         Optimization Remark Emitter
 ; CHECK-NEXT:         PGOMemOPSize
-; CHECK-NEXT:         Dominator Tree Construction
 ; CHECK-NEXT:         Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:         Function Alias Analysis Results
 ; CHECK-NEXT:         Natural Loop Information

Modified: llvm/trunk/test/Transforms/PGOProfile/memop_clone.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PGOProfile/memop_clone.ll?rev=336522&r1=336521&r2=336522&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/PGOProfile/memop_clone.ll (original)
+++ llvm/trunk/test/Transforms/PGOProfile/memop_clone.ll Mon Jul  9 01:07:21 2018
@@ -1,4 +1,4 @@
-; RUN: opt < %s -pgo-memop-opt -S | FileCheck %s
+; RUN: opt < %s -pgo-memop-opt -verify-dom-info -S | FileCheck %s
 
 define i32 @test(i8* %a, i8* %b) !prof !1 {
 ; CHECK_LABEL: test

Modified: llvm/trunk/test/Transforms/PGOProfile/memop_size_opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PGOProfile/memop_size_opt.ll?rev=336522&r1=336521&r2=336522&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/PGOProfile/memop_size_opt.ll (original)
+++ llvm/trunk/test/Transforms/PGOProfile/memop_size_opt.ll Mon Jul  9 01:07:21 2018
@@ -1,8 +1,8 @@
-; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
-; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
-; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
 ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
-; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
 ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
 
 

Modified: llvm/trunk/test/Transforms/PGOProfile/memop_size_opt_zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PGOProfile/memop_size_opt_zero.ll?rev=336522&r1=336521&r2=336522&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/PGOProfile/memop_size_opt_zero.ll (original)
+++ llvm/trunk/test/Transforms/PGOProfile/memop_size_opt_zero.ll Mon Jul  9 01:07:21 2018
@@ -1,7 +1,7 @@
 ; Test to ensure the pgo memop optimization pass doesn't try to scale
 ; up a value profile with a 0 count, which would lead to divide by 0.
-; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
-; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"




More information about the llvm-commits mailing list