[llvm] ddc4b56 - [ExpandMemCmpPass] Preserve Dominator Tree, if available

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 29 14:16:38 PST 2021


Author: Roman Lebedev
Date: 2021-01-30T01:14:51+03:00
New Revision: ddc4b56eef9fec990915470069a29e70bbde3711

URL: https://github.com/llvm/llvm-project/commit/ddc4b56eef9fec990915470069a29e70bbde3711
DIFF: https://github.com/llvm/llvm-project/commit/ddc4b56eef9fec990915470069a29e70bbde3711.diff

LOG: [ExpandMemCmpPass] Preserve Dominator Tree, if available

This finishes getting rid of all the avoidable Dominator Tree recalculations
in X86 optimized codegen pipeline.

Added: 
    

Modified: 
    llvm/lib/CodeGen/ExpandMemCmp.cpp
    llvm/test/CodeGen/AArch64/O3-pipeline.ll
    llvm/test/CodeGen/ARM/O3-pipeline.ll
    llvm/test/CodeGen/X86/opt-pipeline.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 9f85db9de884..50fdc2114780 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -21,11 +22,13 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SizeOpts.h"
-#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -75,6 +78,7 @@ class MemCmpExpansion {
   PHINode *PhiRes;
   const bool IsUsedForZeroCmp;
   const DataLayout &DL;
+  DomTreeUpdater *DTU;
   IRBuilder<> Builder;
   // Represents the decomposition in blocks of the expansion. For example,
   // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
@@ -123,7 +127,8 @@ class MemCmpExpansion {
 public:
   MemCmpExpansion(CallInst *CI, uint64_t Size,
                   const TargetTransformInfo::MemCmpExpansionOptions &Options,
-                  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);
+                  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
+                  DomTreeUpdater *DTU);
 
   unsigned getNumBlocks();
   uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -212,10 +217,12 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
 MemCmpExpansion::MemCmpExpansion(
     CallInst *const CI, uint64_t Size,
     const TargetTransformInfo::MemCmpExpansionOptions &Options,
-    const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
+    const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
+    DomTreeUpdater *DTU)
     : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
       NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
-      IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
+      IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
+      Builder(CI) {
   assert(Size > 0 && "zero blocks");
   // Scale the max size down if the target can load more bytes than we need.
   llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
@@ -325,13 +332,14 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
 // final phi node for selecting the memcmp result.
 void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
                                                unsigned OffsetBytes) {
-  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+  BasicBlock *BB = LoadCmpBlocks[BlockIndex];
+  Builder.SetInsertPoint(BB);
   const LoadPair Loads =
       getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
                   Type::getInt32Ty(CI->getContext()), OffsetBytes);
   Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
 
-  PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);
+  PhiRes->addIncoming(Diff, BB);
 
   if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
     // Early exit branch if difference found to EndBlock. Otherwise, continue to
@@ -340,10 +348,16 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
                                     ConstantInt::get(Diff->getType(), 0));
     BranchInst *CmpBr =
         BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
+    if (DTU)
+      DTU->applyUpdates(
+          {{DominatorTree::Insert, BB, EndBlock},
+           {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
     Builder.Insert(CmpBr);
   } else {
     // The last block has an unconditional branch to EndBlock.
     BranchInst *CmpBr = BranchInst::Create(EndBlock);
+    if (DTU)
+      DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
     Builder.Insert(CmpBr);
   }
 }
@@ -428,8 +442,12 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                            : LoadCmpBlocks[BlockIndex + 1];
   // Early exit branch if difference found to ResultBlock. Otherwise,
   // continue to next LoadCmpBlock or EndBlock.
+  BasicBlock *BB = Builder.GetInsertBlock();
   BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
   Builder.Insert(CmpBr);
+  if (DTU)
+    DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
+                       {DominatorTree::Insert, BB, NextBB}});
 
   // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
   // since early exit to ResultBlock was not taken (no difference was found in
@@ -482,8 +500,12 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
                            : LoadCmpBlocks[BlockIndex + 1];
   // Early exit branch if difference found to ResultBlock. Otherwise, continue
   // to next LoadCmpBlock or EndBlock.
+  BasicBlock *BB = Builder.GetInsertBlock();
   BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
   Builder.Insert(CmpBr);
+  if (DTU)
+    DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
+                       {DominatorTree::Insert, BB, ResBlock.BB}});
 
   // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
   // since early exit to ResultBlock was not taken (no difference was found in
@@ -507,6 +529,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
     PhiRes->addIncoming(Res, ResBlock.BB);
     BranchInst *NewBr = BranchInst::Create(EndBlock);
     Builder.Insert(NewBr);
+    if (DTU)
+      DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
     return;
   }
   BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
@@ -519,9 +543,11 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
       Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
                            ConstantInt::get(Builder.getInt32Ty(), 1));
 
+  PhiRes->addIncoming(Res, ResBlock.BB);
   BranchInst *NewBr = BranchInst::Create(EndBlock);
   Builder.Insert(NewBr);
-  PhiRes->addIncoming(Res, ResBlock.BB);
+  if (DTU)
+    DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
 }
 
 void MemCmpExpansion::setupResultBlockPHINodes() {
@@ -597,7 +623,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
   // Create the basic block framework for a multi-block expansion.
   if (getNumBlocks() != 1) {
     BasicBlock *StartBlock = CI->getParent();
-    EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
+    EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
+                          /*MSSAU=*/nullptr, "endblock");
     setupEndBlockPHINodes();
     createResultBlock();
 
@@ -610,9 +637,12 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
     // Create the number of required load compare basic blocks.
     createLoadCmpBlocks();
 
-    // Update the terminator added by splitBasicBlock to branch to the first
+    // Update the terminator added by SplitBlock to branch to the first
     // LoadCmpBlock.
     StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+    if (DTU)
+      DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
+                         {DominatorTree::Delete, StartBlock, EndBlock}});
   }
 
   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
@@ -707,7 +737,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
 ///  ret i32 %phi.res
 static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
                          const TargetLowering *TLI, const DataLayout *DL,
-                         ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+                         ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+                         DomTreeUpdater *DTU) {
   NumMemCmpCalls++;
 
   // Early exit from expansion if -Oz.
@@ -744,7 +775,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
     Options.MaxNumLoads = MaxLoadsPerMemcmp;
 
-  MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
+  MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
 
   // Don't expand if this will require more loads than desired by the target.
   if (Expansion.getNumLoads() == 0) {
@@ -763,8 +794,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   return true;
 }
 
-
-
 class ExpandMemCmpPass : public FunctionPass {
 public:
   static char ID;
@@ -791,7 +820,10 @@ class ExpandMemCmpPass : public FunctionPass {
     auto *BFI = (PSI && PSI->hasProfileSummary()) ?
            &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
            nullptr;
-    auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI);
+    DominatorTree *DT = nullptr;
+    if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+      DT = &DTWP->getDomTree();
+    auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
     return !PA.areAllPreserved();
   }
 
@@ -800,25 +832,28 @@ class ExpandMemCmpPass : public FunctionPass {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
     LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
     FunctionPass::getAnalysisUsage(AU);
   }
 
   PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
                             const TargetTransformInfo *TTI,
-                            const TargetLowering* TL,
-                            ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
+                            const TargetLowering *TL, ProfileSummaryInfo *PSI,
+                            BlockFrequencyInfo *BFI, DominatorTree *DT);
   // Returns true if a change was made.
   bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
-                  const TargetTransformInfo *TTI, const TargetLowering* TL,
-                  const DataLayout& DL, ProfileSummaryInfo *PSI,
-                  BlockFrequencyInfo *BFI);
+                  const TargetTransformInfo *TTI, const TargetLowering *TL,
+                  const DataLayout &DL, ProfileSummaryInfo *PSI,
+                  BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
 };
 
-bool ExpandMemCmpPass::runOnBlock(
-    BasicBlock &BB, const TargetLibraryInfo *TLI,
-    const TargetTransformInfo *TTI, const TargetLowering* TL,
-    const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
+                                  const TargetTransformInfo *TTI,
+                                  const TargetLowering *TL,
+                                  const DataLayout &DL, ProfileSummaryInfo *PSI,
+                                  BlockFrequencyInfo *BFI,
+                                  DomTreeUpdater *DTU) {
   for (Instruction& I : BB) {
     CallInst *CI = dyn_cast<CallInst>(&I);
     if (!CI) {
@@ -827,22 +862,27 @@ bool ExpandMemCmpPass::runOnBlock(
     LibFunc Func;
     if (TLI->getLibFunc(*CI, Func) &&
         (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
-        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
+        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) {
       return true;
     }
   }
   return false;
 }
 
+PreservedAnalyses
+ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
+                          const TargetTransformInfo *TTI,
+                          const TargetLowering *TL, ProfileSummaryInfo *PSI,
+                          BlockFrequencyInfo *BFI, DominatorTree *DT) {
+  Optional<DomTreeUpdater> DTU;
+  if (DT)
+    DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
 
-PreservedAnalyses ExpandMemCmpPass::runImpl(
-    Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
-    const TargetLowering* TL, ProfileSummaryInfo *PSI,
-    BlockFrequencyInfo *BFI) {
   const DataLayout& DL = F.getParent()->getDataLayout();
   bool MadeChanges = false;
   for (auto BBIt = F.begin(); BBIt != F.end();) {
-    if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) {
+    if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI,
+                   DTU.hasValue() ? DTU.getPointer() : nullptr)) {
       MadeChanges = true;
       // If changes were made, restart the function from the beginning, since
       // the structure of the function was changed.
@@ -854,7 +894,11 @@ PreservedAnalyses ExpandMemCmpPass::runImpl(
   if (MadeChanges)
     for (BasicBlock &BB : F)
       SimplifyInstructionsInBlock(&BB);
-  return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all();
+  if (!MadeChanges)
+    return PreservedAnalyses::all();
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  return PA;
 }
 
 } // namespace
@@ -866,6 +910,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
                     "Expand memcmp() to load/stores", false, false)
 

diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 28753d646b85..47ad73debf4a 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -49,7 +49,6 @@
 ; CHECK-NEXT:       Shadow Stack GC Lowering
 ; CHECK-NEXT:       Lower constant intrinsics
 ; CHECK-NEXT:       Remove unreachable blocks from the CFG
-; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       Post-Dominator Tree Construction
 ; CHECK-NEXT:       Branch Probability Analysis

diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 349179f95cb8..18471ca8e403 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -29,7 +29,6 @@
 ; CHECK-NEXT:      Shadow Stack GC Lowering
 ; CHECK-NEXT:      Lower constant intrinsics
 ; CHECK-NEXT:      Remove unreachable blocks from the CFG
-; CHECK-NEXT:      Dominator Tree Construction
 ; CHECK-NEXT:      Natural Loop Information
 ; CHECK-NEXT:      Post-Dominator Tree Construction
 ; CHECK-NEXT:      Branch Probability Analysis

diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index d31a2a4e366b..501a3badeab2 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -46,7 +46,6 @@
 ; CHECK-NEXT:       Shadow Stack GC Lowering
 ; CHECK-NEXT:       Lower constant intrinsics
 ; CHECK-NEXT:       Remove unreachable blocks from the CFG
-; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       Post-Dominator Tree Construction
 ; CHECK-NEXT:       Branch Probability Analysis


        


More information about the llvm-commits mailing list