[llvm] ddc4b56 - [ExpandMemCmpPass] Preserve Dominator Tree, if available
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 29 14:16:38 PST 2021
Author: Roman Lebedev
Date: 2021-01-30T01:14:51+03:00
New Revision: ddc4b56eef9fec990915470069a29e70bbde3711
URL: https://github.com/llvm/llvm-project/commit/ddc4b56eef9fec990915470069a29e70bbde3711
DIFF: https://github.com/llvm/llvm-project/commit/ddc4b56eef9fec990915470069a29e70bbde3711.diff
LOG: [ExpandMemCmpPass] Preserve Dominator Tree, if available
This finishes getting rid of all the avoidable Dominator Tree recalculations
in X86 optimized codegen pipeline.
Added:
Modified:
llvm/lib/CodeGen/ExpandMemCmp.cpp
llvm/test/CodeGen/AArch64/O3-pipeline.ll
llvm/test/CodeGen/ARM/O3-pipeline.ll
llvm/test/CodeGen/X86/opt-pipeline.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 9f85db9de884..50fdc2114780 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -21,11 +22,13 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -75,6 +78,7 @@ class MemCmpExpansion {
PHINode *PhiRes;
const bool IsUsedForZeroCmp;
const DataLayout &DL;
+ DomTreeUpdater *DTU;
IRBuilder<> Builder;
// Represents the decomposition in blocks of the expansion. For example,
// comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
@@ -123,7 +127,8 @@ class MemCmpExpansion {
public:
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
+ DomTreeUpdater *DTU);
unsigned getNumBlocks();
uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -212,10 +217,12 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
MemCmpExpansion::MemCmpExpansion(
CallInst *const CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
+ DomTreeUpdater *DTU)
: CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
- IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
+ IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
+ Builder(CI) {
assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
@@ -325,13 +332,14 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
unsigned OffsetBytes) {
- Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+ BasicBlock *BB = LoadCmpBlocks[BlockIndex];
+ Builder.SetInsertPoint(BB);
const LoadPair Loads =
getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
Type::getInt32Ty(CI->getContext()), OffsetBytes);
Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
- PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);
+ PhiRes->addIncoming(Diff, BB);
if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
// Early exit branch if difference found to EndBlock. Otherwise, continue to
@@ -340,10 +348,16 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
ConstantInt::get(Diff->getType(), 0));
BranchInst *CmpBr =
BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
+ if (DTU)
+ DTU->applyUpdates(
+ {{DominatorTree::Insert, BB, EndBlock},
+ {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
Builder.Insert(CmpBr);
} else {
// The last block has an unconditional branch to EndBlock.
BranchInst *CmpBr = BranchInst::Create(EndBlock);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
Builder.Insert(CmpBr);
}
}
@@ -428,8 +442,12 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
: LoadCmpBlocks[BlockIndex + 1];
// Early exit branch if difference found to ResultBlock. Otherwise,
// continue to next LoadCmpBlock or EndBlock.
+ BasicBlock *BB = Builder.GetInsertBlock();
BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
Builder.Insert(CmpBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
+ {DominatorTree::Insert, BB, NextBB}});
// Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
// since early exit to ResultBlock was not taken (no difference was found in
@@ -482,8 +500,12 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
: LoadCmpBlocks[BlockIndex + 1];
// Early exit branch if difference found to ResultBlock. Otherwise, continue
// to next LoadCmpBlock or EndBlock.
+ BasicBlock *BB = Builder.GetInsertBlock();
BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
Builder.Insert(CmpBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
+ {DominatorTree::Insert, BB, ResBlock.BB}});
// Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
// since early exit to ResultBlock was not taken (no difference was found in
@@ -507,6 +529,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
PhiRes->addIncoming(Res, ResBlock.BB);
BranchInst *NewBr = BranchInst::Create(EndBlock);
Builder.Insert(NewBr);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
return;
}
BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
@@ -519,9 +543,11 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
ConstantInt::get(Builder.getInt32Ty(), 1));
+ PhiRes->addIncoming(Res, ResBlock.BB);
BranchInst *NewBr = BranchInst::Create(EndBlock);
Builder.Insert(NewBr);
- PhiRes->addIncoming(Res, ResBlock.BB);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
}
void MemCmpExpansion::setupResultBlockPHINodes() {
@@ -597,7 +623,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
// Create the basic block framework for a multi-block expansion.
if (getNumBlocks() != 1) {
BasicBlock *StartBlock = CI->getParent();
- EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
+ EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
+ /*MSSAU=*/nullptr, "endblock");
setupEndBlockPHINodes();
createResultBlock();
@@ -610,9 +637,12 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
// Create the number of required load compare basic blocks.
createLoadCmpBlocks();
- // Update the terminator added by splitBasicBlock to branch to the first
+ // Update the terminator added by SplitBlock to branch to the first
// LoadCmpBlock.
StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
+ {DominatorTree::Delete, StartBlock, EndBlock}});
}
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
@@ -707,7 +737,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
const TargetLowering *TLI, const DataLayout *DL,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+ DomTreeUpdater *DTU) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -744,7 +775,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
Options.MaxNumLoads = MaxLoadsPerMemcmp;
- MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
// Don't expand if this will require more loads than desired by the target.
if (Expansion.getNumLoads() == 0) {
@@ -763,8 +794,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
return true;
}
-
-
class ExpandMemCmpPass : public FunctionPass {
public:
static char ID;
@@ -791,7 +820,10 @@ class ExpandMemCmpPass : public FunctionPass {
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
nullptr;
- auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI);
+ DominatorTree *DT = nullptr;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
+ auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
return !PA.areAllPreserved();
}
@@ -800,25 +832,28 @@ class ExpandMemCmpPass : public FunctionPass {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
FunctionPass::getAnalysisUsage(AU);
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const TargetLowering* TL,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
+ const TargetLowering *TL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DominatorTree *DT);
// Returns true if a change was made.
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering* TL,
- const DataLayout& DL, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI);
+ const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const DataLayout &DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
};
-bool ExpandMemCmpPass::runOnBlock(
- BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering* TL,
- const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const TargetLowering *TL,
+ const DataLayout &DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI,
+ DomTreeUpdater *DTU) {
for (Instruction& I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI) {
@@ -827,22 +862,27 @@ bool ExpandMemCmpPass::runOnBlock(
LibFunc Func;
if (TLI->getLibFunc(*CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
+ expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) {
return true;
}
}
return false;
}
+PreservedAnalyses
+ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI,
+ const TargetLowering *TL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DominatorTree *DT) {
+ Optional<DomTreeUpdater> DTU;
+ if (DT)
+ DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
-PreservedAnalyses ExpandMemCmpPass::runImpl(
- Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
- const TargetLowering* TL, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI) {
const DataLayout& DL = F.getParent()->getDataLayout();
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
- if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) {
+ if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI,
+ DTU.hasValue() ? DTU.getPointer() : nullptr)) {
MadeChanges = true;
// If changes were made, restart the function from the beginning, since
// the structure of the function was changed.
@@ -854,7 +894,11 @@ PreservedAnalyses ExpandMemCmpPass::runImpl(
if (MadeChanges)
for (BasicBlock &BB : F)
SimplifyInstructionsInBlock(&BB);
- return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ if (!MadeChanges)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
}
} // namespace
@@ -866,6 +910,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
"Expand memcmp() to load/stores", false, false)
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 28753d646b85..47ad73debf4a 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -49,7 +49,6 @@
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Post-Dominator Tree Construction
; CHECK-NEXT: Branch Probability Analysis
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 349179f95cb8..18471ca8e403 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -29,7 +29,6 @@
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Post-Dominator Tree Construction
; CHECK-NEXT: Branch Probability Analysis
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index d31a2a4e366b..501a3badeab2 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -46,7 +46,6 @@
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Post-Dominator Tree Construction
; CHECK-NEXT: Branch Probability Analysis
More information about the llvm-commits
mailing list