[llvm] 0563859 - [ScalarizeMaskedMemIntrin] Preserve Dominator Tree, if available
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 28 14:12:23 PST 2021
Author: Roman Lebedev
Date: 2021-01-29T01:11:36+03:00
New Revision: 056385921d8f31cac573873a073d645d5ddca983
URL: https://github.com/llvm/llvm-project/commit/056385921d8f31cac573873a073d645d5ddca983
DIFF: https://github.com/llvm/llvm-project/commit/056385921d8f31cac573873a073d645d5ddca983.diff
LOG: [ScalarizeMaskedMemIntrin] Preserve Dominator Tree, if available
This de-pessimizes the arguably more usual case of no masked mem intrinsics,
and gets rid of one more Dominator Tree recalculation.
As per llvm/test/CodeGen/X86/opt-pipeline.ll,
there's one more Dominator Tree recalculation left that we could get rid of.
Added:
Modified:
llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
llvm/test/CodeGen/ARM/O3-pipeline.ll
llvm/test/CodeGen/X86/opt-pipeline.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index d38c5d5fe952..a520ab3c63a4 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -15,11 +15,13 @@
#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -60,16 +62,18 @@ class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
};
} // end anonymous namespace
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
- const TargetTransformInfo &TTI, const DataLayout &DL);
+ const TargetTransformInfo &TTI, const DataLayout &DL,
+ DomTreeUpdater *DTU);
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
const TargetTransformInfo &TTI,
- const DataLayout &DL);
+ const DataLayout &DL, DomTreeUpdater *DTU);
char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
@@ -77,6 +81,7 @@ INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
"Scalarize unsupported masked memory intrinsics", false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
"Scalarize unsupported masked memory intrinsics", false,
false)
@@ -132,7 +137,8 @@ static bool isConstantIntVector(Value *Mask) {
// %10 = extractelement <16 x i1> %mask, i32 2
// br i1 %10, label %cond.load4, label %else5
//
-static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
+static void scalarizeMaskedLoad(CallInst *CI, DomTreeUpdater *DTU,
+ bool &ModifiedDT) {
Value *Ptr = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -215,7 +221,8 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
Instruction *ThenTerm =
- SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false);
+ SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
BasicBlock *CondBlock = ThenTerm->getParent();
CondBlock->setName("cond.load");
@@ -271,7 +278,8 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
// store i32 %6, i32* %7
// br label %else2
// . . .
-static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
+static void scalarizeMaskedStore(CallInst *CI, DomTreeUpdater *DTU,
+ bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptr = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -347,7 +355,8 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
// %store i32 %OneElt, i32* %EltAddr
//
Instruction *ThenTerm =
- SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false);
+ SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
BasicBlock *CondBlock = ThenTerm->getParent();
CondBlock->setName("cond.store");
@@ -398,7 +407,8 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
// . . .
// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
-static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
+static void scalarizeMaskedGather(CallInst *CI, DomTreeUpdater *DTU,
+ bool &ModifiedDT) {
Value *Ptrs = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -467,7 +477,8 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
Instruction *ThenTerm =
- SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false);
+ SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
BasicBlock *CondBlock = ThenTerm->getParent();
CondBlock->setName("cond.load");
@@ -525,7 +536,8 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
// store i32 %Elt1, i32* %Ptr1, align 4
// br label %else2
// . . .
-static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
+static void scalarizeMaskedScatter(CallInst *CI, DomTreeUpdater *DTU,
+ bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptrs = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -592,7 +604,8 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
// %store i32 %Elt1, i32* %Ptr1
//
Instruction *ThenTerm =
- SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false);
+ SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
BasicBlock *CondBlock = ThenTerm->getParent();
CondBlock->setName("cond.store");
@@ -614,7 +627,8 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
ModifiedDT = true;
}
-static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
+static void scalarizeMaskedExpandLoad(CallInst *CI, DomTreeUpdater *DTU,
+ bool &ModifiedDT) {
Value *Ptr = CI->getArgOperand(0);
Value *Mask = CI->getArgOperand(1);
Value *PassThru = CI->getArgOperand(2);
@@ -696,7 +710,8 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
Instruction *ThenTerm =
- SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false);
+ SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
BasicBlock *CondBlock = ThenTerm->getParent();
CondBlock->setName("cond.load");
@@ -738,7 +753,8 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
ModifiedDT = true;
}
-static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
+static void scalarizeMaskedCompressStore(CallInst *CI, DomTreeUpdater *DTU,
+ bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptr = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -802,7 +818,8 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
// %store i32 %OneElt, i32* %EltAddr
//
Instruction *ThenTerm =
- SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false);
+ SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
BasicBlock *CondBlock = ThenTerm->getParent();
CondBlock->setName("cond.store");
@@ -837,7 +854,12 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
ModifiedDT = true;
}
-static bool runImpl(Function &F, const TargetTransformInfo &TTI) {
+static bool runImpl(Function &F, const TargetTransformInfo &TTI,
+ DominatorTree *DT) {
+ Optional<DomTreeUpdater> DTU;
+ if (DT)
+ DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
bool EverMadeChange = false;
bool MadeChange = true;
auto &DL = F.getParent()->getDataLayout();
@@ -846,7 +868,9 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI) {
for (Function::iterator I = F.begin(); I != F.end();) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL);
+ MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL,
+ DTU.hasValue() ? DTU.getPointer() : nullptr);
+
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
@@ -860,28 +884,33 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI) {
bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return runImpl(F, TTI);
+ DominatorTree *DT = nullptr;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
+ return runImpl(F, TTI, DT);
}
PreservedAnalyses
ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- if (!runImpl(F, TTI))
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ if (!runImpl(F, TTI, DT))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<TargetIRAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
return PA;
}
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
- const TargetTransformInfo &TTI,
- const DataLayout &DL) {
+ const TargetTransformInfo &TTI, const DataLayout &DL,
+ DomTreeUpdater *DTU) {
bool MadeChange = false;
BasicBlock::iterator CurInstIterator = BB.begin();
while (CurInstIterator != BB.end()) {
if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
- MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL);
+ MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
if (ModifiedDT)
return true;
}
@@ -891,7 +920,7 @@ static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
const TargetTransformInfo &TTI,
- const DataLayout &DL) {
+ const DataLayout &DL, DomTreeUpdater *DTU) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
// The scalarization code below does not work for scalable vectors.
@@ -909,14 +938,14 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
CI->getType(),
cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
return false;
- scalarizeMaskedLoad(CI, ModifiedDT);
+ scalarizeMaskedLoad(CI, DTU, ModifiedDT);
return true;
case Intrinsic::masked_store:
if (TTI.isLegalMaskedStore(
CI->getArgOperand(0)->getType(),
cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
return false;
- scalarizeMaskedStore(CI, ModifiedDT);
+ scalarizeMaskedStore(CI, DTU, ModifiedDT);
return true;
case Intrinsic::masked_gather: {
unsigned AlignmentInt =
@@ -926,7 +955,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
if (TTI.isLegalMaskedGather(LoadTy, Alignment))
return false;
- scalarizeMaskedGather(CI, ModifiedDT);
+ scalarizeMaskedGather(CI, DTU, ModifiedDT);
return true;
}
case Intrinsic::masked_scatter: {
@@ -937,18 +966,18 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
if (TTI.isLegalMaskedScatter(StoreTy, Alignment))
return false;
- scalarizeMaskedScatter(CI, ModifiedDT);
+ scalarizeMaskedScatter(CI, DTU, ModifiedDT);
return true;
}
case Intrinsic::masked_expandload:
if (TTI.isLegalMaskedExpandLoad(CI->getType()))
return false;
- scalarizeMaskedExpandLoad(CI, ModifiedDT);
+ scalarizeMaskedExpandLoad(CI, DTU, ModifiedDT);
return true;
case Intrinsic::masked_compressstore:
if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
return false;
- scalarizeMaskedCompressStore(CI, ModifiedDT);
+ scalarizeMaskedCompressStore(CI, DTU, ModifiedDT);
return true;
}
}
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 5055ae93263e..349179f95cb8 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -39,7 +39,6 @@
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
-; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 066a9cd335b5..d31a2a4e366b 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -56,7 +56,6 @@
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
-; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Interleaved Access Pass
; CHECK-NEXT: X86 Partial Reduction
; CHECK-NEXT: Expand indirectbr instructions
More information about the llvm-commits
mailing list