[llvm] e64adc0 - [X86] Fix compile time regression of D93594.
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 18 01:53:06 PDT 2021
Author: Luo, Yuanke
Date: 2021-03-18T16:52:43+08:00
New Revision: e64adc0b88c2705425a9fe2345729e2688a4e4c6
URL: https://github.com/llvm/llvm-project/commit/e64adc0b88c2705425a9fe2345729e2688a4e4c6
DIFF: https://github.com/llvm/llvm-project/commit/e64adc0b88c2705425a9fe2345729e2688a4e4c6.diff
LOG: [X86] Fix compile time regression of D93594.
D93594 depends on the dominator tree and loop information, which increased
the compile time when building with -O0. However, the pass uses these
analyses only to keep them up to date, so that later passes do not need to
re-analyze them. When the dominator tree or loop information is not
available in this pass, we can skip updating them.
Differential Revision: https://reviews.llvm.org/D98773
Added:
Modified:
llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
llvm/test/CodeGen/X86/O0-pipeline.ll
llvm/test/CodeGen/X86/opt-pipeline.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
index 9b6e54682f8c..134df5d9569c 100644
--- a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
@@ -50,10 +50,38 @@ static bool isV256I32Ty(Type *Ty) {
return false;
}
-static BasicBlock *createLoop(BasicBlock *Preheader, BasicBlock *Exit,
- Value *Bound, Value *Step, StringRef Name,
- IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L,
- LoopInfo &LI) {
+namespace {
+class X86LowerAMXIntrinsics {
+ Function &Func;
+
+public:
+ X86LowerAMXIntrinsics(Function &F, DomTreeUpdater &DomTU, LoopInfo *LoopI)
+ : Func(F), DTU(DomTU), LI(LoopI) {}
+ bool visit();
+
+private:
+ DomTreeUpdater &DTU;
+ LoopInfo *LI;
+ BasicBlock *createLoop(BasicBlock *Preheader, BasicBlock *Exit, Value *Bound,
+ Value *Step, StringRef Name, IRBuilderBase &B,
+ Loop *L);
+ template <bool IsTileLoad>
+ Value *createTileLoadStoreLoops(BasicBlock *Start, BasicBlock *End,
+ IRBuilderBase &B, Value *Row, Value *Col,
+ Value *Ptr, Value *Stride, Value *Tile);
+ Value *createTileDPBSSDLoops(BasicBlock *Start, BasicBlock *End,
+ IRBuilderBase &B, Value *Row, Value *Col,
+ Value *K, Value *Acc, Value *LHS, Value *RHS);
+ template <bool IsTileLoad>
+ bool lowerTileLoadStore(Instruction *TileLoadStore);
+ bool lowerTileDPBSSD(Instruction *TileDPBSSD);
+ bool lowerTileZero(Instruction *TileZero);
+};
+
+BasicBlock *X86LowerAMXIntrinsics::createLoop(BasicBlock *Preheader,
+ BasicBlock *Exit, Value *Bound,
+ Value *Step, StringRef Name,
+ IRBuilderBase &B, Loop *L) {
LLVMContext &Ctx = Preheader->getContext();
BasicBlock *Header =
BasicBlock::Create(Ctx, Name + ".header", Preheader->getParent(), Exit);
@@ -86,35 +114,37 @@ static BasicBlock *createLoop(BasicBlock *Preheader, BasicBlock *Exit,
{DominatorTree::Insert, Latch, Exit},
{DominatorTree::Insert, Preheader, Header},
});
-
- L->addBasicBlockToLoop(Header, LI);
- L->addBasicBlockToLoop(Body, LI);
- L->addBasicBlockToLoop(Latch, LI);
+ if (LI) {
+ L->addBasicBlockToLoop(Header, *LI);
+ L->addBasicBlockToLoop(Body, *LI);
+ L->addBasicBlockToLoop(Latch, *LI);
+ }
return Body;
}
template <bool IsTileLoad>
-static Value *createTileLoadStoreLoops(BasicBlock *Start, BasicBlock *End,
- IRBuilderBase &B, DomTreeUpdater &DTU,
- LoopInfo &LI, Value *Row, Value *Col,
- Value *Ptr, Value *Stride, Value *Tile) {
+Value *X86LowerAMXIntrinsics::createTileLoadStoreLoops(
+ BasicBlock *Start, BasicBlock *End, IRBuilderBase &B, Value *Row,
+ Value *Col, Value *Ptr, Value *Stride, Value *Tile) {
std::string IntrinName = IsTileLoad ? "tileload" : "tilestore";
- Loop *RowLoop = LI.AllocateLoop();
- Loop *ColLoop = LI.AllocateLoop();
- RowLoop->addChildLoop(ColLoop);
- if (Loop *ParentL = LI.getLoopFor(Start))
- ParentL->addChildLoop(RowLoop);
- else
- LI.addTopLevelLoop(RowLoop);
+ Loop *RowLoop = nullptr;
+ Loop *ColLoop = nullptr;
+ if (LI) {
+ RowLoop = LI->AllocateLoop();
+ ColLoop = LI->AllocateLoop();
+ RowLoop->addChildLoop(ColLoop);
+ if (Loop *ParentL = LI->getLoopFor(Start))
+ ParentL->addChildLoop(RowLoop);
+ else
+ LI->addTopLevelLoop(RowLoop);
+ }
- BasicBlock *RowBody =
- createLoop(Start, End, Row, B.getInt16(1), IntrinName + ".scalarize.rows",
- B, DTU, RowLoop, LI);
+ BasicBlock *RowBody = createLoop(Start, End, Row, B.getInt16(1),
+ IntrinName + ".scalarize.rows", B, RowLoop);
BasicBlock *RowLatch = RowBody->getSingleSuccessor();
- BasicBlock *ColBody =
- createLoop(RowBody, RowLatch, Col, B.getInt16(1),
- IntrinName + ".scalarize.cols", B, DTU, ColLoop, LI);
+ BasicBlock *ColBody = createLoop(RowBody, RowLatch, Col, B.getInt16(1),
+ IntrinName + ".scalarize.cols", B, ColLoop);
BasicBlock *ColLoopLatch = ColBody->getSingleSuccessor();
BasicBlock *ColLoopHeader = ColBody->getSinglePredecessor();
@@ -181,35 +211,36 @@ static Value *createTileLoadStoreLoops(BasicBlock *Start, BasicBlock *End,
}
}
-static Value *createTileDPBSSDLoops(BasicBlock *Start, BasicBlock *End,
- IRBuilderBase &B, DomTreeUpdater &DTU,
- LoopInfo &LI, Value *Row, Value *Col,
- Value *K, Value *Acc, Value *LHS,
- Value *RHS) {
- Loop *RowLoop = LI.AllocateLoop();
- Loop *ColLoop = LI.AllocateLoop();
- Loop *InnerLoop = LI.AllocateLoop();
- ColLoop->addChildLoop(InnerLoop);
- RowLoop->addChildLoop(ColLoop);
- if (Loop *ParentL = LI.getLoopFor(Start))
- ParentL->addChildLoop(RowLoop);
- else
- LI.addTopLevelLoop(RowLoop);
+Value *X86LowerAMXIntrinsics::createTileDPBSSDLoops(
+ BasicBlock *Start, BasicBlock *End, IRBuilderBase &B, Value *Row,
+ Value *Col, Value *K, Value *Acc, Value *LHS, Value *RHS) {
+ Loop *RowLoop = nullptr;
+ Loop *ColLoop = nullptr;
+ Loop *InnerLoop = nullptr;
+ if (LI) {
+ RowLoop = LI->AllocateLoop();
+ ColLoop = LI->AllocateLoop();
+ InnerLoop = LI->AllocateLoop();
+ ColLoop->addChildLoop(InnerLoop);
+ RowLoop->addChildLoop(ColLoop);
+ if (Loop *ParentL = LI->getLoopFor(Start))
+ ParentL->addChildLoop(RowLoop);
+ else
+ LI->addTopLevelLoop(RowLoop);
+ }
- BasicBlock *RowBody =
- createLoop(Start, End, Row, B.getInt16(1), "tiledpbssd.scalarize.rows", B,
- DTU, RowLoop, LI);
+ BasicBlock *RowBody = createLoop(Start, End, Row, B.getInt16(1),
+ "tiledpbssd.scalarize.rows", B, RowLoop);
BasicBlock *RowLatch = RowBody->getSingleSuccessor();
- BasicBlock *ColBody =
- createLoop(RowBody, RowLatch, Col, B.getInt16(1),
- "tiledpbssd.scalarize.cols", B, DTU, ColLoop, LI);
+ BasicBlock *ColBody = createLoop(RowBody, RowLatch, Col, B.getInt16(1),
+ "tiledpbssd.scalarize.cols", B, ColLoop);
BasicBlock *ColLoopLatch = ColBody->getSingleSuccessor();
B.SetInsertPoint(ColBody->getTerminator());
BasicBlock *InnerBody =
createLoop(ColBody, ColLoopLatch, K, B.getInt16(1),
- "tiledpbssd.scalarize.inner", B, DTU, InnerLoop, LI);
+ "tiledpbssd.scalarize.inner", B, InnerLoop);
BasicBlock *ColLoopHeader = ColBody->getSinglePredecessor();
BasicBlock *RowLoopHeader = RowBody->getSinglePredecessor();
@@ -324,30 +355,11 @@ static Value *createTileDPBSSDLoops(BasicBlock *Start, BasicBlock *End,
return NewVecD;
}
-namespace {
-class X86LowerAMXIntrinsics {
- Function &Func;
-
-public:
- X86LowerAMXIntrinsics(Function &F, DominatorTree *DT, LoopInfo *LI)
- : Func(F), DT(DT), LI(LI) {}
- bool visit();
-
-private:
- DominatorTree *DT;
- LoopInfo *LI;
- template <bool IsTileLoad>
- bool lowerTileLoadStore(Instruction *TileLoadStore);
- bool lowerTileDPBSSD(Instruction *TileDPBSSD);
- bool lowerTileZero(Instruction *TileZero);
-};
-
bool X86LowerAMXIntrinsics::lowerTileDPBSSD(Instruction *TileDPBSSD) {
Value *M, *N, *K, *C, *A, *B;
match(TileDPBSSD, m_Intrinsic<Intrinsic::x86_tdpbssd_internal>(
m_Value(M), m_Value(N), m_Value(K), m_Value(C),
m_Value(A), m_Value(B)));
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
Instruction *InsertI = TileDPBSSD;
IRBuilder<> PreBuilder(TileDPBSSD);
PreBuilder.SetInsertPoint(TileDPBSSD);
@@ -358,10 +370,10 @@ bool X86LowerAMXIntrinsics::lowerTileDPBSSD(Instruction *TileDPBSSD) {
Value *KDWord = PreBuilder.CreateLShr(K, PreBuilder.getInt16(2));
BasicBlock *Start = InsertI->getParent();
BasicBlock *End =
- SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue");
+ SplitBlock(InsertI->getParent(), InsertI, &DTU, LI, nullptr, "continue");
IRBuilder<> Builder(TileDPBSSD);
- Value *ResVec = createTileDPBSSDLoops(Start, End, Builder, DTU, *LI, M,
- NDWord, KDWord, C, A, B);
+ Value *ResVec =
+ createTileDPBSSDLoops(Start, End, Builder, M, NDWord, KDWord, C, A, B);
// we cannot assume there always be bitcast after tiledpbssd. So we need to
// insert one bitcast as required
Builder.SetInsertPoint(End->getFirstNonPHI());
@@ -394,7 +406,6 @@ bool X86LowerAMXIntrinsics::lowerTileLoadStore(Instruction *TileLoadStore) {
m_Value(M), m_Value(N), m_Value(Ptr),
m_Value(Stride), m_Value(Tile)));
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
Instruction *InsertI = TileLoadStore;
IRBuilder<> PreBuilder(TileLoadStore);
PreBuilder.SetInsertPoint(TileLoadStore);
@@ -402,10 +413,10 @@ bool X86LowerAMXIntrinsics::lowerTileLoadStore(Instruction *TileLoadStore) {
Value *StrideDWord = PreBuilder.CreateLShr(Stride, PreBuilder.getInt64(2));
BasicBlock *Start = InsertI->getParent();
BasicBlock *End =
- SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue");
+ SplitBlock(InsertI->getParent(), InsertI, &DTU, LI, nullptr, "continue");
IRBuilder<> Builder(TileLoadStore);
Value *ResVec = createTileLoadStoreLoops<IsTileLoad>(
- Start, End, Builder, DTU, *LI, M, NDWord, Ptr, StrideDWord,
+ Start, End, Builder, M, NDWord, Ptr, StrideDWord,
IsTileLoad ? nullptr : Tile);
if (IsTileLoad) {
// we cannot assume there always be bitcast after tileload. So we need to
@@ -505,18 +516,19 @@ class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass {
TM->getOptLevel() != CodeGenOpt::None)
return false;
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
- X86LowerAMXIntrinsics LAT(F, &DT, &LI);
+ X86LowerAMXIntrinsics LAT(F, DTU, LI);
return LAT.visit();
}
StringRef getPassName() const override { return "Lower AMX intrinsics"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
}
@@ -528,8 +540,6 @@ static const char PassName[] = "Lower AMX intrinsics";
char X86LowerAMXIntrinsicsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(X86LowerAMXIntrinsicsLegacyPass, DEBUG_TYPE, PassName,
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(X86LowerAMXIntrinsicsLegacyPass, DEBUG_TYPE, PassName,
false, false)
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 2e1cbacf4584..e5b3584a0339 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -18,8 +18,6 @@
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand Atomic instructions
-; CHECK-NEXT: Dominator Tree Construction
-; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lower AMX intrinsics
; CHECK-NEXT: Lower AMX type for load/store
; CHECK-NEXT: Module Verifier
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 0f92e5a7be06..9df12b7a3fd3 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -24,12 +24,12 @@
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Expand Atomic instructions
-; CHECK-NEXT: Dominator Tree Construction
-; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lower AMX intrinsics
; CHECK-NEXT: Lower AMX type for load/store
; CHECK-NEXT: Module Verifier
+; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Loop Pass Manager
More information about the llvm-commits
mailing list