[llvm] [X86] Avoid useless DomTree in flags copy lowering (PR #97628)
Alexis Engelke via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 4 03:05:23 PDT 2024
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/97628
>From b47f5cf9ef5337f9b4599b04366302b7caf9e861 Mon Sep 17 00:00:00 2001
From: Alexis Engelke <engelke at in.tum.de>
Date: Wed, 3 Jul 2024 21:39:22 +0200
Subject: [PATCH 1/2] [X86] Avoid useless DomTree in flags copy lowering
Currently, flags copy lowering does two expensive things:
- It traverses the CFG in RPO, and
- It requires a dominator tree that is not preserved.
Most notably, it is the only machine dominator tree user at -O0.
Many functions have no flag copies to begin with. Therefore, add an
early exit if EFLAGS has no COPY def.
The legacy pass manager has no way to dynamically decide whether an
analysis is required. Therefore, if there is a copy, get the dominator
tree from the pass manager if it has one; otherwise, compute it.
These changes should make the pass very cheap for the common case.
---
llvm/lib/Target/X86/X86FlagsCopyLowering.cpp | 31 ++++++++++++++++++--
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index 394947bc65c89..d9ed1334d7376 100644
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -128,7 +128,7 @@ FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
char X86FlagsCopyLoweringPass::ID = 0;
void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addUsedIfAvailable<MachineDominatorTreeWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -258,13 +258,38 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TII = Subtarget->getInstrInfo();
TRI = Subtarget->getRegisterInfo();
- MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
PromoteRC = &X86::GR8RegClass;
if (MF.empty())
// Nothing to do for a degenerate empty function...
return false;
+ bool HasCopies = false;
+ for (const MachineInstr &DefInst : MRI->def_instructions(X86::EFLAGS)) {
+ if (DefInst.getOpcode() == TargetOpcode::COPY) {
+ HasCopies = true;
+ break;
+ }
+ }
+
+ if (!HasCopies)
+ return false;
+
+ // We change the code, so we don't preserve the dominator tree anyway. If we
+ // got a valid MDT from the pass manager, use that, otherwise construct one
+ // now. This is an optimization that avoids unnecessary MDT construction for
+ // functions that have no flag copies.
+
+ auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+ std::unique_ptr<MachineDominatorTree> OwnedMDT;
+ if (MDTWrapper) {
+ MDT = &MDTWrapper->getDomTree();
+ } else {
+ OwnedMDT = std::make_unique<MachineDominatorTree>();
+ OwnedMDT->getBase().recalculate(MF);
+ MDT = OwnedMDT.get();
+ }
+
// Collect the copies in RPO so that when there are chains where a copy is in
// turn copied again we visit the first one first. This ensures we can find
// viable locations for testing the original EFLAGS that dominate all the
@@ -688,7 +713,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
}
#endif
- return true;
+ return !Copies.empty();
}
/// Collect any conditions that have already been set in registers so that we
>From e2e7f63ef8a09718859e3d2428c1c53bcbfc0610 Mon Sep 17 00:00:00 2001
From: Alexis Engelke <engelke at in.tum.de>
Date: Thu, 4 Jul 2024 10:04:42 +0000
Subject: [PATCH 2/2] Fix test and use llvm::none_of
---
llvm/lib/Target/X86/X86FlagsCopyLowering.cpp | 12 +++---------
llvm/test/CodeGen/X86/O0-pipeline.ll | 1 -
llvm/test/CodeGen/X86/opt-pipeline.ll | 1 -
3 files changed, 3 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index d9ed1334d7376..8283958c15015 100644
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -264,15 +264,9 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
// Nothing to do for a degenerate empty function...
return false;
- bool HasCopies = false;
- for (const MachineInstr &DefInst : MRI->def_instructions(X86::EFLAGS)) {
- if (DefInst.getOpcode() == TargetOpcode::COPY) {
- HasCopies = true;
- break;
- }
- }
-
- if (!HasCopies)
+ if (none_of(MRI->def_instructions(X86::EFLAGS), [](const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::COPY;
+ }))
return false;
// We change the code, so we don't preserve the dominator tree anyway. If we
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 40648adeb91cd..ca855cfd1ad44 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -44,7 +44,6 @@
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation
; CHECK-NEXT: X86 speculative load hardening
-; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: X86 EFLAGS copy lowering
; CHECK-NEXT: X86 DynAlloca Expander
; CHECK-NEXT: Fast Tile Register Preconfigure
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 15c496bfb7f66..9bee9d0de88ae 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -125,7 +125,6 @@
; CHECK-NEXT: X86 Optimize Call Frame
; CHECK-NEXT: X86 Avoid Store Forwarding Block
; CHECK-NEXT: X86 speculative load hardening
-; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: X86 EFLAGS copy lowering
; CHECK-NEXT: X86 DynAlloca Expander
; CHECK-NEXT: MachineDominator Tree Construction
More information about the llvm-commits
mailing list