[llvm] [MachineSSAUpdater][AMDGPU] Add faster version of MachineSSAUpdater class. (PR #145722)
Valery Pykhtin via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 31 05:42:36 PDT 2025
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/145722
>From f1d527aef6eaf334cf3e5c1aa0476c7b09722cb5 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at amd.com>
Date: Mon, 6 Oct 2025 14:10:31 +0000
Subject: [PATCH 1/6] [MachineSSAUpdater][AMDGPU] Add faster version of
MachineSSAUpdater class.
This is a port of SSAUpdaterBulk to machine IR minus the "bulk" part. Phi
deduplication and simplification are not yet implemented but can be added
if needed.
When used in AMDGPU to replace MachineSSAUpdater for i1 copy lowering, it
reduced compilation time from 417 to 180 seconds on a large test case (56%
improvement).
---
.../llvm/CodeGen/MachineIDFSSAUpdater.h | 77 ++++++++
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp | 181 ++++++++++++++++++
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 41 ++--
...ergence-divergent-i1-used-outside-loop.mir | 22 +--
.../GlobalISel/divergence-structurizer.mir | 6 +-
llvm/test/CodeGen/AMDGPU/si-i1-copies.mir | 2 +-
...lower-i1-copies-order-of-phi-incomings.mir | 6 +-
8 files changed, 301 insertions(+), 35 deletions(-)
create mode 100644 llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
create mode 100644 llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
diff --git a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
new file mode 100644
index 0000000000000..8a8ae3a3e7ca7
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
@@ -0,0 +1,77 @@
+//===- MachineIDFSSAUpdater.h - Unstructured SSA Update Tool ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MachineIDFSSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_MACHINE_SSAUPDATER2_H
+#define LLVM_TRANSFORMS_UTILS_MACHINE_SSAUPDATER2_H
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class MachineDominatorTree;
+class MachineInstrBuilder;
+class MachineBasicBlock;
+
+class MachineIDFSSAUpdater {
+ struct BBValueInfo {
+ Register LiveInValue;
+ Register LiveOutValue;
+ };
+
+ MachineDominatorTree &DT;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ MachineRegisterInfo::VRegAttrs RegAttrs;
+
+ SmallVector<std::pair<MachineBasicBlock *, Register>, 4> Defines;
+ SmallVector<MachineBasicBlock *, 4> UseBlocks;
+ DenseMap<MachineBasicBlock *, BBValueInfo> BBInfos;
+
+ MachineInstrBuilder createInst(unsigned Opc, MachineBasicBlock *BB,
+ MachineBasicBlock::iterator I);
+
+ // IsLiveOut indicates whether we are computing live-out values (true) or
+ // live-in values (false).
+ Register computeValue(MachineBasicBlock *BB, bool IsLiveOut);
+
+public:
+ MachineIDFSSAUpdater(MachineDominatorTree &DT, MachineFunction &MF,
+ const MachineRegisterInfo::VRegAttrs &RegAttr)
+ : DT(DT), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
+ RegAttrs(RegAttr) {}
+
+ MachineIDFSSAUpdater(MachineDominatorTree &DT, MachineFunction &MF,
+ Register Reg)
+ : MachineIDFSSAUpdater(DT, MF, MF.getRegInfo().getVRegAttrs(Reg)) {}
+
+ /// Indicate that a rewritten value is available in the specified block
+ /// with the specified value. Must be called before invoking Calculate().
+ void addAvailableValue(MachineBasicBlock *BB, Register V) {
+ Defines.emplace_back(BB, V);
+ }
+
+ /// Record a basic block that uses the value. This method should be called for
+ /// every basic block where the value will be used. Must be called before
+ /// invoking calculate().
+ void addUseBlock(MachineBasicBlock *BB) { UseBlocks.push_back(BB); }
+
+ /// Calculate and insert necessary PHI nodes for SSA form.
+ /// Must be called after registering all definitions and uses.
+ void calculate();
+
+ /// See SSAUpdater::GetValueInMiddleOfBlock description.
+ Register getValueInMiddleOfBlock(MachineBasicBlock *BB);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_MACHINE_SSAUPDATER2_H
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f8f9bbba53e43..1491f3e121752 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -126,6 +126,7 @@ add_llvm_component_library(LLVMCodeGen
MachineFunctionPass.cpp
MachineFunctionPrinterPass.cpp
MachineFunctionSplitter.cpp
+ MachineIDFSSAUpdater.cpp
MachineInstrBundle.cpp
MachineInstr.cpp
MachineLateInstrsCleanup.cpp
diff --git a/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
new file mode 100644
index 0000000000000..0eefdf373dc60
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
@@ -0,0 +1,181 @@
+//===- MachineIDFSSAUpdater.cpp - Unstructured SSA Update Tool ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineIDFSSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineIDFSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+template <bool IsPostDom>
+class MachineIDFCalculator final
+ : public IDFCalculatorBase<MachineBasicBlock, IsPostDom> {
+public:
+ using IDFCalculatorBase =
+ typename llvm::IDFCalculatorBase<MachineBasicBlock, IsPostDom>;
+ using ChildrenGetterTy = typename IDFCalculatorBase::ChildrenGetterTy;
+
+ MachineIDFCalculator(DominatorTreeBase<MachineBasicBlock, IsPostDom> &DT)
+ : IDFCalculatorBase(DT) {}
+};
+
+using MachineForwardIDFCalculator = MachineIDFCalculator<false>;
+using MachineReverseIDFCalculator = MachineIDFCalculator<true>;
+
+} // namespace llvm
+
+using namespace llvm;
+
+/// Given sets of UsingBlocks and DefBlocks, compute the set of LiveInBlocks.
+/// This is basically a subgraph limited by DefBlocks and UsingBlocks.
+static void
+computeLiveInBlocks(const SmallPtrSetImpl<MachineBasicBlock *> &UsingBlocks,
+ const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<MachineBasicBlock *> &LiveInBlocks) {
+ // To determine liveness, we must iterate through the predecessors of blocks
+ // where the def is live. Blocks are added to the worklist if we need to
+ // check their predecessors. Start with all the using blocks.
+ SmallVector<MachineBasicBlock *, 64> LiveInBlockWorklist(UsingBlocks.begin(),
+ UsingBlocks.end());
+
+ // Now that we have a set of blocks where the phi is live-in, recursively add
+ // their predecessors until we find the full region the value is live.
+ while (!LiveInBlockWorklist.empty()) {
+ MachineBasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+ // The block really is live in here, insert it into the set. If already in
+ // the set, then it has already been processed.
+ if (!LiveInBlocks.insert(BB).second)
+ continue;
+
+ // Since the value is live into BB, it is either defined in a predecessor or
+ // live into it too. Add the preds to the worklist unless they are a
+ // defining block.
+ for (MachineBasicBlock *P : BB->predecessors()) {
+ // The value is not live into a predecessor if it defines the value.
+ if (DefBlocks.count(P))
+ continue;
+
+ // Otherwise it is, add to the worklist.
+ LiveInBlockWorklist.push_back(P);
+ }
+ }
+}
+
+MachineInstrBuilder
+MachineIDFSSAUpdater::createInst(unsigned Opc, MachineBasicBlock *BB,
+ MachineBasicBlock::iterator I) {
+ return BuildMI(*BB, I, DebugLoc(), TII.get(Opc),
+ MRI.createVirtualRegister(RegAttrs));
+}
+
+// IsLiveOut indicates whether we are computing live-out values (true) or
+// live-in values (false).
+Register MachineIDFSSAUpdater::computeValue(MachineBasicBlock *BB,
+ bool IsLiveOut) {
+ BBValueInfo *BBInfo = &BBInfos[BB];
+
+ if (IsLiveOut && BBInfo->LiveOutValue)
+ return BBInfo->LiveOutValue;
+
+ if (BBInfo->LiveInValue)
+ return BBInfo->LiveInValue;
+
+ SmallVector<BBValueInfo *, 4> DomPath = {BBInfo};
+ MachineBasicBlock *DomBB = BB, *TopDomBB = BB;
+ Register V;
+
+ while (DT.isReachableFromEntry(DomBB) && !DomBB->pred_empty() &&
+ (DomBB = DT.getNode(DomBB)->getIDom()->getBlock())) {
+ BBInfo = &BBInfos[DomBB];
+ if (BBInfo->LiveOutValue) {
+ V = BBInfo->LiveOutValue;
+ break;
+ }
+ if (BBInfo->LiveInValue) {
+ V = BBInfo->LiveInValue;
+ break;
+ }
+ TopDomBB = DomBB;
+ DomPath.emplace_back(BBInfo);
+ }
+
+ if (!V) {
+ V = createInst(TargetOpcode::IMPLICIT_DEF, TopDomBB,
+ TopDomBB->getFirstNonPHI())
+ .getReg(0);
+ }
+
+ for (BBValueInfo *BBInfo : DomPath) {
+ // Loop above can insert new entries into the BBInfos map: assume the
+ // map shouldn't grow as the caller should have allocated enough
+ // buckets, see [1].
+ BBInfo->LiveInValue = V;
+ }
+
+ return V;
+}
+
+/// Perform all the necessary updates, including new PHI-nodes insertion and the
+/// requested uses update.
+void MachineIDFSSAUpdater::calculate() {
+ MachineForwardIDFCalculator IDF(DT);
+
+ SmallPtrSet<MachineBasicBlock *, 2> DefBlocks;
+ for (auto [BB, V] : Defines)
+ DefBlocks.insert(BB);
+ IDF.setDefiningBlocks(DefBlocks);
+
+ SmallPtrSet<MachineBasicBlock *, 2> UsingBlocks(UseBlocks.begin(),
+ UseBlocks.end());
+ SmallVector<MachineBasicBlock *, 4> IDFBlocks;
+ SmallPtrSet<MachineBasicBlock *, 4> LiveInBlocks;
+ computeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks);
+ IDF.setLiveInBlocks(LiveInBlocks);
+ IDF.calculate(IDFBlocks);
+
+ // Reserve sufficient buckets to prevent map growth. [1]
+ BBInfos.reserve(LiveInBlocks.size() + DefBlocks.size());
+
+ for (auto [BB, V] : Defines)
+ BBInfos[BB].LiveOutValue = V;
+
+ for (auto *FrontierBB : IDFBlocks) {
+ Register NewVR =
+ createInst(TargetOpcode::PHI, FrontierBB, FrontierBB->begin())
+ .getReg(0);
+ BBInfos[FrontierBB].LiveInValue = NewVR;
+ }
+
+ for (auto *BB : IDFBlocks) {
+ auto *PHI = &BB->front();
+ assert(PHI->isPHI());
+ MachineInstrBuilder MIB(*BB->getParent(), PHI);
+ for (MachineBasicBlock *Pred : BB->predecessors())
+ MIB.addReg(computeValue(Pred, /*IsLiveOut=*/true)).addMBB(Pred);
+ }
+}
+
+Register MachineIDFSSAUpdater::getValueInMiddleOfBlock(MachineBasicBlock *BB) {
+ return computeValue(BB, /*IsLiveOut=*/false);
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 96131bd591a17..ba7781443711b 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -23,7 +23,7 @@
#include "SILowerI1Copies.h"
#include "AMDGPU.h"
-#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineIDFSSAUpdater.h"
#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "si-i1-copies"
@@ -275,7 +275,7 @@ class LoopFinder {
/// Add undef values dominating the loop and the optionally given additional
/// blocks, so that the SSA updater doesn't have to search all the way to the
/// function entry.
- void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
+ void addLoopEntries(unsigned LoopLevel, MachineIDFSSAUpdater &SSAUpdater,
MachineRegisterInfo &MRI,
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs,
ArrayRef<Incoming> Incomings = {}) {
@@ -286,14 +286,14 @@ class LoopFinder {
Dom = DT.findNearestCommonDominator(Dom, Incoming.Block);
if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
- SSAUpdater.AddAvailableValue(
+ SSAUpdater.addAvailableValue(
Dom, insertUndefLaneMask(Dom, &MRI, LaneMaskRegAttrs));
} else {
// The dominator is part of the loop or the given blocks, so add the
// undef value to unreachable predecessors instead.
for (MachineBasicBlock *Pred : Dom->predecessors()) {
if (!inLoopLevel(*Pred, LoopLevel, Incomings))
- SSAUpdater.AddAvailableValue(
+ SSAUpdater.addAvailableValue(
Pred, insertUndefLaneMask(Pred, &MRI, LaneMaskRegAttrs));
}
}
@@ -469,7 +469,6 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
}
bool PhiLoweringHelper::lowerPhis() {
- MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
PhiIncomingAnalysis PIA(*PDT, TII);
SmallVector<MachineInstr *, 4> Vreg1Phis;
@@ -524,22 +523,26 @@ bool PhiLoweringHelper::lowerPhis() {
// in practice.
unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
- SSAUpdater.Initialize(DstReg);
+ MachineIDFSSAUpdater SSAUpdater(*DT, *MF, DstReg);
+ SSAUpdater.addUseBlock(&MBB);
if (FoundLoopLevel) {
LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs,
Incomings);
for (auto &Incoming : Incomings) {
+ SSAUpdater.addUseBlock(Incoming.Block);
Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- SSAUpdater.AddAvailableValue(Incoming.Block, Incoming.UpdatedReg);
+ SSAUpdater.addAvailableValue(Incoming.Block, Incoming.UpdatedReg);
}
+ SSAUpdater.calculate();
+
for (auto &Incoming : Incomings) {
MachineBasicBlock &IMBB = *Incoming.Block;
buildMergeLaneMasks(
IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
- SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
+ SSAUpdater.getValueInMiddleOfBlock(&IMBB), Incoming.Reg);
}
} else {
// The phi is not observed from outside a loop. Use a more accurate
@@ -547,20 +550,23 @@ bool PhiLoweringHelper::lowerPhis() {
PIA.analyze(MBB, Incomings);
for (MachineBasicBlock *MBB : PIA.predecessors())
- SSAUpdater.AddAvailableValue(
+ SSAUpdater.addAvailableValue(
MBB, insertUndefLaneMask(MBB, MRI, LaneMaskRegAttrs));
for (auto &Incoming : Incomings) {
MachineBasicBlock &IMBB = *Incoming.Block;
if (PIA.isSource(IMBB)) {
constrainAsLaneMask(Incoming);
- SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
+ SSAUpdater.addAvailableValue(&IMBB, Incoming.Reg);
} else {
+ SSAUpdater.addUseBlock(&IMBB);
Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- SSAUpdater.AddAvailableValue(&IMBB, Incoming.UpdatedReg);
+ SSAUpdater.addAvailableValue(&IMBB, Incoming.UpdatedReg);
}
}
+ SSAUpdater.calculate();
+
for (auto &Incoming : Incomings) {
if (!Incoming.UpdatedReg.isValid())
continue;
@@ -568,11 +574,11 @@ bool PhiLoweringHelper::lowerPhis() {
MachineBasicBlock &IMBB = *Incoming.Block;
buildMergeLaneMasks(
IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
- SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
+ SSAUpdater.getValueInMiddleOfBlock(&IMBB), Incoming.Reg);
}
}
- Register NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
+ Register NewReg = SSAUpdater.getValueInMiddleOfBlock(&MBB);
if (NewReg != DstReg) {
replaceDstReg(NewReg, DstReg, &MBB);
MI->eraseFromParent();
@@ -585,7 +591,6 @@ bool PhiLoweringHelper::lowerPhis() {
bool Vreg1LoweringHelper::lowerCopiesToI1() {
bool Changed = false;
- MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
SmallVector<MachineInstr *, 4> DeadCopies;
@@ -643,12 +648,14 @@ bool Vreg1LoweringHelper::lowerCopiesToI1() {
PDT->findNearestCommonDominator(DomBlocks);
unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
if (FoundLoopLevel) {
- SSAUpdater.Initialize(DstReg);
- SSAUpdater.AddAvailableValue(&MBB, DstReg);
+ MachineIDFSSAUpdater SSAUpdater(*DT, *MF, DstReg);
+ SSAUpdater.addUseBlock(&MBB);
+ SSAUpdater.addAvailableValue(&MBB, DstReg);
LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs);
+ SSAUpdater.calculate();
buildMergeLaneMasks(MBB, MI, DL, DstReg,
- SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
+ SSAUpdater.getValueInMiddleOfBlock(&MBB), SrcReg);
DeadCopies.push_back(&MI);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
index e800cb2e24a7a..cfef60c66d6a7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
@@ -101,19 +101,19 @@ body: |
; GFX10-NEXT: successors: %bb.1(0x80000000)
; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C]](s1)
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C]](s1)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
index b76d421c16172..994640e524fc9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
@@ -1026,10 +1026,10 @@ body: |
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR1]]
; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[OR]], [[C4]]
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR2]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %46(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %47(s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 %53(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 %54(s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.1
@@ -1195,7 +1195,7 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %52(s1), %bb.6, %56(s1), %bb.7
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %52(s1), %bb.6, %57(s1), %bb.7
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %41(s1), %bb.6, %40(s1), %bb.7
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1)
diff --git a/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir b/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir
index fb052e28e2c20..9618abe1770bf 100644
--- a/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir
@@ -10,10 +10,10 @@ body: |
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
- ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[DEF]]
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir
index ecbd47a9e8d0d..9c27cb3017e95 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir
@@ -20,21 +20,21 @@ body: |
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr4
; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
; GCN-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
; GCN-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
- ; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $exec_lo
; GCN-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: {{ $}}
>From 6c1f128e3fd7aba1e81a6df3e32afbbe0c0999c8 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at amd.com>
Date: Fri, 10 Oct 2025 13:56:58 +0000
Subject: [PATCH 2/6] per-review fixes.
---
llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
index 0eefdf373dc60..eab4b7798e974 100644
--- a/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
@@ -6,7 +6,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the MachineIDFSSAUpdater class.
+// This file implements the MachineIDFSSAUpdater class, which provides an
+// efficient SSA form maintenance utility for machine-level IR. It uses the
+// iterated dominance frontier (IDF) algorithm via MachineForwardIDFCalculator
+// to compute phi-function placement, offering better performance than the
+// incremental MachineSSAUpdater approach. The updater requires a single call
+// to calculate() after all definitions and uses have been registered.
//
//===----------------------------------------------------------------------===//
@@ -160,14 +165,14 @@ void MachineIDFSSAUpdater::calculate() {
for (auto [BB, V] : Defines)
BBInfos[BB].LiveOutValue = V;
- for (auto *FrontierBB : IDFBlocks) {
+ for (MachineBasicBlock *FrontierBB : IDFBlocks) {
Register NewVR =
createInst(TargetOpcode::PHI, FrontierBB, FrontierBB->begin())
.getReg(0);
BBInfos[FrontierBB].LiveInValue = NewVR;
}
- for (auto *BB : IDFBlocks) {
+ for (MachineBasicBlock *BB : IDFBlocks) {
auto *PHI = &BB->front();
assert(PHI->isPHI());
MachineInstrBuilder MIB(*BB->getParent(), PHI);
>From 72b9a0ac2284590f9e02c196718cc15e51dd1dcf Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at amd.com>
Date: Wed, 15 Oct 2025 11:57:27 +0000
Subject: [PATCH 3/6] Add constructor parameter to switch between
G_IMPLICIT_DEF and IMPLICIT_DEF.
---
llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h | 11 +++++++----
llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp | 5 +++--
2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
index 8a8ae3a3e7ca7..5623e98fce55f 100644
--- a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
+++ b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
@@ -27,6 +27,7 @@ class MachineIDFSSAUpdater {
Register LiveOutValue;
};
+ const bool RunOnGenericRegs;
MachineDominatorTree &DT;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
@@ -45,13 +46,15 @@ class MachineIDFSSAUpdater {
public:
MachineIDFSSAUpdater(MachineDominatorTree &DT, MachineFunction &MF,
- const MachineRegisterInfo::VRegAttrs &RegAttr)
+ const MachineRegisterInfo::VRegAttrs &RegAttr,
+ bool RunOnGenericRegs = false)
: DT(DT), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
- RegAttrs(RegAttr) {}
+ RegAttrs(RegAttr), RunOnGenericRegs(RunOnGenericRegs) {}
MachineIDFSSAUpdater(MachineDominatorTree &DT, MachineFunction &MF,
- Register Reg)
- : MachineIDFSSAUpdater(DT, MF, MF.getRegInfo().getVRegAttrs(Reg)) {}
+ Register Reg, bool RunOnGenericRegs = false)
+ : MachineIDFSSAUpdater(DT, MF, MF.getRegInfo().getVRegAttrs(Reg),
+ RunOnGenericRegs) {}
/// Indicate that a rewritten value is available in the specified block
/// with the specified value. Must be called before invoking Calculate().
diff --git a/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
index eab4b7798e974..b184579cb0740 100644
--- a/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
@@ -126,8 +126,9 @@ Register MachineIDFSSAUpdater::computeValue(MachineBasicBlock *BB,
}
if (!V) {
- V = createInst(TargetOpcode::IMPLICIT_DEF, TopDomBB,
- TopDomBB->getFirstNonPHI())
+ V = createInst(RunOnGenericRegs ? TargetOpcode::G_IMPLICIT_DEF
+ : TargetOpcode::IMPLICIT_DEF,
+ TopDomBB, TopDomBB->getFirstNonPHI())
.getReg(0);
}
>From 33dac3385aa418b480345b56f00d8bcbb17c4af0 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at amd.com>
Date: Wed, 15 Oct 2025 16:46:44 +0000
Subject: [PATCH 4/6] fix member initialization order
---
llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
index 5623e98fce55f..7e46e94d3512a 100644
--- a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
+++ b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
@@ -27,11 +27,11 @@ class MachineIDFSSAUpdater {
Register LiveOutValue;
};
- const bool RunOnGenericRegs;
MachineDominatorTree &DT;
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
MachineRegisterInfo::VRegAttrs RegAttrs;
+ const bool RunOnGenericRegs;
SmallVector<std::pair<MachineBasicBlock *, Register>, 4> Defines;
SmallVector<MachineBasicBlock *, 4> UseBlocks;
>From 2e41b939d5b1bbbfe20df46ea3163771df4e11af Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at amd.com>
Date: Fri, 31 Oct 2025 12:23:51 +0000
Subject: [PATCH 5/6] header issues fixed
---
llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
index 7e46e94d3512a..632628e169fab 100644
--- a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
+++ b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
@@ -10,16 +10,20 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_MACHINE_SSAUPDATER2_H
-#define LLVM_TRANSFORMS_UTILS_MACHINE_SSAUPDATER2_H
+#ifndef LLVM_CODEGEN_MACHINEIDFSSAUPDATER_H
+#define LLVM_CODEGEN_MACHINEIDFSSAUPDATER_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
namespace llvm {
class MachineDominatorTree;
class MachineInstrBuilder;
class MachineBasicBlock;
+class TargetInstrInfo;
class MachineIDFSSAUpdater {
struct BBValueInfo {
@@ -77,4 +81,4 @@ class MachineIDFSSAUpdater {
} // end namespace llvm
-#endif // LLVM_TRANSFORMS_UTILS_MACHINE_SSAUPDATER2_H
+#endif // LLVM_CODEGEN_MACHINEIDFSSAUPDATER_H
>From 08b5b4e2422515d6e04f8ab73ae202a277e3d83a Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at amd.com>
Date: Fri, 31 Oct 2025 12:24:50 +0000
Subject: [PATCH 6/6] Revert "Add constructor parameter to switch between
G_IMPLICIT_DEF and IMPLICIT_DEF."
This reverts commit 72b9a0ac2284590f9e02c196718cc15e51dd1dcf.
---
llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h | 11 ++++-------
llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp | 5 ++---
2 files changed, 6 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
index 632628e169fab..80bb531cef988 100644
--- a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
+++ b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
@@ -35,7 +35,6 @@ class MachineIDFSSAUpdater {
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
MachineRegisterInfo::VRegAttrs RegAttrs;
- const bool RunOnGenericRegs;
SmallVector<std::pair<MachineBasicBlock *, Register>, 4> Defines;
SmallVector<MachineBasicBlock *, 4> UseBlocks;
@@ -50,15 +49,13 @@ class MachineIDFSSAUpdater {
public:
MachineIDFSSAUpdater(MachineDominatorTree &DT, MachineFunction &MF,
- const MachineRegisterInfo::VRegAttrs &RegAttr,
- bool RunOnGenericRegs = false)
+ const MachineRegisterInfo::VRegAttrs &RegAttr)
: DT(DT), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
- RegAttrs(RegAttr), RunOnGenericRegs(RunOnGenericRegs) {}
+ RegAttrs(RegAttr) {}
MachineIDFSSAUpdater(MachineDominatorTree &DT, MachineFunction &MF,
- Register Reg, bool RunOnGenericRegs = false)
- : MachineIDFSSAUpdater(DT, MF, MF.getRegInfo().getVRegAttrs(Reg),
- RunOnGenericRegs) {}
+ Register Reg)
+ : MachineIDFSSAUpdater(DT, MF, MF.getRegInfo().getVRegAttrs(Reg)) {}
/// Indicate that a rewritten value is available in the specified block
/// with the specified value. Must be called before invoking Calculate().
diff --git a/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
index b184579cb0740..eab4b7798e974 100644
--- a/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
@@ -126,9 +126,8 @@ Register MachineIDFSSAUpdater::computeValue(MachineBasicBlock *BB,
}
if (!V) {
- V = createInst(RunOnGenericRegs ? TargetOpcode::G_IMPLICIT_DEF
- : TargetOpcode::IMPLICIT_DEF,
- TopDomBB, TopDomBB->getFirstNonPHI())
+ V = createInst(TargetOpcode::IMPLICIT_DEF, TopDomBB,
+ TopDomBB->getFirstNonPHI())
.getReg(0);
}
More information about the llvm-commits
mailing list