[llvm] CodeGen/AMDGPU: Allow 3-address conversion of bundled instructions (PR #166213)
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 10:59:19 PST 2025
https://github.com/nhaehnle updated https://github.com/llvm/llvm-project/pull/166213
>From 909e8a0bbdce25aa05d7efee5f87b4f83e962403 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle at amd.com>
Date: Tue, 7 Oct 2025 12:17:02 -0700
Subject: [PATCH] CodeGen/AMDGPU: Allow 3-address conversion of bundled
instructions
This is in preparation for future changes in AMDGPU that will make more
substantial use of bundles pre-RA. For now, simply test this with
degenerate (single-instruction) bundles.
commit-id:4a30cb78
---
.../lib/CodeGen/TwoAddressInstructionPass.cpp | 54 ++++++++++--------
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 56 +++++++++++++++++--
llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir | 9 ++-
3 files changed, 87 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1f816b94cf56b..7056ced5385ed 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -794,29 +794,34 @@ bool TwoAddressInstructionImpl::convertInstTo3Addr(
if (!NewMI)
return false;
- LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
- LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
-
- // If the old instruction is debug value tracked, an update is required.
- if (auto OldInstrNum = mi->peekDebugInstrNum()) {
- assert(mi->getNumExplicitDefs() == 1);
- assert(NewMI->getNumExplicitDefs() == 1);
-
- // Find the old and new def location.
- unsigned OldIdx = mi->defs().begin()->getOperandNo();
- unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
-
- // Record that one def has been replaced by the other.
- unsigned NewInstrNum = NewMI->getDebugInstrNum();
- MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
- std::make_pair(NewInstrNum, NewIdx));
- }
-
- MBB->erase(mi); // Nuke the old inst.
-
for (MachineInstr &MI : MIS)
DistanceMap.insert(std::make_pair(&MI, Dist++));
- Dist--;
+
+ if (&*mi == NewMI) {
+ LLVM_DEBUG(dbgs() << "2addr: CONVERTED IN-PLACE TO 3-ADDR: " << *mi);
+ } else {
+ LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+
+ // If the old instruction is debug value tracked, an update is required.
+ if (auto OldInstrNum = mi->peekDebugInstrNum()) {
+ assert(mi->getNumExplicitDefs() == 1);
+ assert(NewMI->getNumExplicitDefs() == 1);
+
+ // Find the old and new def location.
+ unsigned OldIdx = mi->defs().begin()->getOperandNo();
+ unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
+
+ // Record that one def has been replaced by the other.
+ unsigned NewInstrNum = NewMI->getDebugInstrNum();
+ MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
+ std::make_pair(NewInstrNum, NewIdx));
+ }
+
+ MBB->erase(mi); // Nuke the old inst.
+ Dist--;
+ }
+
mi = NewMI;
nmi = std::next(mi);
@@ -1329,6 +1334,9 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
+ // Give targets a chance to convert bundled instructions.
+ bool ConvertibleTo3Addr = MI.isConvertibleTo3Addr(MachineInstr::AnyInBundle);
+
// If the instruction is convertible to 3 Addr, instead
// of returning try 3 Addr transformation aggressively and
// use this variable to check later. Because it might be better.
@@ -1337,7 +1345,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
// addl %esi, %edi
// movl %edi, %eax
// ret
- if (Commuted && !MI.isConvertibleTo3Addr())
+ if (Commuted && !ConvertibleTo3Addr)
return false;
if (shouldOnlyCommute)
@@ -1357,7 +1365,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform(
regBKilled = isKilled(MI, regB, true);
}
- if (MI.isConvertibleTo3Addr()) {
+ if (ConvertibleTo3Addr) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d930a21c2d7f5..031ed90e0ad15 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4044,10 +4044,29 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
MachineBasicBlock &MBB = *MI.getParent();
+ MachineInstr *CandidateMI = &MI;
+
+ if (MI.isBundle()) {
+ // This is a temporary placeholder for bundle handling that enables us to
+ // exercise the relevant code paths in the two-address instruction pass.
+ if (MI.getBundleSize() != 1)
+ return nullptr;
+ CandidateMI = MI.getNextNode();
+ }
+
ThreeAddressUpdates U;
- MachineInstr *NewMI = convertToThreeAddressImpl(MI, U);
+ MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
+ if (!NewMI)
+ return nullptr;
- if (NewMI) {
+ if (MI.isBundle()) {
+ CandidateMI->eraseFromBundle();
+
+ for (MachineOperand &MO : MI.all_defs()) {
+ if (MO.isTied())
+ MI.untieRegOperand(MO.getOperandNo());
+ }
+ } else {
updateLiveVariables(LV, MI, *NewMI);
if (LIS) {
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
@@ -4088,7 +4107,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(DefReg).AliveBlocks.clear();
}
- if (LIS) {
+ if (MI.isBundle()) {
+ VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
+ if (!VRI.Reads && !VRI.Writes) {
+ for (MachineOperand &MO : MI.all_uses()) {
+ if (MO.isReg() && MO.getReg() == DefReg) {
+ MI.removeOperand(MO.getOperandNo());
+ break;
+ }
+ }
+
+ if (LIS)
+ LIS->shrinkToUses(&LIS->getInterval(DefReg));
+ }
+ } else if (LIS) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
// We cannot delete the original instruction here, so hack out the use
@@ -4103,11 +4135,27 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
}
}
+ if (MI.isBundle()) {
+ VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg);
+ if (!VRI.Reads && !VRI.Writes) {
+ for (MachineOperand &MIOp : MI.uses()) {
+ if (MIOp.isReg() && MIOp.getReg() == DefReg) {
+ MIOp.setIsUndef(true);
+ MIOp.setReg(DummyReg);
+ }
+ }
+ }
+
+ auto MO = MachineOperand::CreateReg(DummyReg, false);
+ MO.setIsUndef(true);
+ MI.addOperand(MO);
+ }
+
LIS->shrinkToUses(&DefLI);
}
}
- return NewMI;
+ return MI.isBundle() ? &MI : NewMI;
}
MachineInstr *
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
index 696962a88c8b8..8ae50d8e0e071 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir
@@ -31,7 +31,7 @@ body: |
...
-# This test is an example where conversion to three-address form would be beneficial.
+# This test is an example where conversion to three-address form is beneficial.
---
name: test_fmac_reuse_bundle
body: |
@@ -41,11 +41,10 @@ body: |
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GCN-NEXT: BUNDLE implicit-def [[COPY1]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY1]](tied-def 0), implicit $mode, implicit $exec {
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY1]], implicit $mode, implicit $exec
+ ; GCN-NEXT: BUNDLE implicit-def %3, implicit [[DEF]], implicit [[DEF1]], implicit [[COPY]], implicit $mode, implicit $exec {
+ ; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F32_e64 0, killed [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: }
- ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FMA_F32_e64_]], [[COPY]], 0, implicit $exec
%2:vgpr_32 = COPY $vgpr0
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
More information about the llvm-commits
mailing list