[llvm-branch-commits] [llvm] [AMDGPU] Make SIShrinkInstructions pass return valid changed state (PR #168833)
Vikram Hegde via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Nov 30 19:54:50 PST 2025
https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/168833
From 7b56e305b5631a4c8fdc21524fd33fc2a2b46b8b Mon Sep 17 00:00:00 2001
From: vikhegde <vikram.hegde at amd.com>
Date: Thu, 20 Nov 2025 11:18:25 +0530
Subject: [PATCH 1/2] [AMDGPU] Make SIShrinkInstructions pass return valid
changed state
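Previously run() returned false unconditionally, even when instructions
had been rewritten in place, so the pass manager could wrongly conclude
that all analyses were preserved. This patch threads an explicit changed
state through the pass: shrinkScalarCompare, shrinkMIMG, shrinkMadFma
and shrinkScalarLogicOp now report whether they modified the
instruction, shrinkScalarLogicOp gains a MoveIterator out-parameter so
that "advance to the next instruction" stays separate from "MI was
modified", and run() accumulates the results into a Changed flag that it
returns instead of a hardcoded false.

For context, a minimal sketch of how a new-pass-manager wrapper
typically consumes that return value (illustrative only, not part of
this diff; the exact in-tree wrapper may differ):

  PreservedAnalyses
  SIShrinkInstructionsPass::run(MachineFunction &MF,
                                MachineFunctionAnalysisManager &) {
    // An accurate changed state lets the wrapper keep all analyses
    // when nothing was rewritten.
    if (!SIShrinkInstructions().run(MF))
      return PreservedAnalyses::all();
    // Instructions were rewritten in place; the CFG is untouched.
    auto PA = getMachineFunctionPassPreservedAnalyses();
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }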
---
.../Target/AMDGPU/SIShrinkInstructions.cpp | 95 ++++++++++++-------
1 file changed, 60 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 1b78f67e76d07..0c25092e38ccd 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -41,10 +41,10 @@ class SIShrinkInstructions {
bool isKUImmOperand(const MachineOperand &Src) const;
bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
- void shrinkScalarCompare(MachineInstr &MI) const;
- void shrinkMIMG(MachineInstr &MI) const;
- void shrinkMadFma(MachineInstr &MI) const;
- bool shrinkScalarLogicOp(MachineInstr &MI) const;
+ bool shrinkScalarCompare(MachineInstr &MI) const;
+ bool shrinkMIMG(MachineInstr &MI) const;
+ bool shrinkMadFma(MachineInstr &MI) const;
+ bool shrinkScalarLogicOp(MachineInstr &MI, bool &MoveIterator) const;
bool tryReplaceDeadSDST(MachineInstr &MI) const;
bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
Register Reg, unsigned SubReg) const;
@@ -241,27 +241,30 @@ void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
}
}
-void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
+bool SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
if (!ST->hasSCmpK())
- return;
+ return false;
// cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
// get constants on the RHS.
- if (!MI.getOperand(0).isReg())
- TII->commuteInstruction(MI, false, 0, 1);
+ bool Changed = false;
+ if (!MI.getOperand(0).isReg()) {
+ if (TII->commuteInstruction(MI, false, 0, 1))
+ Changed = true;
+ }
// cmpk requires src0 to be a register
const MachineOperand &Src0 = MI.getOperand(0);
if (!Src0.isReg())
- return;
+ return Changed;
MachineOperand &Src1 = MI.getOperand(1);
if (!Src1.isImm())
- return;
+ return Changed;
int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
if (SOPKOpc == -1)
- return;
+ return Changed;
// eq/ne is special because the imm16 can be treated as signed or unsigned,
// and initially selected to the unsigned versions.
@@ -275,9 +278,10 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
}
MI.setDesc(TII->get(SOPKOpc));
+ Changed = true;
}
- return;
+ return Changed;
}
const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
@@ -287,14 +291,16 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
if (!SIInstrInfo::sopkIsZext(SOPKOpc))
Src1.setImm(SignExtend64(Src1.getImm(), 32));
MI.setDesc(NewDesc);
+ Changed = true;
}
+ return Changed;
}
// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
-void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
+bool SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
if (!Info)
- return;
+ return false;
uint8_t NewEncoding;
switch (Info->MIMGEncoding) {
@@ -305,7 +311,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
NewEncoding = AMDGPU::MIMGEncGfx11Default;
break;
default:
- return;
+ return false;
}
int VAddr0Idx =
@@ -359,7 +365,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
} else if (Vgpr == NextVgpr) {
NextVgpr = Vgpr + Dwords;
} else {
- return;
+ return false;
}
if (!Op.isUndef())
@@ -369,7 +375,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
}
if (VgprBase + NewAddrDwords > 256)
- return;
+ return false;
// Further check for implicit tied operands - this may be present if TFE is
// enabled
@@ -408,21 +414,22 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
ToUntie - (EndVAddr - 1));
}
+ return true;
}
// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
-void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
+bool SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
// Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
// there is no reason to try to shrink them.
if (!ST->hasVOP3Literal())
- return;
+ return false;
// There is no advantage to doing this pre-RA.
if (!IsPostRA)
- return;
+ return false;
if (TII->hasAnyModifiersSet(MI))
- return;
+ return false;
const unsigned Opcode = MI.getOpcode();
MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
@@ -439,7 +446,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
Swap = true;
else
- return;
+ return false;
switch (Opcode) {
default:
@@ -477,7 +484,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
else if (Src0.isImm() && !TII->isInlineConstant(Src0))
Swap = true;
else
- return;
+ return false;
switch (Opcode) {
default:
@@ -509,10 +516,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
}
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
- return;
+ return false;
if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
- return;
+ return false;
if (Swap) {
// Swap Src0 and Src1 by building a new instruction.
@@ -527,14 +534,16 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
TII->removeModOperands(MI);
MI.setDesc(TII->get(NewOpcode));
}
+ return true;
}
/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
-/// \returns true if the caller should continue the machine function iterator
-bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
+/// \param MoveIterator Set to true if the caller should skip to the next instruction.
+/// \return true if MI was modified.
+bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI, bool &MoveIterator) const {
unsigned Opc = MI.getOpcode();
const MachineOperand *Dest = &MI.getOperand(0);
MachineOperand *Src0 = &MI.getOperand(1);
@@ -580,6 +589,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+ MoveIterator = true;
return true;
}
@@ -598,6 +608,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
} else {
SrcImm->setImm(NewImm);
}
+ return true;
}
}
@@ -856,6 +867,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
IsPostRA = MF.getProperties().hasNoVRegs();
unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
+ bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I, Next;
@@ -879,6 +891,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
if (ModOpcode != 0) {
MI.setDesc(TII->get(ModOpcode));
Src.setImm(static_cast<int64_t>(ModImm));
+ Changed = true;
continue;
}
}
@@ -889,6 +902,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::COPY)) {
if (auto *NextMI = matchSwap(MI)) {
Next = NextMI->getIterator();
+ Changed = true;
continue;
}
}
@@ -901,8 +915,10 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MachineOperand *Src1 = &MI.getOperand(2);
if (!Src0->isReg() && Src1->isReg()) {
- if (TII->commuteInstruction(MI, false, 1, 2))
+ if (TII->commuteInstruction(MI, false, 1, 2)) {
std::swap(Src0, Src1);
+ Changed = true;
+ }
}
// FIXME: This could work better if hints worked with subregisters. If
@@ -922,13 +938,14 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
Src1->setImm(SignExtend64(Src1->getImm(), 32));
MI.setDesc(TII->get(Opc));
MI.tieOperands(0, 1);
+ Changed = true;
}
}
}
// Try to use s_cmpk_*
if (MI.isCompare() && TII->isSOPC(MI)) {
- shrinkScalarCompare(MI);
+ Changed |= shrinkScalarCompare(MI);
continue;
}
@@ -943,10 +960,12 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
if (isKImmOperand(Src)) {
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
Src.setImm(SignExtend64(Src.getImm(), 32));
+ Changed = true;
} else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
/*Scalar=*/true))) {
MI.setDesc(TII->get(ModOpc));
Src.setImm(static_cast<int64_t>(ModImm));
+ Changed = true;
}
}
@@ -957,13 +976,15 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
MI.getOpcode() == AMDGPU::S_OR_B32 ||
MI.getOpcode() == AMDGPU::S_XOR_B32) {
- if (shrinkScalarLogicOp(MI))
+ bool MoveIterator = false;
+ Changed |= shrinkScalarLogicOp(MI, MoveIterator);
+ if (MoveIterator)
continue;
}
if (IsPostRA && TII->isMIMG(MI.getOpcode()) &&
ST->getGeneration() >= AMDGPUSubtarget::GFX10) {
- shrinkMIMG(MI);
+ Changed |= shrinkMIMG(MI);
continue;
}
@@ -979,14 +1000,14 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
(MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
ST->hasFmaakFmamkF64Insts())) {
- shrinkMadFma(MI);
+ Changed |= shrinkMadFma(MI);
continue;
}
// If there is no chance we will shrink it and use VCC as sdst to get
// a 32 bit form try to replace dead sdst with NULL.
if (TII->isVOP3(MI.getOpcode())) {
- tryReplaceDeadSDST(MI);
+ Changed |= tryReplaceDeadSDST(MI);
if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
continue;
}
@@ -997,9 +1018,12 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
// it.
if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
!TII->canShrink(MI, *MRI)) {
- tryReplaceDeadSDST(MI);
+ Changed |= tryReplaceDeadSDST(MI);
continue;
}
+
+ // Operands were commuted.
+ Changed = true;
}
int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
@@ -1103,9 +1127,10 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
foldImmediates(*Inst32);
LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
+ Changed = true;
}
}
- return false;
+ return Changed;
}
bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {
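An aside on the doc comment updated above: the s_xnor shrink in
shrinkScalarLogicOp relies on the identity a ^ b == ~(a ^ ~b), which
lets an XOR whose literal is only inline-legal in inverted form be
rewritten as an XNOR of the inverted literal. A hypothetical standalone
snippet (not part of the patch) to sanity-check the identity:

  #include <cassert>
  #include <cstdint>

  int main() {
    // a ^ b == ~(a ^ ~b): flipping b's bits flips every bit of the
    // XOR result, and the outer complement flips them back.
    std::uint32_t a = 0xDEADBEEFu, b = 0x12345678u;
    assert((a ^ b) == ~(a ^ ~b));
    return 0;
  }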
From 68c9eb1dda228e73482f5ec67d2ca1d602af116b Mon Sep 17 00:00:00 2001
From: vikhegde <vikram.hegde at amd.com>
Date: Fri, 21 Nov 2025 11:16:19 +0530
Subject: [PATCH 2/2] clang-format
---
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 0c25092e38ccd..2f4ed9d97eccd 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -541,9 +541,11 @@ bool SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
-/// \param MoveIterator Set to true if the caller should skip to the next instruction.
+/// \param MoveIterator Set to true if the caller should skip to the next
+/// instruction.
/// \return true if MI was modified.
-bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI, bool &MoveIterator) const {
+bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI,
+ bool &MoveIterator) const {
unsigned Opc = MI.getOpcode();
const MachineOperand *Dest = &MI.getOperand(0);
MachineOperand *Src0 = &MI.getOperand(1);
@@ -1000,7 +1002,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
(MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
ST->hasFmaakFmamkF64Insts())) {
- Changed |= shrinkMadFma(MI);
+ Changed |= shrinkMadFma(MI);
continue;
}