[llvm] 7dcd698 - [AMDGPU] Make SIShrinkInstructions pass return valid changed state (#168833)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 25 18:45:28 PST 2025
Author: Vikram Hegde
Date: 2025-12-26T08:15:23+05:30
New Revision: 7dcd69880102cda2fe1a43f3356abb0ec41f2dc4
URL: https://github.com/llvm/llvm-project/commit/7dcd69880102cda2fe1a43f3356abb0ec41f2dc4
DIFF: https://github.com/llvm/llvm-project/commit/7dcd69880102cda2fe1a43f3356abb0ec41f2dc4.diff
LOG: [AMDGPU] Make SIShrinkInstructions pass return valid changed state (#168833)
The SIShrinkInstructions run() method currently returns "false"
unconditionally. This change makes it return the actual changed state.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 1b78f67e76d07..57f1096579669 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -27,6 +27,8 @@ using namespace llvm;
namespace {
+enum ChangeKind { None, UpdateHint, UpdateInst };
+
class SIShrinkInstructions {
MachineFunction *MF;
MachineRegisterInfo *MRI;
@@ -41,10 +43,10 @@ class SIShrinkInstructions {
bool isKUImmOperand(const MachineOperand &Src) const;
bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
- void shrinkScalarCompare(MachineInstr &MI) const;
- void shrinkMIMG(MachineInstr &MI) const;
- void shrinkMadFma(MachineInstr &MI) const;
- bool shrinkScalarLogicOp(MachineInstr &MI) const;
+ bool shrinkScalarCompare(MachineInstr &MI) const;
+ bool shrinkMIMG(MachineInstr &MI) const;
+ bool shrinkMadFma(MachineInstr &MI) const;
+ ChangeKind shrinkScalarLogicOp(MachineInstr &MI) const;
bool tryReplaceDeadSDST(MachineInstr &MI) const;
bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
Register Reg, unsigned SubReg) const;
@@ -241,27 +243,30 @@ void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
}
}
-void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
+bool SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
if (!ST->hasSCmpK())
- return;
+ return false;
// cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
// get constants on the RHS.
- if (!MI.getOperand(0).isReg())
- TII->commuteInstruction(MI, false, 0, 1);
+ bool Changed = false;
+ if (!MI.getOperand(0).isReg()) {
+ if (TII->commuteInstruction(MI, false, 0, 1))
+ Changed = true;
+ }
// cmpk requires src0 to be a register
const MachineOperand &Src0 = MI.getOperand(0);
if (!Src0.isReg())
- return;
+ return Changed;
MachineOperand &Src1 = MI.getOperand(1);
if (!Src1.isImm())
- return;
+ return Changed;
int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
if (SOPKOpc == -1)
- return;
+ return Changed;
// eq/ne is special because the imm16 can be treated as signed or unsigned,
// and initially selected to the unsigned versions.
@@ -275,9 +280,10 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
}
MI.setDesc(TII->get(SOPKOpc));
+ Changed = true;
}
- return;
+ return Changed;
}
const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
@@ -287,14 +293,16 @@ void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
if (!SIInstrInfo::sopkIsZext(SOPKOpc))
Src1.setImm(SignExtend64(Src1.getImm(), 32));
MI.setDesc(NewDesc);
+ Changed = true;
}
+ return Changed;
}
// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
-void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
+bool SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
if (!Info)
- return;
+ return false;
uint8_t NewEncoding;
switch (Info->MIMGEncoding) {
@@ -305,7 +313,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
NewEncoding = AMDGPU::MIMGEncGfx11Default;
break;
default:
- return;
+ return false;
}
int VAddr0Idx =
@@ -359,7 +367,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
} else if (Vgpr == NextVgpr) {
NextVgpr = Vgpr + Dwords;
} else {
- return;
+ return false;
}
if (!Op.isUndef())
@@ -369,7 +377,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
}
if (VgprBase + NewAddrDwords > 256)
- return;
+ return false;
// Further check for implicit tied operands - this may be present if TFE is
// enabled
@@ -408,21 +416,22 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
ToUntie - (EndVAddr - 1));
}
+ return true;
}
// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
-void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
+bool SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
// Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
// there is no reason to try to shrink them.
if (!ST->hasVOP3Literal())
- return;
+ return false;
// There is no advantage to doing this pre-RA.
if (!IsPostRA)
- return;
+ return false;
if (TII->hasAnyModifiersSet(MI))
- return;
+ return false;
const unsigned Opcode = MI.getOpcode();
MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
@@ -439,7 +448,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
Swap = true;
else
- return;
+ return false;
switch (Opcode) {
default:
@@ -477,7 +486,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
else if (Src0.isImm() && !TII->isInlineConstant(Src0))
Swap = true;
else
- return;
+ return false;
switch (Opcode) {
default:
@@ -509,10 +518,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
}
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
- return;
+ return false;
if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
- return;
+ return false;
if (Swap) {
// Swap Src0 and Src1 by building a new instruction.
@@ -527,14 +536,17 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
TII->removeModOperands(MI);
MI.setDesc(TII->get(NewOpcode));
}
+ return true;
}
/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
-/// \returns true if the caller should continue the machine function iterator
-bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
+/// \return ChangeKind::None if no changes were made.
+/// ChangeKind::UpdateHint if regalloc hints were updated.
+/// ChangeKind::UpdateInst if the instruction was modified.
+ChangeKind SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
const MachineOperand *Dest = &MI.getOperand(0);
MachineOperand *Src0 = &MI.getOperand(1);
@@ -544,7 +556,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
if (!SrcImm->isImm() ||
AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
- return false;
+ return ChangeKind::None;
uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
uint32_t NewImm = 0;
@@ -580,7 +592,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
- return true;
+ return ChangeKind::UpdateHint;
}
if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
@@ -598,10 +610,11 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
} else {
SrcImm->setImm(NewImm);
}
+ return ChangeKind::UpdateInst;
}
}
- return false;
+ return ChangeKind::None;
}
// This is the same as MachineInstr::readsRegister/modifiesRegister except
@@ -856,6 +869,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
IsPostRA = MF.getProperties().hasNoVRegs();
unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
+ bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I, Next;
@@ -879,6 +893,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
if (ModOpcode != 0) {
MI.setDesc(TII->get(ModOpcode));
Src.setImm(static_cast<int64_t>(ModImm));
+ Changed = true;
continue;
}
}
@@ -889,6 +904,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::COPY)) {
if (auto *NextMI = matchSwap(MI)) {
Next = NextMI->getIterator();
+ Changed = true;
continue;
}
}
@@ -901,8 +917,10 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MachineOperand *Src1 = &MI.getOperand(2);
if (!Src0->isReg() && Src1->isReg()) {
- if (TII->commuteInstruction(MI, false, 1, 2))
+ if (TII->commuteInstruction(MI, false, 1, 2)) {
std::swap(Src0, Src1);
+ Changed = true;
+ }
}
// FIXME: This could work better if hints worked with subregisters. If
@@ -922,13 +940,14 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
Src1->setImm(SignExtend64(Src1->getImm(), 32));
MI.setDesc(TII->get(Opc));
MI.tieOperands(0, 1);
+ Changed = true;
}
}
}
// Try to use s_cmpk_*
if (MI.isCompare() && TII->isSOPC(MI)) {
- shrinkScalarCompare(MI);
+ Changed |= shrinkScalarCompare(MI);
continue;
}
@@ -943,10 +962,12 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
if (isKImmOperand(Src)) {
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
Src.setImm(SignExtend64(Src.getImm(), 32));
+ Changed = true;
} else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
/*Scalar=*/true))) {
MI.setDesc(TII->get(ModOpc));
Src.setImm(static_cast<int64_t>(ModImm));
+ Changed = true;
}
}
@@ -957,13 +978,15 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
MI.getOpcode() == AMDGPU::S_OR_B32 ||
MI.getOpcode() == AMDGPU::S_XOR_B32) {
- if (shrinkScalarLogicOp(MI))
+ ChangeKind CK = shrinkScalarLogicOp(MI);
+ if (CK == ChangeKind::UpdateHint)
continue;
+ Changed |= (CK == ChangeKind::UpdateInst);
}
if (IsPostRA && TII->isMIMG(MI.getOpcode()) &&
ST->getGeneration() >= AMDGPUSubtarget::GFX10) {
- shrinkMIMG(MI);
+ Changed |= shrinkMIMG(MI);
continue;
}
@@ -979,14 +1002,14 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
(MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
ST->hasFmaakFmamkF64Insts())) {
- shrinkMadFma(MI);
+ Changed |= shrinkMadFma(MI);
continue;
}
// If there is no chance we will shrink it and use VCC as sdst to get
// a 32 bit form try to replace dead sdst with NULL.
if (TII->isVOP3(MI.getOpcode())) {
- tryReplaceDeadSDST(MI);
+ Changed |= tryReplaceDeadSDST(MI);
if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
continue;
}
@@ -997,9 +1020,12 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
// it.
if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
!TII->canShrink(MI, *MRI)) {
- tryReplaceDeadSDST(MI);
+ Changed |= tryReplaceDeadSDST(MI);
continue;
}
+
+ // Operands were commuted.
+ Changed = true;
}
int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
@@ -1103,9 +1129,10 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
foldImmediates(*Inst32);
LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
+ Changed = true;
}
}
- return false;
+ return Changed;
}
bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {
More information about the llvm-commits
mailing list