[llvm] [AMDGPU]:: Minor Unpacking Fixes. (PR #163992)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 17 09:48:44 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Akash Dutta (akadutta)
<details>
<summary>Changes</summary>
Optimize condition checks, Remove compilation overhead for unsupported archs
---
Full diff: https://github.com/llvm/llvm-project/pull/163992.diff
1 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp (+30-44)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 01a40c1e38817..d9c3d6b399225 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -47,9 +47,6 @@ class SIPreEmitPeephole {
const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
- // Check if the machine instruction being processed is a supported packed
- // instruction.
- bool isUnpackingSupportedInstr(MachineInstr &MI) const;
// Creates a list of packed instructions following an MFMA that are suitable
// for unpacking.
void collectUnpackingCandidates(MachineInstr &BeginMI,
@@ -454,23 +451,6 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
return true;
}
-// If support is extended to new operations, add tests in
-// llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir.
-bool SIPreEmitPeephole::isUnpackingSupportedInstr(MachineInstr &MI) const {
- if (!TII->isNeverCoissue(MI))
- return false;
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- case AMDGPU::V_PK_ADD_F32:
- case AMDGPU::V_PK_MUL_F32:
- case AMDGPU::V_PK_FMA_F32:
- return true;
- default:
- return false;
- }
- llvm_unreachable("Fully covered switch");
-}
-
bool SIPreEmitPeephole::canUnpackingClobberRegister(const MachineInstr &MI) {
unsigned OpCode = MI.getOpcode();
Register DstReg = MI.getOperand(0).getReg();
@@ -612,10 +592,12 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
for (auto I = std::next(BeginMI.getIterator()); I != E; ++I) {
MachineInstr &Instr = *I;
+ uint16_t UnpackedOpCode = mapToUnpackedOpcode(Instr);
if (Instr.isMetaInstruction())
continue;
if ((Instr.isTerminator()) ||
- (TII->isNeverCoissue(Instr) && !isUnpackingSupportedInstr(Instr)) ||
+ (TII->isNeverCoissue(Instr) &&
+ (UnpackedOpCode == std::numeric_limits<uint16_t>::max())) ||
(SIInstrInfo::modifiesModeRegister(Instr) &&
Instr.modifiesRegister(AMDGPU::EXEC, TRI)))
return;
@@ -639,7 +621,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
if (TRI->regsOverlap(MFMADef, InstrMO.getReg()))
return;
}
- if (!isUnpackingSupportedInstr(Instr))
+ if (UnpackedOpCode == std::numeric_limits<uint16_t>::max())
continue;
if (canUnpackingClobberRegister(Instr))
@@ -687,8 +669,8 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
bool IsHiBits) {
MachineBasicBlock &MBB = *I.getParent();
const DebugLoc &DL = I.getDebugLoc();
- const MachineOperand *SrcMO1 = TII->getNamedOperand(I, AMDGPU::OpName::src0);
- const MachineOperand *SrcMO2 = TII->getNamedOperand(I, AMDGPU::OpName::src1);
+ const MachineOperand *SrcMO0 = TII->getNamedOperand(I, AMDGPU::OpName::src0);
+ const MachineOperand *SrcMO1 = TII->getNamedOperand(I, AMDGPU::OpName::src1);
Register DstReg = I.getOperand(0).getReg();
unsigned OpCode = I.getOpcode();
Register UnpackedDstReg = IsHiBits ? TRI->getSubReg(DstReg, AMDGPU::sub1)
@@ -702,15 +684,15 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
MachineInstrBuilder NewMI = BuildMI(MBB, I, DL, TII->get(UnpackedOpcode));
NewMI.addDef(UnpackedDstReg); // vdst
- addOperandAndMods(NewMI, Src0Mods, IsHiBits, *SrcMO1);
- addOperandAndMods(NewMI, Src1Mods, IsHiBits, *SrcMO2);
+ addOperandAndMods(NewMI, Src0Mods, IsHiBits, *SrcMO0);
+ addOperandAndMods(NewMI, Src1Mods, IsHiBits, *SrcMO1);
if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {
- const MachineOperand *SrcMO3 =
+ const MachineOperand *SrcMO2 =
TII->getNamedOperand(I, AMDGPU::OpName::src2);
unsigned Src2Mods =
TII->getNamedOperand(I, AMDGPU::OpName::src2_modifiers)->getImm();
- addOperandAndMods(NewMI, Src2Mods, IsHiBits, *SrcMO3);
+ addOperandAndMods(NewMI, Src2Mods, IsHiBits, *SrcMO2);
}
NewMI.addImm(ClampVal); // clamp
// Packed instructions do not support output modifiers. safe to assign them 0
@@ -787,22 +769,26 @@ bool SIPreEmitPeephole::run(MachineFunction &MF) {
// TODO: Fold this into previous block, if possible. Evaluate and handle any
// side effects.
- for (MachineBasicBlock &MBB : MF) {
- // Unpack packed instructions overlapped by MFMAs. This allows the compiler
- // to co-issue unpacked instructions with MFMA
- auto SchedModel = TII->getSchedModel();
- SetVector<MachineInstr *> InstrsToUnpack;
- for (auto &MI : make_early_inc_range(MBB.instrs())) {
- if (!SIInstrInfo::isMFMA(MI))
- continue;
- const MCSchedClassDesc *SchedClassDesc =
- SchedModel.resolveSchedClass(&MI);
- uint16_t NumMFMACycles =
- SchedModel.getWriteProcResBegin(SchedClassDesc)->ReleaseAtCycle;
- collectUnpackingCandidates(MI, InstrsToUnpack, NumMFMACycles);
- }
- for (MachineInstr *MI : InstrsToUnpack) {
- performF32Unpacking(*MI);
+
+ // Perform the extra MF scans only for supported archs
+ if (ST.hasGFX950Insts() || ST.hasGFX940Insts()) {
+ for (MachineBasicBlock &MBB : MF) {
+ // Unpack packed instructions overlapped by MFMAs. This allows the compiler
+ // to co-issue unpacked instructions with MFMA
+ auto SchedModel = TII->getSchedModel();
+ SetVector<MachineInstr *> InstrsToUnpack;
+ for (auto &MI : make_early_inc_range(MBB.instrs())) {
+ if (!SIInstrInfo::isMFMA(MI))
+ continue;
+ const MCSchedClassDesc *SchedClassDesc =
+ SchedModel.resolveSchedClass(&MI);
+ uint16_t NumMFMACycles =
+ SchedModel.getWriteProcResBegin(SchedClassDesc)->ReleaseAtCycle;
+ collectUnpackingCandidates(MI, InstrsToUnpack, NumMFMACycles);
+ }
+ for (MachineInstr *MI : InstrsToUnpack) {
+ performF32Unpacking(*MI);
+ }
}
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/163992
More information about the llvm-commits
mailing list