[llvm] Co-issue packed instructions by unpacking (PR #151704)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 11:48:14 PDT 2025
================
@@ -260,38 +995,60 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
Changed |= processReg(Reg);
}
- if (!ST.useRealTrue16Insts())
- return Changed;
-
// Add RA hints to improve True16 COPY elimination.
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- if (MI.getOpcode() != AMDGPU::COPY)
- continue;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- if (Dst.isVirtual() &&
- MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass &&
- Src.isPhysical() &&
- TRI->getRegClassForReg(*MRI, Src) == &AMDGPU::VGPR_32RegClass)
- MRI->setRegAllocationHint(Dst, 0, TRI->getSubReg(Src, AMDGPU::lo16));
- if (Src.isVirtual() &&
- MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass &&
- Dst.isPhysical() &&
- TRI->getRegClassForReg(*MRI, Dst) == &AMDGPU::VGPR_32RegClass)
- MRI->setRegAllocationHint(Src, 0, TRI->getSubReg(Dst, AMDGPU::lo16));
- if (!Dst.isVirtual() || !Src.isVirtual())
- continue;
- if (MRI->getRegClass(Dst) == &AMDGPU::VGPR_32RegClass &&
- MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass) {
- MRI->setRegAllocationHint(Dst, AMDGPURI::Size32, Src);
- MRI->setRegAllocationHint(Src, AMDGPURI::Size16, Dst);
+ // Unpack packed instructions to overlap MFMAs. This allows the compiler to
+ // co-issue unpacked instructions with MFMA
+ for (MachineBasicBlock &MBB : MF) {
+ SetVector<MachineInstr *> InstrsToUnpack;
+ IsF16MaskSet = false;
+ uint16_t NumMFMACycles = 0;
+ auto SchedModel = TII->getSchedModel();
+ for (MachineInstr &MI : MBB) {
+ if (SIInstrInfo::isMFMA(MI)) {
+ const MCSchedClassDesc *SchedClassDesc =
+ SchedModel.resolveSchedClass(&MI);
+ NumMFMACycles =
----------------
jrbyrnes wrote:
Should we reduce this to account for the issue latency of the MFMA?
https://github.com/llvm/llvm-project/pull/151704
More information about the llvm-commits
mailing list