[llvm] unpack packed instructions overlapped by MFMAs post-RA scheduling (PR #157968)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 12:19:30 PDT 2025
================
@@ -417,6 +454,281 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
return true;
}
+// Returns true when MI is a packed instruction that this pass is able to
+// unpack: TII must report it as never co-issuing, and its opcode must be one
+// of V_PK_ADD_F32, V_PK_MUL_F32 or V_PK_FMA_F32. Every other instruction
+// yields false.
+//
+// If support is extended to new operations, add tests in
+// llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir.
+bool SIPreEmitPeephole::isUnpackingSupportedInstr(MachineInstr &MI) const {
+  // Reject anything TII does not classify as never co-issuing before looking
+  // at the opcode.
+  if (!TII->isNeverCoissue(MI))
+    return false;
+
+  // The switch is over a plain unsigned opcode, so it needs a default label;
+  // since every label returns, no trailing llvm_unreachable is required.
+  switch (MI.getOpcode()) {
+  case AMDGPU::V_PK_ADD_F32:
+  case AMDGPU::V_PK_MUL_F32:
+  case AMDGPU::V_PK_FMA_F32:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool SIPreEmitPeephole::canUnpackingClobberRegister(
+ const MachineInstr &MI) {
+ unsigned OpCode = MI.getOpcode();
+ Register DstReg = MI.getOperand(0).getReg();
+ // Only the first register in the register pair needs to be checked due to the
+ // unpacking order. Packed instructions are unpacked such that the lower 32
+ // bits (i.e., the first register in the pair) are written first. This can
+ // introduce dependencies if the first register is written in one instruction
+ // and then read as part of the higher 32 bits in the subsequent instruction.
+ // Such scenarios can arise due to specific combinations of op_sel and
+ // op_sel_hi modifiers.
+ Register UnpackedDstReg = TRI->getSubReg(DstReg, AMDGPU::sub0);
+
+ const MachineOperand *Src0MO = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ if (Src0MO && Src0MO->isReg()) {
+ Register SrcReg0 = Src0MO->getReg();
+ unsigned Src0Mods =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
+ Register HiSrc0Reg = (Src0Mods & SISrcMods::OP_SEL_1)
+ ? TRI->getSubReg(SrcReg0, AMDGPU::sub1)
+ : TRI->getSubReg(SrcReg0, AMDGPU::sub0);
+ // Check if the register selected by op_sel_hi is the same as the first
+ // register in the destination register pair.
+ if (TRI->regsOverlap(UnpackedDstReg, HiSrc0Reg))
+ return true;
+ }
+
+ const MachineOperand *Src1MO = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1MO && Src1MO->isReg()) {
+ Register SrcReg1 = Src1MO->getReg();
+ unsigned Src1Mods =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
+ Register HiSrc1Reg = (Src1Mods & SISrcMods::OP_SEL_1)
+ ? TRI->getSubReg(SrcReg1, AMDGPU::sub1)
+ : TRI->getSubReg(SrcReg1, AMDGPU::sub0);
+ if (TRI->regsOverlap(UnpackedDstReg, HiSrc1Reg))
+ return true;
+ }
+
+ // Applicable for packed instructions with 3 source operands, such as
+ // V_PK_FMA.
+ if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {
+ const MachineOperand *Src2MO =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ if (Src2MO && Src2MO->isReg()) {
+ Register SrcReg2 =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2)->getReg();
----------------
jrbyrnes wrote:
```suggestion
Src2MO->getReg();
```
https://github.com/llvm/llvm-project/pull/157968
More information about the llvm-commits
mailing list