[llvm] unpack packed instructions overlapped by MFMAs post-RA scheduling (PR #157968)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 17 12:28:01 PDT 2025


================
@@ -417,6 +454,293 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
   return true;
 }
 
+/// Returns true if the opcode of \p MI is one of the packed F32 opcodes for
+/// which unpacking is supported (V_PK_ADD_F32, V_PK_MUL_F32, V_PK_FMA_F32),
+/// and false for every other opcode.
+//
+// If support is extended to new operations, add tests in
+// llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
+bool SIPreEmitPeephole::isUnpackingSupportedInstr(MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  case AMDGPU::V_PK_ADD_F32:
+  case AMDGPU::V_PK_MUL_F32:
+  case AMDGPU::V_PK_FMA_F32:
+    return true;
+  default:
+    // Any opcode not listed above is not supported for unpacking.
+    return false;
+  }
+}
+
+bool SIPreEmitPeephole::canUnpackingIntroduceDependencies(
+    const MachineInstr &MI) {
+  unsigned OpCode = MI.getOpcode();
+  Register DstReg = MI.getOperand(0).getReg();
+  // Only the first register in the register pair needs to be checked due to the
+  // unpacking order. Packed instructions are unpacked such that the lower 32
+  // bits (i.e., the first register in the pair) are written first. This can
+  // introduce dependencies if the first register is written in one instruction
+  // and then read as part of the higher 32 bits in the subsequent instruction.
+  // Such scenarios can arise due to specific combinations of op_sel and
+  // op_sel_hi modifiers.
+  Register UnpackedDstReg = TRI->getSubReg(DstReg, AMDGPU::sub0);
+  int Src0ModifiersIdx =
+      AMDGPU::getNamedOperandIdx(OpCode, AMDGPU::OpName::src0_modifiers);
+  int Src1ModifiersIdx =
+      AMDGPU::getNamedOperandIdx(OpCode, AMDGPU::OpName::src1_modifiers);
+  unsigned Src0Mods = MI.getOperand(Src0ModifiersIdx).getImm();
+  unsigned Src1Mods = MI.getOperand(Src1ModifiersIdx).getImm();
+
+  if (TII->getNamedOperand(MI, AMDGPU::OpName::src0)->isReg()) {
+    Register SrcReg0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)->getReg();
+    Register HiSrc0Reg = (Src0Mods & SISrcMods::OP_SEL_1)
+                             ? TRI->getSubReg(SrcReg0, AMDGPU::sub1)
+                             : TRI->getSubReg(SrcReg0, AMDGPU::sub0);
+    // Check if the register selected by op_sel_hi is the same as the first
+    // register in the destination register pair
+    if (UnpackedDstReg == HiSrc0Reg ||
----------------
jrbyrnes wrote:

You can drop the equality check: the first thing `TRI->regsOverlap` does is check for equality.

https://github.com/llvm/llvm-project/pull/157968


More information about the llvm-commits mailing list