[llvm] unpack packed instructions overlapped by MFMAs post-RA scheduling (PR #157968)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 12 14:24:56 PDT 2025


================
@@ -417,6 +460,263 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
   return true;
 }
 
+/// Returns true if the opcode of \p MI is one of the packed F32 opcodes
+/// accepted here: V_PK_ADD_F32, V_PK_MUL_F32 or V_PK_FMA_F32. Every other
+/// opcode yields false.
+bool SIPreEmitPeephole::isUnpackingSupportedInstr(MachineInstr &MI) const {
+  // The switch is over a plain unsigned opcode, so a default label is required
+  // and every path returns; no trailing llvm_unreachable is needed.
+  switch (MI.getOpcode()) {
+  case AMDGPU::V_PK_ADD_F32:
+  case AMDGPU::V_PK_MUL_F32:
+  case AMDGPU::V_PK_FMA_F32:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Returns true if any valid register operand of \p SuccMI (use or def) is
+/// equal to, or overlaps with, a valid register defined by \p PredMI.
+bool SIPreEmitPeephole::hasRWDependencies(const MachineInstr &PredMI,
+                                          const MachineInstr &SuccMI) {
+  for (const MachineOperand &DefMO : PredMI.operands()) {
+    // Only register definitions of the predecessor are relevant.
+    if (!(DefMO.isReg() && DefMO.isDef()))
+      continue;
+    const Register DefReg = DefMO.getReg();
+    if (!DefReg.isValid())
+      continue;
+
+    // Compare the defined register against every register operand of the
+    // successor, regardless of whether that operand is a use or a def.
+    for (const MachineOperand &OtherMO : SuccMI.operands()) {
+      if (!OtherMO.isReg())
+        continue;
+      const Register OtherReg = OtherMO.getReg();
+      if (OtherReg.isValid() &&
+          (DefReg == OtherReg || TRI->regsOverlap(DefReg, OtherReg)))
+        return true;
+    }
+  }
+  return false;
+}
+
+bool SIPreEmitPeephole::canUnpackingIntroduceDependencies(
+    const MachineInstr &MI) {
+  unsigned OpCode = MI.getOpcode();
+  bool IsFMA = (OpCode == AMDGPU::V_PK_FMA_F32) ? true : false;
+  MachineOperand DstMO = MI.getOperand(0);
+  Register DstReg = DstMO.getReg();
+  Register SrcReg0 = MI.getOperand(2).getReg();
+  Register SrcReg1 = MI.getOperand(4).getReg();
+
+  Register UnpackedDstReg = TRI->getSubReg(DstReg, AMDGPU::sub0);
+  int Src0ModifiersIdx =
+      AMDGPU::getNamedOperandIdx(OpCode, AMDGPU::OpName::src0_modifiers);
+  int Src1ModifiersIdx =
+      AMDGPU::getNamedOperandIdx(OpCode, AMDGPU::OpName::src1_modifiers);
+  unsigned Src0Mods = MI.getOperand(Src0ModifiersIdx).getImm();
+  unsigned Src1Mods = MI.getOperand(Src1ModifiersIdx).getImm();
+
+  Register HiSrc0Reg = (Src0Mods & SISrcMods::OP_SEL_1)
+                           ? TRI->getSubReg(SrcReg0, AMDGPU::sub1)
+                           : TRI->getSubReg(SrcReg0, AMDGPU::sub0);
+  Register HiSrc1Reg = (Src1Mods & SISrcMods::OP_SEL_1)
+                           ? TRI->getSubReg(SrcReg1, AMDGPU::sub1)
+                           : TRI->getSubReg(SrcReg1, AMDGPU::sub0);
+  if (UnpackedDstReg == HiSrc0Reg ||
+      TRI->regsOverlap(UnpackedDstReg, HiSrc0Reg) ||
+      UnpackedDstReg == HiSrc1Reg ||
+      TRI->regsOverlap(UnpackedDstReg, HiSrc1Reg))
+    return true;
+  if (IsFMA) {
+    int Src2ModifiersIdx =
+        AMDGPU::getNamedOperandIdx(OpCode, AMDGPU::OpName::src2_modifiers);
+    unsigned Src2Mods = MI.getOperand(Src2ModifiersIdx).getImm();
+    Register SrcReg2 = MI.getOperand(6).getReg();
----------------
jrbyrnes wrote:

TII->getNamedOperand(MI, AMDGPU::OpName::src2)

Do we need to check if it is a reg?

https://github.com/llvm/llvm-project/pull/157968


More information about the llvm-commits mailing list