[llvm] [AMDGPU] misched: avoid subregister dependencies (PR #140255)

Robert Imschweiler via llvm-commits llvm-commits at lists.llvm.org
Fri May 16 10:11:19 PDT 2025


================
@@ -535,6 +535,62 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
   return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU());
 }
 
+bool GCNSubtarget::isRealSchedDependency(MachineInstr *DefI, int DefOpIdx,
+                                         MachineInstr *UseI,
+                                         int UseOpIdx) const {
+  if (!InstrInfo.isVOP3P(*UseI))
+    return true;
+  MachineOperand &DefOp = DefI->getOperand(DefOpIdx);
+  if (!DefOp.isReg() || !DefOp.getReg().isPhysical())
+    return true;
+
+  AMDGPU::OpName UseModName;
+  if (AMDGPU::getNamedOperandIdx(UseI->getOpcode(), AMDGPU::OpName::src0) ==
+      UseOpIdx)
+    UseModName = AMDGPU::OpName::src0_modifiers;
+  else if (AMDGPU::getNamedOperandIdx(UseI->getOpcode(),
+                                      AMDGPU::OpName::src1) == UseOpIdx)
+    UseModName = AMDGPU::OpName::src1_modifiers;
+  else if (AMDGPU::getNamedOperandIdx(UseI->getOpcode(),
+                                      AMDGPU::OpName::src2) == UseOpIdx)
+    UseModName = AMDGPU::OpName::src2_modifiers;
+  else
+    return true;
+  MachineOperand *UseOpMod = InstrInfo.getNamedOperand(*UseI, UseModName);
+  if (!UseOpMod)
+    return true;
+  // Check whether all parts of the register are being used (= op_sel and
+  // op_sel_hi differ). In that case we can return early.
+  auto OpSel = UseOpMod->getImm() & SISrcMods::OP_SEL_0;
+  auto OpSelHi = UseOpMod->getImm() & SISrcMods::OP_SEL_1;
+  if ((!OpSel || !OpSelHi) && (OpSel || OpSelHi))
+    return true;
+
+  MachineOperand &UseOp = UseI->getOperand(UseOpIdx);
+  if (!UseOp.isReg() || !UseOp.getReg().isPhysical())
+    return true;
+  const SIRegisterInfo *TRI = getRegisterInfo();
+  const MachineRegisterInfo &MRI = UseI->getParent()->getParent()->getRegInfo();
+  MCRegister DefReg = DefOp.getReg().asMCReg();
+  MCRegister UseReg = UseOp.getReg().asMCReg();
+  // We specifically look for a packed 32bit Use and smaller Def.
+  if (TRI->getRegSizeInBits(UseReg, MRI) != 64 ||
+      TRI->getRegSizeInBits(DefReg, MRI) > 32)
+    return true;
----------------
ro-i wrote:

I couldn't find any documentation on whether regunits in the AMDGPU backend are guaranteed to be 16-bit, which is why I was unsure whether this is sufficient. Is this an assumption we can rely on?

https://github.com/llvm/llvm-project/pull/140255


More information about the llvm-commits mailing list