[llvm] [AMDGPU] misched: avoid subregister dependencies (PR #140255)
Robert Imschweiler via llvm-commits
llvm-commits at lists.llvm.org
Fri May 16 10:11:19 PDT 2025
================
@@ -535,6 +535,62 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU());
}
+bool GCNSubtarget::isRealSchedDependency(MachineInstr *DefI, int DefOpIdx,
+ MachineInstr *UseI,
+ int UseOpIdx) const {
+ if (!InstrInfo.isVOP3P(*UseI))
+ return true;
+ MachineOperand &DefOp = DefI->getOperand(DefOpIdx);
+ if (!DefOp.isReg() || !DefOp.getReg().isPhysical())
+ return true;
+
+ AMDGPU::OpName UseModName;
+ if (AMDGPU::getNamedOperandIdx(UseI->getOpcode(), AMDGPU::OpName::src0) ==
+ UseOpIdx)
+ UseModName = AMDGPU::OpName::src0_modifiers;
+ else if (AMDGPU::getNamedOperandIdx(UseI->getOpcode(),
+ AMDGPU::OpName::src1) == UseOpIdx)
+ UseModName = AMDGPU::OpName::src1_modifiers;
+ else if (AMDGPU::getNamedOperandIdx(UseI->getOpcode(),
+ AMDGPU::OpName::src2) == UseOpIdx)
+ UseModName = AMDGPU::OpName::src2_modifiers;
+ else
+ return true;
+ MachineOperand *UseOpMod = InstrInfo.getNamedOperand(*UseI, UseModName);
+ if (!UseOpMod)
+ return true;
+ // Check whether all parts of the register are being used (= op_sel and
+ // op_sel_hi differ). In that case we can return early.
+ auto OpSel = UseOpMod->getImm() & SISrcMods::OP_SEL_0;
+ auto OpSelHi = UseOpMod->getImm() & SISrcMods::OP_SEL_1;
+ if ((!OpSel || !OpSelHi) && (OpSel || OpSelHi))
+ return true;
+
+ MachineOperand &UseOp = UseI->getOperand(UseOpIdx);
+ if (!UseOp.isReg() || !UseOp.getReg().isPhysical())
+ return true;
+ const SIRegisterInfo *TRI = getRegisterInfo();
+ const MachineRegisterInfo &MRI = UseI->getParent()->getParent()->getRegInfo();
+ MCRegister DefReg = DefOp.getReg().asMCReg();
+ MCRegister UseReg = UseOp.getReg().asMCReg();
+ // We specifically look for a packed 32bit Use and smaller Def.
+ if (TRI->getRegSizeInBits(UseReg, MRI) != 64 ||
+ TRI->getRegSizeInBits(DefReg, MRI) > 32)
+ return true;
----------------
ro-i wrote:
I couldn't find any documentation on whether regunits in the AMDGPU backend are guaranteed to be 16 bits wide, which is why I was unsure whether this is sufficient. Is that a premise we can rely on?
https://github.com/llvm/llvm-project/pull/140255
More information about the llvm-commits mailing list