[llvm] [AMDGPU] misched: avoid subregister dependencies (PR #140255)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri May 16 08:51:32 PDT 2025
================
@@ -535,6 +535,62 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU());
}
+bool GCNSubtarget::isRealSchedDependency(MachineInstr *DefI, int DefOpIdx,
+ MachineInstr *UseI,
+ int UseOpIdx) const { // Returns false only when a <=32-bit physical Def shares no register unit with the pair of units of a 64-bit VOP3P Use selected by OP_SEL_0; every other path returns true.
+ if (!InstrInfo.isVOP3P(*UseI)) // Only VOP3P uses are analysed further.
+ return true;
+ MachineOperand &DefOp = DefI->getOperand(DefOpIdx);
+ if (!DefOp.isReg() || !DefOp.getReg().isPhysical()) // Def must be a physical register operand.
+ return true;
+
+ AMDGPU::OpName UseModName; // Assigned in every branch below that does not return.
+ if (AMDGPU::getNamedOperandIdx(UseI->getOpcode(), AMDGPU::OpName::src0) ==
+ UseOpIdx)
+ UseModName = AMDGPU::OpName::src0_modifiers;
+ else if (AMDGPU::getNamedOperandIdx(UseI->getOpcode(),
+ AMDGPU::OpName::src1) == UseOpIdx)
+ UseModName = AMDGPU::OpName::src1_modifiers;
+ else if (AMDGPU::getNamedOperandIdx(UseI->getOpcode(),
+ AMDGPU::OpName::src2) == UseOpIdx)
+ UseModName = AMDGPU::OpName::src2_modifiers;
+ else
+ return true; // UseOpIdx is not src0, src1 or src2.
+ MachineOperand *UseOpMod = InstrInfo.getNamedOperand(*UseI, UseModName);
+ if (!UseOpMod) // No modifiers operand for this source.
+ return true;
+ // If exactly one of the OP_SEL_0 / OP_SEL_1 modifier bits is set (op_sel and
+ // op_sel_hi differ), all parts of the register are being used, so return true early.
+ auto OpSel = UseOpMod->getImm() & SISrcMods::OP_SEL_0;
+ auto OpSelHi = UseOpMod->getImm() & SISrcMods::OP_SEL_1;
+ if ((!OpSel || !OpSelHi) && (OpSel || OpSelHi)) // Logical XOR of the two bits.
+ return true;
+
+ MachineOperand &UseOp = UseI->getOperand(UseOpIdx);
+ if (!UseOp.isReg() || !UseOp.getReg().isPhysical()) // Use must be a physical register operand.
+ return true;
+ const SIRegisterInfo *TRI = getRegisterInfo();
+ const MachineRegisterInfo &MRI = UseI->getParent()->getParent()->getRegInfo();
+ MCRegister DefReg = DefOp.getReg().asMCReg();
+ MCRegister UseReg = UseOp.getReg().asMCReg();
+ // Only a Use of exactly 64 bits (presumably two packed 32-bit values) with a Def of at most 32 bits is analysed; anything else returns true.
+ if (TRI->getRegSizeInBits(UseReg, MRI) != 64 ||
+ TRI->getRegSizeInBits(DefReg, MRI) > 32)
+ return true;
+ SmallVector<MCRegUnit, 2> DefRegUnits(TRI->regunits(DefReg));
+ assert(DefRegUnits.size() <= 2 && "unexpected number of register units");
+ SmallVector<MCRegUnit, 4> UseRegUnits(TRI->regunits(UseReg));
+ assert(UseRegUnits.size() == 4 && "unexpected number of register units"); // NOTE(review): the [2]/[3] indexing below relies on this size, which is only checked in assert-enabled builds.
+
+ auto FindRegunit = [&DefRegUnits](MCRegUnit A, MCRegUnit B) { // True if any Def register unit equals A or B.
+ return llvm::find_if(DefRegUnits, [A, B](MCRegUnit RU) {
+ return RU == A || RU == B;
+ }) != DefRegUnits.end();
+ };
+ return OpSel ? FindRegunit(UseRegUnits[2], UseRegUnits[3])
+ : FindRegunit(UseRegUnits[0], UseRegUnits[1]); // Both bits are equal here (XOR check above), so OpSel alone picks the pair: units [2],[3] when set, [0],[1] when clear. Presumably these are the high/low 32-bit halves - TODO confirm regunit ordering.
+}
----------------
arsenm wrote:
This seems unnecessarily cumbersome. It's odd to operate directly on regunits like this; can you query a read of the subregister index instead?
https://github.com/llvm/llvm-project/pull/140255
More information about the llvm-commits
mailing list