[llvm] [AMDGPU] Mitigate GFX12 VALU read SGPR hazard (PR #100067)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 05:10:52 PDT 2024
================
@@ -2876,22 +2909,269 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
auto NextMI = std::next(MI->getIterator());
// Add s_waitcnt_depctr sa_sdst(0) after SALU write.
- BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(),
- TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
+ auto NewMI = BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(),
+ TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
// SALU write may be s_getpc in a bundle.
- if (MI->getOpcode() == AMDGPU::S_GETPC_B64) {
- // Update offsets of any references in the bundle.
- while (NextMI != MI->getParent()->end() &&
- NextMI->isBundledWithPred()) {
- for (auto &Operand : NextMI->operands()) {
- if (Operand.isGlobal())
- Operand.setOffset(Operand.getOffset() + 4);
+ updateGetPCBundle(NewMI);
+
+ return true;
+}
+
+static unsigned baseSGPRNumber(Register Reg, const SIRegisterInfo &TRI) {
+ unsigned RegN = TRI.getEncodingValue(Reg);
+ assert(RegN <= 127);
+ return (RegN >> 1) & 0x3f;
+}
+
+// For VALUReadSGPRHazard: pre-compute a bit vector of all SGPRs used by VALUs.
+void GCNHazardRecognizer::computeVALUHazardSGPRs(MachineFunction *MMF) {
+ assert(MMF == &MF);
+
+ // Assume non-empty vector means it has already been computed.
+ if (!VALUReadHazardSGPRs.empty())
+ return;
+
+ auto CallingConv = MF.getFunction().getCallingConv();
+ bool IsCallFree =
+ AMDGPU::isEntryFunctionCC(CallingConv) && !MF.getFrameInfo().hasCalls();
+
+ // Exhaustive search is only viable in non-caller/callee functions where
+ // VALUs will be exposed to the hazard recognizer.
+ UseVALUReadHazardExhaustiveSearch =
+ IsCallFree && MF.getTarget().getOptLevel() > CodeGenOptLevel::None &&
+ MF.getInstructionCount() <= MaxExhaustiveHazardSearch;
+
+ // Consider all SGPRs hazards if the shader uses function calls or is callee.
+ bool UseVALUUseCache =
+ IsCallFree && MF.getTarget().getOptLevel() > CodeGenOptLevel::None;
+ VALUReadHazardSGPRs.resize(64, !UseVALUUseCache);
+ if (!UseVALUUseCache)
+ return;
+
+ // Perform a post ordered reverse scan to find VALUs which read an SGPR
+ // before a SALU write to the same SGPR. This provides a reduction in
+ // hazard insertion when all VALU access to an SGPR occurs after its last
+ // SALU write, when compared to a linear scan.
+ const unsigned SGPR_NULL = TRI.getEncodingValue(AMDGPU::SGPR_NULL_gfx11plus);
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ BitVector SALUWriteSGPRs(64), ReadSGPRs(64);
+ MachineCycleInfo CI;
+ CI.compute(*MMF);
+
+ for (auto *MBB : post_order(&MF)) {
+ bool InCycle = CI.getCycle(MBB) != nullptr;
+ for (auto &MI : reverse(MBB->instrs())) {
+ bool IsVALU = SIInstrInfo::isVALU(MI);
+ bool IsSALU = SIInstrInfo::isSALU(MI);
+ if (!(IsVALU || IsSALU))
+ continue;
+
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg())
----------------
arsenm wrote:
What about regmask / calls?
https://github.com/llvm/llvm-project/pull/100067
More information about the llvm-commits mailing list