[llvm] [AMDGPU] Mitigate GFX12 VALU read SGPR hazard (PR #100067)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 23 05:10:52 PDT 2024


================
@@ -2876,22 +2909,269 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
   auto NextMI = std::next(MI->getIterator());
 
   // Add s_waitcnt_depctr sa_sdst(0) after SALU write.
-  BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(),
-          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
-      .addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
+  auto NewMI = BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(),
+                       TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+                   .addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
 
   // SALU write may be s_getpc in a bundle.
-  if (MI->getOpcode() == AMDGPU::S_GETPC_B64) {
-    // Update offsets of any references in the bundle.
-    while (NextMI != MI->getParent()->end() &&
-           NextMI->isBundledWithPred()) {
-      for (auto &Operand : NextMI->operands()) {
-        if (Operand.isGlobal())
-          Operand.setOffset(Operand.getOffset() + 4);
+  updateGetPCBundle(NewMI);
+
+  return true;
+}
+
+static unsigned baseSGPRNumber(Register Reg, const SIRegisterInfo &TRI) {
+  unsigned RegN = TRI.getEncodingValue(Reg); // Encoding of Reg per TRI.
+  assert(RegN <= 127); // Larger encodings are not expected by this helper.
+  return (RegN >> 1) & 0x3f; // Drop bit 0, keep 6 bits: 2N and 2N+1 -> N.
+}
+
+// For VALUReadSGPRHazard: pre-compute a bit vector of all SGPRs used by VALUs.
+void GCNHazardRecognizer::computeVALUHazardSGPRs(MachineFunction *MMF) {
+  assert(MMF == &MF);
+
+  // Assume non-empty vector means it has already been computed.
+  if (!VALUReadHazardSGPRs.empty())
+    return;
+
+  auto CallingConv = MF.getFunction().getCallingConv();
+  bool IsCallFree =
+      AMDGPU::isEntryFunctionCC(CallingConv) && !MF.getFrameInfo().hasCalls();
+
+  // Exhaustive search is only viable in non-caller/callee functions where
+  // VALUs will be exposed to the hazard recognizer.
+  UseVALUReadHazardExhaustiveSearch =
+      IsCallFree && MF.getTarget().getOptLevel() > CodeGenOptLevel::None &&
+      MF.getInstructionCount() <= MaxExhaustiveHazardSearch;
+
+  // Consider all SGPRs hazards if the shader uses function calls or is callee.
+  bool UseVALUUseCache =
+      IsCallFree && MF.getTarget().getOptLevel() > CodeGenOptLevel::None;
+  VALUReadHazardSGPRs.resize(64, !UseVALUUseCache);
+  if (!UseVALUUseCache)
+    return;
+
+  // Perform a post ordered reverse scan to find VALUs which read an SGPR
+  // before a SALU write to the same SGPR.  This provides a reduction in
+  // hazard insertion when all VALU access to an SGPR occurs after its last
+  // SALU write, when compared to a linear scan.
+  const unsigned SGPR_NULL = TRI.getEncodingValue(AMDGPU::SGPR_NULL_gfx11plus);
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  BitVector SALUWriteSGPRs(64), ReadSGPRs(64);
+  MachineCycleInfo CI;
+  CI.compute(*MMF);
+
+  for (auto *MBB : post_order(&MF)) {
+    bool InCycle = CI.getCycle(MBB) != nullptr;
+    for (auto &MI : reverse(MBB->instrs())) {
+      bool IsVALU = SIInstrInfo::isVALU(MI);
----------------
arsenm wrote:

Need to skip meta instructions?

https://github.com/llvm/llvm-project/pull/100067


More information about the llvm-commits mailing list