[llvm] [AMDGPU] w/a hazard with writing s102/103 and reading FLAT_SCRATCH_BASE (PR #153878)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 15 14:21:08 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

<details>
<summary>Changes</summary>



---

Patch is 21.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153878.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+78) 
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+6) 
- (modified) llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir (+478) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 5e297c7540c48..dd7c1914d3440 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1204,6 +1204,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
     fixGetRegWaitIdle(MI);
   if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug())
     fixDsAtomicAsyncBarrierArriveB64(MI);
+  if (ST.hasScratchBaseForwardingHazard())
+    fixScratchBaseForwardingHazard(MI);
 }
 
 static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3468,3 +3470,79 @@ bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) {
 
   return true;
 }
+
+bool GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
+  // Nothing to check in pre-RA scheduling: SGPRs must already be allocated
+  // for the hazard to trigger.
+  if (!IsHazardRecognizerMode)
+    return false;
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  // Hazard expires after 10 SALU or 8 VALU SGPR writes; 10 is used for both.
+  const int FlatScrBaseWaitStates = 10;
+
+  bool ReadsFlatScrLo =
+      MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_LO, TRI);
+  bool ReadsFlatScrHi =
+      MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, TRI);
+  if (isSGetReg(MI->getOpcode())) {
+    switch (getHWReg(TII, *MI)) {
+    default:
+      break;
+    case AMDGPU::Hwreg::ID_FLAT_SCR_LO:
+      ReadsFlatScrLo = true;
+      break;
+    case AMDGPU::Hwreg::ID_FLAT_SCR_HI:
+      ReadsFlatScrHi = true;
+      break;
+    }
+  }
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  auto IsRegDefHazard = [&](Register Reg) -> bool {
+    DenseSet<const MachineBasicBlock *> Visited;
+    auto IsHazardFn = [TRI, Reg](const MachineInstr &MI) {
+      return MI.modifiesRegister(Reg, TRI);
+    };
+
+    // Repurposes the wait-state counter: rather than wait states this returns
+    // 1 if the SALU/VALU instruction defines an SGPR and 0 otherwise.
+    auto IsSGPRDef = [TII, TRI, &MRI](const MachineInstr &MI) -> unsigned {
+      if (!TII->isSALU(MI) && !TII->isVALU(MI))
+        return 0;
+      for (const MachineOperand &MO : MI.all_defs()) {
+        if (TRI->isSGPRReg(MRI, MO.getReg()))
+          return 1;
+      }
+      return 0;
+    };
+
+    auto IsExpiredFn = [=](const MachineInstr &MI, int SgprWrites) {
+      if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+        unsigned Wait = MI.getOperand(0).getImm();
+        if (AMDGPU::DepCtr::decodeFieldSaSdst(Wait) == 0 &&
+            AMDGPU::DepCtr::decodeFieldVaSdst(Wait) == 0)
+          return true;
+      }
+      return SgprWrites >= FlatScrBaseWaitStates;
+    };
+
+    return ::getWaitStatesSince(
+               IsHazardFn, MI->getParent(), std::next(MI->getReverseIterator()),
+               0, IsExpiredFn, Visited, IsSGPRDef) < FlatScrBaseWaitStates;
+  };
+
+  if ((!ReadsFlatScrLo || MRI.isConstantPhysReg(AMDGPU::SGPR102) ||
+       !IsRegDefHazard(AMDGPU::SGPR102)) &&
+      (!ReadsFlatScrHi || MRI.isConstantPhysReg(AMDGPU::SGPR103) ||
+       !IsRegDefHazard(AMDGPU::SGPR103)))
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(AMDGPU::DepCtr::encodeFieldVaSdst(
+          AMDGPU::DepCtr::encodeFieldSaSdst(0), 0));
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 890d5cbd154d6..e0982b46424b9 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -112,6 +112,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixRequiredExportPriority(MachineInstr *MI);
   bool fixGetRegWaitIdle(MachineInstr *MI);
   bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
+  bool fixScratchBaseForwardingHazard(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 436f5c0801fad..404a476a3076a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1821,6 +1821,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {
     return getGeneration() == GFX12;
   }
+
+  // Requires s_wait_alu(0) between an s102/s103 write and a later
+  // src_flat_scratch_base read.
+  bool hasScratchBaseForwardingHazard() const {
+    return GFX1250Insts && getGeneration() == GFX12;
+  }
 };
 
 class GCNUserSGPRUsageInfo {
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
index f1dbabf1e1a83..f4596b0832d97 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -15,3 +15,481 @@ body: |
     ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
     DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec
 ...
+
+---
+name: write_s102_read_flat_scr_base_lo
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s103_read_flat_scr_base_hi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr103 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base
+    ; GCN: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+    $sgpr102 = S_MOV_B32 0
+    $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+...
+
+---
+name: write_s103_read_flat_scr_base
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base
+    ; GCN: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+    $sgpr103 = S_MOV_B32 0
+    $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+...
+
+---
+name: write_s102_s103_read_flat_scr_base
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s102_s103_read_flat_scr_base
+    ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+    $sgpr102_sgpr103 = S_MOV_B64 0
+    $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+...
+
+---
+name: write_s102_getreg_flat_scr_base_lo
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_getreg_flat_scr_base_lo
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr1 = S_GETREG_B32 20, implicit $mode
+    $sgpr102 = S_MOV_B32 0
+    $sgpr1 = S_GETREG_B32 20, implicit $mode
+...
+
+---
+name: write_s103_getreg_flat_scr_base_hi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_getreg_flat_scr_base_hi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode
+    $sgpr103 = S_MOV_B32 0
+    $sgpr1 = S_GETREG_B32 21, implicit $mode
+...
+
+---
+name: write_s102_s103_getreg_flat_scr_base_hi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s102_s103_getreg_flat_scr_base_hi
+    ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode
+    $sgpr102_sgpr103 = S_MOV_B64 0
+    $sgpr1 = S_GETREG_B32 21, implicit $mode
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_9_salu_valu
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_9_salu_valu
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_10_salu_valu_expired
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_10_salu_valu_expired
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $sgpr10 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s103_read_flat_scr_base_hi_9_salu_valu
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi_9_salu_valu
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr103 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s103_read_flat_scr_base_hi_10_salu_valu_expired
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi_10_salu_valu_expired
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr103 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $sgpr10 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_hi_no_hazard
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_hi_no_hazard
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_expired_by_wait0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_expired_by_wait0
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 0
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_expired_by_wait_vs_sdst_sa_sdst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_expired_by_wait_vs_sdst_sa_sdst
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 61950
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_va_sdst_only
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_va_sdst_only
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61951
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 61951
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_sa_sdst_only
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_sa_sdst_only
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 65534
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 65534
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_write_s103_read_flat_scr_base_lo_read_flat_scr_base_hi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_write_s103_read_flat_scr_base_lo_read_flat_scr_base_hi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    ; GCN-NEXT: $vgpr1 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $sgpr103 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_cross_blocks
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_cross_blocks
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; GCN-NEXT:   liveins: $vgpr0, $sgpr0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr102 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+  ; GCN-NEXT:   $sgpr1 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr2 = S_MOV_B32 0
+  ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/153878


More information about the llvm-commits mailing list