[llvm] f1fc507 - [AMDGPU] w/a hazard with writing s102/103 and reading FLAT_SCRATCH_BASE (#153878)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 15 15:23:10 PDT 2025


Author: Stanislav Mekhanoshin
Date: 2025-08-15T15:23:06-07:00
New Revision: f1fc50748aee471daa9e51eaf61e9e853f11f0c7

URL: https://github.com/llvm/llvm-project/commit/f1fc50748aee471daa9e51eaf61e9e853f11f0c7
DIFF: https://github.com/llvm/llvm-project/commit/f1fc50748aee471daa9e51eaf61e9e853f11f0c7.diff

LOG: [AMDGPU] w/a hazard with writing s102/103 and reading FLAT_SCRATCH_BASE (#153878)

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 5e297c7540c48..dd7c1914d3440 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1204,6 +1204,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
     fixGetRegWaitIdle(MI);
   if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug())
     fixDsAtomicAsyncBarrierArriveB64(MI);
+  if (ST.hasScratchBaseForwardingHazard())
+    fixScratchBaseForwardingHazard(MI);
 }
 
 static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3468,3 +3470,79 @@ bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) {
 
   return true;
 }
+
+bool GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
+  // No reason to check this in pre-RA scheduling, SGPRs have to be allocated
+  // for hazard to trigger.
+  if (!IsHazardRecognizerMode)
+    return false;
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  // Hazard expires after 10 SGPR writes by SALU or 8 SGPR writes by VALU.
+  const int FlatScrBaseWaitStates = 10; // Larger of the two limits above.
+
+  bool ReadsFlatScrLo =
+      MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_LO, TRI);
+  bool ReadsFlatScrHi =
+      MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, TRI);
+  if (isSGetReg(MI->getOpcode())) {
+    switch (getHWReg(TII, *MI)) {
+    default:
+      break;
+    case AMDGPU::Hwreg::ID_FLAT_SCR_LO:
+      ReadsFlatScrLo = true; // S_GETREG of FLAT_SCR_LO is also a read.
+      break;
+    case AMDGPU::Hwreg::ID_FLAT_SCR_HI:
+      ReadsFlatScrHi = true; // S_GETREG of FLAT_SCR_HI is also a read.
+      break;
+    }
+  }
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  auto IsRegDefHazard = [&](Register Reg) -> bool {
+    DenseSet<const MachineBasicBlock *> Visited;
+    auto IsHazardFn = [TRI, Reg](const MachineInstr &MI) {
+      return MI.modifiesRegister(Reg, TRI);
+    };
+
+    // This abuses the wait-state counter: rather than wait states, it returns
+    // 1 for an SALU/VALU instruction that writes an SGPR and 0 otherwise.
+    auto IsSGPRDef = [TII, TRI, &MRI](const MachineInstr &MI) -> unsigned {
+      if (!TII->isSALU(MI) && !TII->isVALU(MI))
+        return 0;
+      for (const MachineOperand &MO : MI.all_defs()) {
+        if (TRI->isSGPRReg(MRI, MO.getReg()))
+          return 1;
+      }
+      return 0;
+    };
+
+    auto IsExpiredFn = [=](const MachineInstr &MI, int SgprWrites) {
+      if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+        unsigned Wait = MI.getOperand(0).getImm();
+        if (AMDGPU::DepCtr::decodeFieldSaSdst(Wait) == 0 &&
+            AMDGPU::DepCtr::decodeFieldVaSdst(Wait) == 0)
+          return true; // Existing wait has sa_sdst == 0 and va_sdst == 0.
+      }
+      return SgprWrites >= FlatScrBaseWaitStates; // Enough SGPR writes seen.
+    };
+
+    return ::getWaitStatesSince(
+               IsHazardFn, MI->getParent(), std::next(MI->getReverseIterator()),
+               0, IsExpiredFn, Visited, IsSGPRDef) < FlatScrBaseWaitStates;
+  };
+
+  if ((!ReadsFlatScrLo || MRI.isConstantPhysReg(AMDGPU::SGPR102) ||
+       !IsRegDefHazard(AMDGPU::SGPR102)) &&
+      (!ReadsFlatScrHi || MRI.isConstantPhysReg(AMDGPU::SGPR103) ||
+       !IsRegDefHazard(AMDGPU::SGPR103)))
+    return false; // No hazard for either half of the base.
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(AMDGPU::DepCtr::encodeFieldVaSdst(
+          AMDGPU::DepCtr::encodeFieldSaSdst(0), 0));
+  return true; // An S_WAITCNT_DEPCTR was inserted before MI.
+}

diff  --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 890d5cbd154d6..e0982b46424b9 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -112,6 +112,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixRequiredExportPriority(MachineInstr *MI);
   bool fixGetRegWaitIdle(MachineInstr *MI);
   bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
+  bool fixScratchBaseForwardingHazard(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);

diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 436f5c0801fad..404a476a3076a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1821,6 +1821,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {
     return getGeneration() == GFX12;
   }
+
+  // Requires s_wait_alu(0) between an s102/s103 write and a following
+  // src_flat_scratch_base read.
+  bool hasScratchBaseForwardingHazard() const {
+    return GFX1250Insts && getGeneration() == GFX12;
+  }
 };
 
 class GCNUserSGPRUsageInfo {

diff  --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
index f1dbabf1e1a83..f4596b0832d97 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -15,3 +15,481 @@ body: |
     ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
     DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec
 ...
+
+---
+name: write_s102_read_flat_scr_base_lo
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s103_read_flat_scr_base_hi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr103 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base
+    ; GCN: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+    $sgpr102 = S_MOV_B32 0
+    $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+...
+
+---
+name: write_s103_read_flat_scr_base
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base
+    ; GCN: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+    $sgpr103 = S_MOV_B32 0
+    $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+...
+
+---
+name: write_s102_s103_read_flat_scr_base
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s102_s103_read_flat_scr_base
+    ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+    $sgpr102_sgpr103 = S_MOV_B64 0
+    $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+...
+
+---
+name: write_s102_getreg_flat_scr_base_lo
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_getreg_flat_scr_base_lo
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr1 = S_GETREG_B32 20, implicit $mode
+    $sgpr102 = S_MOV_B32 0
+    $sgpr1 = S_GETREG_B32 20, implicit $mode
+...
+
+---
+name: write_s103_getreg_flat_scr_base_hi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_getreg_flat_scr_base_hi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode
+    $sgpr103 = S_MOV_B32 0
+    $sgpr1 = S_GETREG_B32 21, implicit $mode
+...
+
+---
+name: write_s102_s103_getreg_flat_scr_base_hi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s102_s103_getreg_flat_scr_base_hi
+    ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode
+    $sgpr102_sgpr103 = S_MOV_B64 0
+    $sgpr1 = S_GETREG_B32 21, implicit $mode
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_9_salu_valu
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_9_salu_valu
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_10_salu_valu_expired
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_10_salu_valu_expired
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $sgpr10 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s103_read_flat_scr_base_hi_9_salu_valu
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi_9_salu_valu
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr103 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s103_read_flat_scr_base_hi_10_salu_valu_expired
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi_10_salu_valu_expired
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr103 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $sgpr10 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_hi_no_hazard
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_hi_no_hazard
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_expired_by_wait0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_expired_by_wait0
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 0
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_expired_by_wait_vs_sdst_sa_sdst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_expired_by_wait_vs_sdst_sa_sdst
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 61950
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_va_sdst_only
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_va_sdst_only
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61951
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 61951
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_sa_sdst_only
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_sa_sdst_only
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 65534
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 65534
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_write_s103_read_flat_scr_base_lo_read_flat_scr_base_hi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_write_s103_read_flat_scr_base_lo_read_flat_scr_base_hi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    ; GCN-NEXT: $vgpr1 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $sgpr103 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_cross_blocks
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_cross_blocks
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; GCN-NEXT:   liveins: $vgpr0, $sgpr0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr102 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+  ; GCN-NEXT:   $sgpr1 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr2 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr3 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr4 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr5 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr6 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr7 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr8 = S_MOV_B32 0
+  ; GCN-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT:   liveins: $vgpr0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr102 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr1 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr2 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr3 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr4 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr5 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr6 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr7 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr8 = S_MOV_B32 0
+  ; GCN-NEXT:   $sgpr9 = S_MOV_B32 0
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   liveins: $vgpr0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   S_WAITCNT_DEPCTR 61950
+  ; GCN-NEXT:   $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+    $sgpr102 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2 = S_MOV_B32 0
+    $sgpr3 = S_MOV_B32 0
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    $sgpr8 = S_MOV_B32 0
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    liveins: $vgpr0
+    $sgpr102 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2 = S_MOV_B32 0
+    $sgpr3 = S_MOV_B32 0
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    $sgpr8 = S_MOV_B32 0
+    $sgpr9 = S_MOV_B32 0
+    S_BRANCH %bb.2
+
+  bb.2:
+    liveins: $vgpr0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...


        


More information about the llvm-commits mailing list