[llvm] 63f21f4 - [AMDGPU] Handle LDS DMA and LDS_DIRECT hazards

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed May 4 14:45:32 PDT 2022


Author: Stanislav Mekhanoshin
Date: 2022-05-04T14:45:16-07:00
New Revision: 63f21f4cc7bb12614e7049c464f115bdcb8b7fe5

URL: https://github.com/llvm/llvm-project/commit/63f21f4cc7bb12614e7049c464f115bdcb8b7fe5
DIFF: https://github.com/llvm/llvm-project/commit/63f21f4cc7bb12614e7049c464f115bdcb8b7fe5.diff

LOG: [AMDGPU] Handle LDS DMA and LDS_DIRECT hazards

There must be 1 wait state between an M0 write and a subsequent LDS DMA or LDS_DIRECT use.

Differential Revision: https://reviews.llvm.org/D124550

Added: 
    llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir

Modified: 
    llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/test/CodeGen/AMDGPU/hazard.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 7f5bc9af66196..51d3704d56451 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -166,6 +166,11 @@ static bool isPermlane(const MachineInstr &MI) {
          Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
 }
 
+static bool isLdsDma(const MachineInstr &MI) {
+  return SIInstrInfo::isVALU(MI) &&
+         (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI));
+}
+
 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                      AMDGPU::OpName::simm16);
@@ -226,12 +231,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
     return HazardType;
 
-  if (ST.hasReadM0MovRelInterpHazard() &&
-      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
-      checkReadM0Hazards(MI) > 0)
-    return HazardType;
-
-  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
+  if (((ST.hasReadM0MovRelInterpHazard() &&
+        (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
+       (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
+       (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
+       (ST.hasReadM0LdsDirectHazard() &&
+        MI->readsRegister(AMDGPU::LDS_DIRECT))) &&
       checkReadM0Hazards(MI) > 0)
     return HazardType;
 
@@ -351,11 +356,11 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
   if (isRFE(MI->getOpcode()))
     return std::max(WaitStates, checkRFEHazards(MI));
 
-  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
-                                           isSMovRel(MI->getOpcode())))
-    return std::max(WaitStates, checkReadM0Hazards(MI));
-
-  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
+  if ((ST.hasReadM0MovRelInterpHazard() &&
+       (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
+      (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
+      (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
+      (ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT)))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
   if (SIInstrInfo::isMAI(*MI))
@@ -1014,10 +1019,10 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
 
 int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
   const SIInstrInfo *TII = ST.getInstrInfo();
-  const int SMovRelWaitStates = 1;
+  const int ReadM0WaitStates = 1;
   auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); };
-  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
-                                                   SMovRelWaitStates);
+  return ReadM0WaitStates -
+         getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
 }
 
 void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {

diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f6e1d9ca3c3f9..bc92b7292b077 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -931,6 +931,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
            getGeneration() <= AMDGPUSubtarget::GFX9;
   }
 
+  bool hasReadM0LdsDmaHazard() const {
+    return getGeneration() == AMDGPUSubtarget::GFX9;
+  }
+
+  bool hasReadM0LdsDirectHazard() const {
+    return getGeneration() == AMDGPUSubtarget::GFX9;
+  }
+
   bool hasVcmpxPermlaneHazard() const {
     return HasVcmpxPermlaneHazard;
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/hazard.mir b/llvm/test/CodeGen/AMDGPU/hazard.mir
index 5bc4c62569a25..1845009e2fe39 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard.mir
@@ -171,3 +171,27 @@ body: |
     S_SENDMSG 3, implicit $exec, implicit $m0
     S_ENDPGM 0
 ...
+
+# GCN-LABEL: name: buffer_store_lds_dword
+# GCN:       $m0 = S_MOV_B32 0
+# GFX9-NEXT: S_NOP 0
+# GCN-NEXT:  BUFFER_STORE_LDS_DWORD
+---
+name: buffer_store_lds_dword
+body:             |
+  bb.0:
+    $m0 = S_MOV_B32 0
+    BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $m0
+...
+
+# GCN-LABEL: name: lds_direct_read_m0
+# GCN:       $m0 = S_MOV_B32 0
+# GFX9-NEXT: S_NOP 0
+# GCN-NEXT:  V_MOV_B32
+---
+name: lds_direct_read_m0
+body:             |
+  bb.0:
+    $m0 = S_MOV_B32 0
+    $vgpr0 = V_MOV_B32_e32 $lds_direct, implicit $exec, implicit $m0
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir b/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir
new file mode 100644
index 0000000000000..a3dc15752fa1a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir
@@ -0,0 +1,49 @@
+# RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s
+
+# GCN-LABEL: name: buffer_load_dword_lds
+# GCN:      $m0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_ADDR64
+---
+name: buffer_load_dword_lds
+body:             |
+  bb.0:
+    $m0 = S_MOV_B32 0
+    BUFFER_LOAD_DWORD_LDS_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec, implicit $m0
+...
+
+# GCN-LABEL: name: buffer_store_lds_dword
+# GCN:      $m0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: BUFFER_STORE_LDS_DWORD
+---
+name: buffer_store_lds_dword
+body:             |
+  bb.0:
+    $m0 = S_MOV_B32 0
+    BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $m0
+...
+
+# GCN-LABEL: name: global_load_lds_dword
+# GCN:      $m0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: GLOBAL_LOAD_LDS_DWORD
+---
+name: global_load_lds_dword
+body:             |
+  bb.0:
+    $m0 = S_MOV_B32 0
+    GLOBAL_LOAD_LDS_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $m0
+...
+
+# GCN-LABEL: name: scratch_load_lds_dword
+# GCN:      $m0 = S_MOV_B32 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: SCRATCH_LOAD_LDS_DWORD
+---
+name: scratch_load_lds_dword
+body:             |
+  bb.0:
+    $m0 = S_MOV_B32 0
+    SCRATCH_LOAD_LDS_DWORD $vgpr2, 0, 0, implicit $exec, implicit $m0
+...


        


More information about the llvm-commits mailing list