[llvm] [AMDGPU] Support GFX12 VDSDIR instructions WAITVMSRC operand in GCNHazardRecognizer (PR #77628)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 10 09:26:27 PST 2024


https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/77628

Modify GCNHazardRecognizer::fixLdsDirectVMEMHazard() so the waitvsrc operand
in gfx12 DS_PARAM_LOAD or DS_DIRECT_LOAD instructions is set appropriately
depending on whether a hazard is found or not, rather than inserting an
S_WAITCNT_DEPCTR instruction if a hazard needs to be mitigated.


>From 37d5d3e40cd60401988c9c3f67fc9ed75d21714b Mon Sep 17 00:00:00 2001
From: Stephen Thomas <Stephen.Thomas at amd.com>
Date: Tue, 17 Jan 2023 13:26:29 +0000
Subject: [PATCH] [AMDGPU] Support GFX12 VDSDIR instructions WAITVMSRC operand
 in GCNHazardRecognizer

Modify GCNHazardRecognizer::fixLdsDirectVMEMHazard() so the waitvsrc operand
in gfx12 DS_PARAM_LOAD or DS_DIRECT_LOAD instructions is set appropriately
depending on whether a hazard is found or not, rather than inserting an
S_WAITCNT_DEPCTR instruction if a hazard needs to be mitigated.
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp |  17 +-
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   2 +
 ...zards.mir => lds-direct-hazards-gfx11.mir} |   0
 .../AMDGPU/lds-direct-hazards-gfx12.mir       | 391 ++++++++++++++++++
 4 files changed, 405 insertions(+), 5 deletions(-)
 rename llvm/test/CodeGen/AMDGPU/{lds-direct-hazards.mir => lds-direct-hazards-gfx11.mir} (100%)
 create mode 100644 llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a7d8ff0242b801..bcd93e30d6c2d1 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1450,20 +1450,27 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
       return false;
     return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
   };
-  auto IsExpiredFn = [](const MachineInstr &I, int) {
+  bool LdsdirCanWait = ST.hasLdsWaitVMSRC();
+  auto IsExpiredFn = [this, LdsdirCanWait](const MachineInstr &I, int) {
     return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
            (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
            (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
-            AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0);
+            AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0) ||
+           (LdsdirCanWait && SIInstrInfo::isLDSDIR(I) &&
+            !TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm());
   };
 
   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
       std::numeric_limits<int>::max())
     return false;
 
-  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
-          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
-      .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
+  if (LdsdirCanWait) {
+    TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0);
+  } else {
+    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+            TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+        .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
+  }
 
   return true;
 }
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f6f37f5170a403..85d062a9a6f5e8 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1128,6 +1128,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasLdsDirect() const { return getGeneration() >= GFX11; }
 
+  bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
+
   bool hasVALUPartialForwardingHazard() const {
     return getGeneration() >= GFX11;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir
similarity index 100%
rename from llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir
rename to llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir
diff --git a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir
new file mode 100644
index 00000000000000..1ef6ce88e61106
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir
@@ -0,0 +1,391 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name:            lds_param_load_no_war
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_no_war
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_va_vdst0_war
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_va_vdst0_war
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_va_vdst0_war_salu
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_va_vdst0_war_salu
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $m0 = S_MOV_B32 killed $sgpr0
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $m0 = S_MOV_B32 killed $sgpr0
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_va_vdst1_war
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_va_vdst1_war
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 1, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_va_vdst10_war
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_va_vdst10_war
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 10, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_va_vdst10_waw
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_va_vdst10_waw
+    ; GCN: $vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 10, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_va_vdst20_war
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_va_vdst20_war
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_valu_war_trans
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_valu_war_trans
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_trans_war_valu
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_trans_war_valu
+    ; GCN: $vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+    $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_valu_war_vmem
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_valu_war_vmem
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+    ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+    $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_valu_war_lds
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_valu_war_lds
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec
+    $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_valu_war_ldsdir
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_valu_war_ldsdir
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr10 = DS_PARAM_LOAD 0, 1, 15, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 4, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr10 = DS_PARAM_LOAD 0, 1, 15, 1, implicit $m0, implicit $exec
+    $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 4, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_vmem_war
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_vmem_war
+    ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_vmem_war_valu
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_vmem_war_valu
+    ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+    ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+    $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_vmem_war_exp
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_vmem_war_exp
+    ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+    ; GCN-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+    EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_vmem_war_waitcnt_depctr
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr
+    ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+    S_WAITCNT_DEPCTR 65507
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_param_load_vmem_war_waitcnt_depctr2
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr2
+    ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 65535
+    ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+    S_WAITCNT_DEPCTR 65535
+    $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_direct_load_no_war
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_direct_load_no_war
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_DIRECT_LOAD 15, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    $vgpr1 = DS_DIRECT_LOAD 0, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            lds_direct_load_va_vdst0_war
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: lds_direct_load_va_vdst0_war
+    ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr1 = DS_DIRECT_LOAD 0, 1, implicit $m0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr1 = DS_DIRECT_LOAD 15, 1, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...



More information about the llvm-commits mailing list