[llvm] [AMDGPU] Correctly insert s_nops for implicit read of SDWA (PR #100276)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 24 14:34:24 PDT 2024


https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/100276

>From 92f9ef7f238ed8e2d06f148fe6e7607fdd573794 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 23 Jul 2024 15:14:14 -0700
Subject: [PATCH 1/3] [AMDGPU] Correctly insert s_nops for implicit read of
 SDWA

Change-Id: I4e22bb3764705f328827eb64704720a0d6aa1a9b
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 24 +++++++++
 .../AMDGPU/sdwa-dst-preserve-hazard.mir       | 54 +++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/sdwa-dst-preserve-hazard.mir

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a402fc6d7e611..570fcf63587fd 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -935,6 +935,30 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
           if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
             return true;
         }
+
+        // If the non-impacted bits of a sdwa dst are preserved, then we
+        // read them and must resolve the hazard. SDWA dst preserve is modelled
+        // as a tied-def implicit use
+        if (SIInstrInfo::isSDWA(*VALU)) {
+          if (auto *DstSel =
+                  TII->getNamedOperand(*VALU, AMDGPU::OpName::dst_sel)) {
+            if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
+              return false;
+            if (auto *ThisDst =
+                    TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
+              if (!TRI->regsOverlap(Def, ThisDst->getReg()))
+                return false;
+              for (const MachineOperand &ImplicitOp :
+                   VALU->implicit_operands()) {
+                if (ImplicitOp.isDef())
+                  continue;
+                if (ImplicitOp.isReg() &&
+                    TRI->regsOverlap(Def, ImplicitOp.getReg()))
+                  return true;
+              }
+            }
+          }
+        }
       }
 
       return false;
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-dst-preserve-hazard.mir b/llvm/test/CodeGen/AMDGPU/sdwa-dst-preserve-hazard.mir
new file mode 100644
index 0000000000000..ec3b61c4776ee
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-dst-preserve-hazard.mir
@@ -0,0 +1,54 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=HAZARD %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=NOHAZARD %s
+
+---
+name:            hazard_vcmpx_sdwa_permlane16
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr6
+    ; HAZARD-LABEL: name: hazard_vcmpx_sdwa_permlane16
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr6
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr7 = COPY $vgpr6, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr6 = COPY $vgpr5, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec
+    ; HAZARD-NEXT: KILL killed renamable $vgpr2, renamable $vgpr3
+    ; HAZARD-NEXT: KILL killed renamable $vgpr0, renamable $vgpr1
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 1, 0, 3, 3, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr1 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 6, 0, 5, 5, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr0 = V_OR_B32_sdwa 0, killed $vgpr1, 0, killed $vgpr0, 0, 5, 0, 0, 6, implicit $exec
+    ; HAZARD-NEXT: S_NOP 0
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, 2, 6, 1, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; HAZARD-NEXT: GLOBAL_STORE_DWORD killed renamable $vgpr6_vgpr7, killed renamable $vgpr0, 0, 0, implicit $exec
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: hazard_vcmpx_sdwa_permlane16
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr6
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr7 = COPY $vgpr6, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr6 = COPY $vgpr5, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec
+    ; NOHAZARD-NEXT: KILL killed renamable $vgpr2, renamable $vgpr3
+    ; NOHAZARD-NEXT: KILL killed renamable $vgpr0, renamable $vgpr1
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 1, 0, 3, 3, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr1 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 6, 0, 5, 5, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_OR_B32_sdwa 0, killed $vgpr1, 0, killed $vgpr0, 0, 5, 0, 0, 6, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, 2, 6, 1, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; NOHAZARD-NEXT: GLOBAL_STORE_DWORD killed renamable $vgpr6_vgpr7, killed renamable $vgpr0, 0, 0, implicit $exec
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr7 = COPY $vgpr6, implicit $exec
+  renamable $vgpr6 = COPY $vgpr5, implicit $exec
+  renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+  renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec
+  KILL killed renamable $vgpr2, renamable $vgpr3
+  KILL killed renamable $vgpr0, renamable $vgpr1
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 1, 0, 3, 3, implicit $exec
+  renamable $vgpr1 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 6, 0, 5, 5, implicit $exec
+  renamable $vgpr0 = V_OR_B32_sdwa 0, killed $vgpr1, 0, killed $vgpr0, 0, 5, 0, 0, 6, implicit $exec
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, 2, 6, 1, implicit $exec, implicit killed $vgpr0(tied-def 0)
+  GLOBAL_STORE_DWORD killed renamable $vgpr6_vgpr7, killed renamable $vgpr0, 0, 0, implicit $exec
+  S_ENDPGM 0
+...

>From c6b2c2c0bbcf8d3ae9f1b633db37d7b0fa51ee1a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 24 Jul 2024 12:36:40 -0700
Subject: [PATCH 2/3] Handle opsel case + address review comments

Change-Id: I131dbb24762a53f31af0aa685c48b6c14233bf3d
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp |  36 +++---
 llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir   | 103 ++++++++++++++++++
 2 files changed, 116 insertions(+), 23 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 570fcf63587fd..66349e9527725 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -875,6 +875,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
     return DataIdx >= 0 &&
            TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg);
   };
+
   int WaitStatesNeededForDef =
     VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
   WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
@@ -931,33 +932,22 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
       if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
         Register Def = Dst->getReg();
 
-        for (const MachineOperand &Use : VALU->explicit_uses()) {
+        for (const MachineOperand &Use : VALU->all_uses()) {
           if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
             return true;
         }
 
-        // If the non-impacted bits of a sdwa dst are preserved, then we
-        // read them and must resolve the hazard. SDWA dst preserve is modelled
-        // as a tied-def implicit use
-        if (SIInstrInfo::isSDWA(*VALU)) {
-          if (auto *DstSel =
-                  TII->getNamedOperand(*VALU, AMDGPU::OpName::dst_sel)) {
-            if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
-              return false;
-            if (auto *ThisDst =
-                    TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
-              if (!TRI->regsOverlap(Def, ThisDst->getReg()))
-                return false;
-              for (const MachineOperand &ImplicitOp :
-                   VALU->implicit_operands()) {
-                if (ImplicitOp.isDef())
-                  continue;
-                if (ImplicitOp.isReg() &&
-                    TRI->regsOverlap(Def, ImplicitOp.getReg()))
-                  return true;
-              }
-            }
-          }
+        // For ECC, sub-32-bit writes to the same register also read the dst
+        if (auto *ThisDst = TII->getNamedOperand(*VALU, AMDGPU::OpName::vdst)) {
+          Register ThisDef = ThisDst->getReg();
+          if (!TRI->regsOverlap(Def, ThisDef))
+            return false;
+          if (AMDGPU::hasNamedOperand(VALU->getOpcode(),
+                                      AMDGPU::OpName::op_sel) &&
+              TII->getNamedOperand(*VALU, AMDGPU::OpName::src0_modifiers)
+                      ->getImm() &
+                  SISrcMods::DST_OP_SEL)
+            return true;
         }
       }
 
diff --git a/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir
new file mode 100644
index 0000000000000..a8a26b49a57e9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir
@@ -0,0 +1,103 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=HAZARD %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=NOHAZARD %s
+
+---
+name:            sdwa_opsel_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: sdwa_opsel_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; HAZARD-NEXT: S_NOP 0
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: sdwa_opsel_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            opsel_sdwa_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: opsel_sdwa_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: S_NOP 0
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: opsel_sdwa_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+  S_ENDPGM 0
+...
+
+---
+name:            opsel_opsel_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: opsel_opsel_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: S_NOP 0
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr1, 8, killed $vgpr4, 8, killed $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: opsel_opsel_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr1, 8, killed $vgpr4, 8, killed $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr1, 8, killed $vgpr4, 8, killed $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            sdwa_sdwa_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: sdwa_sdwa_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; HAZARD-NEXT: S_NOP 0
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: sdwa_sdwa_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+  S_ENDPGM 0
+...

>From 86c8316e99a9e5ccdca8d32e2d92890062abafd2 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 24 Jul 2024 13:59:08 -0700
Subject: [PATCH 3/3] Add minimal support for INLINEASM correctness

Change-Id: Icbe306650f5513787db77acd49eedbb08d8f2149
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp |  70 ++++--
 llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir   | 221 ++++++++++++++++++
 2 files changed, 275 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 66349e9527725..c87cdd043b889 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -883,6 +883,39 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
   return WaitStatesNeeded;
 }
 
+static const MachineOperand *
+getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  if (SIInstrInfo::isVALU(MI) && SIInstrInfo::isSDWA(MI)) {
+    if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
+      if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
+        return nullptr; // dst_sel is DWORD
+  } else if (SIInstrInfo::isVALU(MI)) {
+    if (!AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::op_sel) ||
+        !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
+          SISrcMods::DST_OP_SEL))
+      return nullptr; // no op_sel operand or DST_OP_SEL not set
+  }
+
+  const MachineOperand *Dst = nullptr;
+
+  if (SIInstrInfo::isVALU(MI))
+    Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+  // Assume inline asm has the hazard; use its first register def as the dst
+  else if (MI.isInlineAsm()) {
+    for (auto &Op :
+         llvm::drop_begin(MI.operands(), InlineAsm::MIOp_FirstOperand)) {
+      if (Op.isReg() && Op.isDef()) {
+        Dst = &Op;
+        break;
+      }
+    }
+  }
+
+  return Dst; // may be nullptr
+}
+
 int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
   int WaitStatesNeeded = 0;
 
@@ -914,22 +947,10 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
     const int Shift16DefWaitstates = 1;
 
     auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) {
-      if (!SIInstrInfo::isVALU(MI))
-        return false;
       const SIInstrInfo *TII = ST.getInstrInfo();
-      if (SIInstrInfo::isSDWA(MI)) {
-        if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
-          if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
-            return false;
-      } else {
-        if (!AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::op_sel) ||
-            !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)
-                  ->getImm() &
-              SISrcMods::DST_OP_SEL))
-          return false;
-      }
       const SIRegisterInfo *TRI = ST.getRegisterInfo();
-      if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
+      const MachineOperand *Dst = getDstSelForwardingOperand(MI, ST);
+      if (Dst) {
         Register Def = Dst->getReg();
 
         for (const MachineOperand &Use : VALU->all_uses()) {
@@ -950,7 +971,6 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
             return true;
         }
       }
-
       return false;
     };
 
@@ -1043,7 +1063,7 @@ int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
   // problematic thus far.
 
   // see checkVALUHazards()
-  if (!ST.has12DWordStoreHazard())
+  if (!ST.has12DWordStoreHazard() && !ST.hasDstSelForwardingHazard())
     return 0;
 
   const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1054,6 +1074,24 @@ int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
     if (Op.isReg() && Op.isDef()) {
       WaitStatesNeeded =
           std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
+
+      if (!TRI.isVectorRegister(MRI, Op.getReg()))
+        continue;
+
+      if (ST.hasDstSelForwardingHazard()) {
+        const int Shift16DefWaitstates = 1;
+
+        auto IsShift16BitDefFn = [this](const MachineInstr &MI) {
+          const MachineOperand *Dst = getDstSelForwardingOperand(MI, ST);
+          // Assume inline asm reads the dst
+          return Dst ? true : false;
+        };
+
+        int WaitStatesNeededForDef =
+            Shift16DefWaitstates -
+            getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates);
+        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir
index a8a26b49a57e9..55dfa8cb64009 100644
--- a/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir
@@ -27,6 +27,30 @@ body:            |
   S_ENDPGM 0
 ...
 
+---
+name:            sdwa_no_opsel_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: sdwa_no_opsel_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: sdwa_no_opsel_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
 ---
 name:            opsel_sdwa_hazard
 body:            |
@@ -52,6 +76,54 @@ body:            |
   S_ENDPGM 0
 ...
 
+---
+name:            opsel_no_sdwa_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: opsel_no_sdwa_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: opsel_no_sdwa_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            no_opsel_sdwa_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: no_opsel_sdwa_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: no_opsel_sdwa_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+  S_ENDPGM 0
+...
+
 ---
 name:            opsel_opsel_hazard
 body:            |
@@ -77,6 +149,54 @@ body:            |
   S_ENDPGM 0
 ...
 
+---
+name:            opsel_no_opsel_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: opsel_no_opsel_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr1, 8, killed $vgpr4, 8, killed $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: opsel_no_opsel_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr1, 8, killed $vgpr4, 8, killed $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr1, 8, killed $vgpr4, 8, killed $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            no_opsel_opsel_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: no_opsel_opsel_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr1, 8, killed $vgpr4, 8, killed $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: no_opsel_opsel_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr1, 8, killed $vgpr4, 8, killed $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 12, killed $vgpr3, 8, killed $vgpr4, 8, killed $vgpr2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  renamable $vgpr0 = nofpexcept V_PK_FMA_F16 0, killed $vgpr1, 8, killed $vgpr4, 8, killed $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
 ---
 name:            sdwa_sdwa_hazard
 body:            |
@@ -101,3 +221,104 @@ body:            |
   renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
   S_ENDPGM 0
 ...
+
+---
+name:            sdwa_nosdwa_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: sdwa_nosdwa_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: sdwa_nosdwa_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            inline_sdwa_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: inline_sdwa_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+    ; HAZARD-NEXT: S_NOP 0
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: inline_sdwa_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+  S_ENDPGM 0
+...
+
+---
+name:            sdwa_inline_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: sdwa_inline_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; HAZARD-NEXT: S_NOP 0
+    ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: sdwa_inline_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+  INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1
+  S_ENDPGM 0
+...
+
+
+---
+name:            inline_inline_hazard
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+    ; HAZARD-LABEL: name: inline_inline_hazard
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+    ; HAZARD-NEXT: S_NOP 0
+    ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: inline_inline_hazard
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+    ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1
+  INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1
+  S_ENDPGM 0
+...
+



More information about the llvm-commits mailing list