[llvm] [AMDGPU] Merge consecutive wait_alu instruction (PR #128916)

Ana Mihajlovic via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 28 03:35:51 PST 2025


https://github.com/mihajlovicana updated https://github.com/llvm/llvm-project/pull/128916

>From 2c77fd3998321dde8b0a221fc4ff544b014916dc Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 26 Feb 2025 18:20:55 +0100
Subject: [PATCH 1/5] merge consecutive wait_alu instructions

---
 .../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp   | 22 ++++++++++++++
 .../AMDGPU/merge-consecutive-wait-alus.mir    | 30 +++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 4df55eac5d76b..bb15d12ada650 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -164,6 +164,21 @@ class AMDGPUWaitSGPRHazards {
       BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
   }
 
+  unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
+    unsigned Mask = Mask1 & Mask2;
+
+    Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
+    return Mask;
+  }
+
   bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
     enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
 
@@ -362,6 +377,13 @@ class AMDGPUWaitSGPRHazards {
           Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
         }
         if (Emit) {
+          if (MI != MI->getParent()->begin()) {
+            MachineInstr &PrevMI = *std::prev(MI);
+            if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+              Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm());
+              PrevMI.eraseFromParent();
+            }
+          }
           auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
                                TII->get(AMDGPU::S_WAITCNT_DEPCTR))
                            .addImm(Mask);
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
new file mode 100644
index 0000000000000..0cd203e6a9bbb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass amdgpu-wait-sgpr-hazards -o -  %s | FileCheck %s
+
+
+---
+name: merge_consecutive_wait_alus
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: merge_consecutive_wait_alus
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+
+
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}

>From 2eb2045b009a3332ea68931022e07ccd8f484828 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 26 Feb 2025 18:30:41 +0100
Subject: [PATCH 2/5] update test

---
 llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
index 0cd203e6a9bbb..ff3c60d98da11 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -4,12 +4,6 @@
 
 ---
 name: merge_consecutive_wait_alus
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $vgpr0

>From a6cfd55e53e0a88b4ba0afefd0fdab4d0975b1ff Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Thu, 27 Feb 2025 16:57:00 +0100
Subject: [PATCH 3/5] Added predecessor lookup, updated tests

---
 .../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp   |  33 ++-
 .../AMDGPU/merge-consecutive-wait-alus.mir    | 193 +++++++++++++++++-
 2 files changed, 220 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index bb15d12ada650..d1d31e7e7ee68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -16,7 +16,13 @@
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
+#include "llvm-c/Core.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/CFG.h"
+#include <iterator>
 
 using namespace llvm;
 
@@ -165,6 +171,7 @@ class AMDGPUWaitSGPRHazards {
   }
 
   unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
+    //this is enough to clear SA_SDST, VA_VCC, HOLD_CNT, VA_SSRC since they are 1-bit fields
     unsigned Mask = Mask1 & Mask2;
 
     Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
@@ -179,6 +186,15 @@ class AMDGPUWaitSGPRHazards {
     return Mask;
   }
 
+  MachineInstr* getPreviousWaitAlu(MachineBasicBlock::instr_iterator &MI) {
+    auto PrevMI = std::prev(MI);
+    while (PrevMI != PrevMI->getParent()->instr_begin() &&
+           (PrevMI->isDebugInstr() || PrevMI->isMetaInstruction()))
+      --PrevMI;
+
+    return &(*PrevMI);
+  }
+
   bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
     enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
 
@@ -377,13 +393,22 @@ class AMDGPUWaitSGPRHazards {
           Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
         }
         if (Emit) {
+          MachineInstr* PrevWaitAlu = nullptr;
           if (MI != MI->getParent()->begin()) {
-            MachineInstr &PrevMI = *std::prev(MI);
-            if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
-              Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm());
-              PrevMI.eraseFromParent();
+            PrevWaitAlu = getPreviousWaitAlu(MI);
+          } else {
+            auto Preds = MBB.predecessors();
+            if (MBB.pred_size() == 1) {
+              auto &Pred = *Preds.begin();
+              auto PrevMI = Pred->instr_end();
+              PrevWaitAlu = getPreviousWaitAlu(PrevMI);
             }
           }
+
+          if (PrevWaitAlu != nullptr && PrevWaitAlu->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR){
+            Mask = mergeMasks(Mask, PrevWaitAlu->getOperand(0).getImm());
+            PrevWaitAlu->eraseFromParent();
+          }
           auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
                                TII->get(AMDGPU::S_WAITCNT_DEPCTR))
                            .addImm(Mask);
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
index ff3c60d98da11..f9d9c64202cf7 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -18,7 +18,196 @@ body:             |
     S_WAITCNT_DEPCTR 65530
     renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
 ...
+---
+name: merge_consecutive_wait_alus_two_bb
+body:             |
+  ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT_DEPCTR 61946
+  ; CHECK-NEXT:   renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+  bb.0:
+    liveins: $vgpr0
+
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
 
+  bb.1:
+    liveins: $sgpr0
 
-## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-# CHECK: {{.*}}
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_implicit_def
+machineFunctionInfo:
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: skip_implicit_def
+    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    $sgpr0 = IMPLICIT_DEF
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_kill
+machineFunctionInfo:
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: skip_kill
+    ; CHECK: KILL $sgpr0
+    ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: KILL $sgpr0
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    KILL $sgpr0
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    KILL $sgpr0
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_cfi
+machineFunctionInfo:
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: skip_cfi
+    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: CFI_INSTRUCTION undefined $sgpr0
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    CFI_INSTRUCTION undefined $sgpr0
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_eh_label
+machineFunctionInfo:
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: skip_eh_label
+    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: EH_LABEL 0
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    EH_LABEL 0
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_gc_label
+machineFunctionInfo:
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: skip_gc_label
+    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: GC_LABEL 0
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    GC_LABEL 0
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_dbg_value
+machineFunctionInfo:
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: skip_dbg_value
+    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: DBG_VALUE 0
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    DBG_VALUE 0
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_dbg_label
+machineFunctionInfo:
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: skip_dbg_label
+    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: DBG_LABEL 0
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    DBG_LABEL 0
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_lifetime_start
+machineFunctionInfo:
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: skip_lifetime_start
+    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: LIFETIME_START 0
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    LIFETIME_START 0
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_lifetime_end
+machineFunctionInfo:
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: skip_lifetime_end
+    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+    ; CHECK-NEXT: LIFETIME_END 0
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    LIFETIME_END 0
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: merge_consecutive_wait_alus_two_bb_meta
+body:             |
+  ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb_meta
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+  ; CHECK-NEXT:   EH_LABEL 0
+  ; CHECK-NEXT:   GC_LABEL 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT_DEPCTR 61946
+  ; CHECK-NEXT:   renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+  bb.0:
+    liveins: $vgpr0
+
+    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+    S_WAITCNT_DEPCTR 65530
+    EH_LABEL 0
+    GC_LABEL 0
+
+  bb.1:
+    liveins: $sgpr0
+
+    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...

>From 2275d5a85d04ce21ea653ed472d61c1376bb42d3 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Thu, 27 Feb 2025 17:37:41 +0100
Subject: [PATCH 4/5] update merge mask function

---
 .../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp   | 23 ++++++++----
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 36 +++++++++++++++++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 18 ++++++++++
 3 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index d1d31e7e7ee68..e9f1b3bc09059 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -171,9 +171,13 @@ class AMDGPUWaitSGPRHazards {
   }
 
   unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
-    //this is enough to clear SA_SDST, VA_VCC, HOLD_CNT, VA_SSRC since they are 1-bit fields
-    unsigned Mask = Mask1 & Mask2;
-
+    unsigned Mask = 0xffff;
+    Mask = AMDGPU::DepCtr::encodeFieldSaSdst(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
+                       AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
     Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
         Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
                        AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
@@ -183,10 +187,16 @@ class AMDGPUWaitSGPRHazards {
     Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
         Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
                        AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
+                       AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2)));
+    Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
+        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
+                       AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
     return Mask;
   }
 
-  MachineInstr* getPreviousWaitAlu(MachineBasicBlock::instr_iterator &MI) {
+  MachineInstr *getPreviousWaitAlu(MachineBasicBlock::instr_iterator &MI) {
     auto PrevMI = std::prev(MI);
     while (PrevMI != PrevMI->getParent()->instr_begin() &&
            (PrevMI->isDebugInstr() || PrevMI->isMetaInstruction()))
@@ -393,7 +403,7 @@ class AMDGPUWaitSGPRHazards {
           Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
         }
         if (Emit) {
-          MachineInstr* PrevWaitAlu = nullptr;
+          MachineInstr *PrevWaitAlu = nullptr;
           if (MI != MI->getParent()->begin()) {
             PrevWaitAlu = getPreviousWaitAlu(MI);
           } else {
@@ -405,7 +415,8 @@ class AMDGPUWaitSGPRHazards {
             }
           }
 
-          if (PrevWaitAlu != nullptr && PrevWaitAlu->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR){
+          if (PrevWaitAlu != nullptr &&
+              PrevWaitAlu->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
             Mask = mergeMasks(Mask, PrevWaitAlu->getOperand(0).getImm());
             PrevWaitAlu->eraseFromParent();
           }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 6a92e54b69edc..84c16a84f0bcd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -164,6 +164,18 @@ inline unsigned getSaSdstBitWidth() { return 1; }
 /// \returns SaSdst bit shift
 inline unsigned getSaSdstBitShift() { return 0; }
 
+/// \returns VaSsrc width
+inline unsigned getVaSsrcBitWidth() { return 1; }
+
+/// \returns VaSsrc bit shift
+inline unsigned getVaSsrcBitShift() { return 8; }
+
+/// \returns HoldCnt width
+inline unsigned getHoldCntWidth() { return 1; }
+
+/// \returns HoldCnt bit shift
+inline unsigned getHoldCntBitShift() { return 7; }
+
 } // end anonymous namespace
 
 namespace llvm {
@@ -1740,6 +1752,14 @@ unsigned decodeFieldVaVcc(unsigned Encoded) {
   return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
 }
 
+unsigned decodeFieldVaSsrc(unsigned Encoded) {
+  return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
+}
+
+unsigned decodeFieldHoldCnt(unsigned Encoded) {
+  return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());
+}
+
 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
   return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
 }
@@ -1780,6 +1800,22 @@ unsigned encodeFieldVaVcc(unsigned VaVcc) {
   return encodeFieldVaVcc(0xffff, VaVcc);
 }
 
+unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
+  return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
+}
+
+unsigned encodeFieldVaSsrc(unsigned VaSsrc) {
+  return encodeFieldVaSsrc(0xffff, VaSsrc); // same base as encodeFieldVaVcc
+}
+
+unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
+  return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());
+}
+
+unsigned encodeFieldHoldCnt(unsigned HoldCnt) {
+  return encodeFieldHoldCnt(0xffff, HoldCnt); // same base as encodeFieldVaVcc
+}
+
 } // namespace DepCtr
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 103993e6435de..edf0f478252eb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1181,6 +1181,12 @@ unsigned decodeFieldVaSdst(unsigned Encoded);
 /// \returns Decoded VaVcc from given immediate \p Encoded.
 unsigned decodeFieldVaVcc(unsigned Encoded);
 
+/// \returns Decoded VaSsrc from given immediate \p Encoded.
+unsigned decodeFieldVaSsrc(unsigned Encoded);
+
+/// \returns Decoded HoldCnt from given immediate \p Encoded.
+unsigned decodeFieldHoldCnt(unsigned Encoded);
+
 /// \returns \p VmVsrc as an encoded Depctr immediate.
 unsigned encodeFieldVmVsrc(unsigned VmVsrc);
 
@@ -1211,6 +1217,18 @@ unsigned encodeFieldVaVcc(unsigned VaVcc);
 /// \returns \p Encoded combined with encoded \p VaVcc.
 unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
 
+/// \returns \p HoldCnt as an encoded Depctr immediate.
+unsigned encodeFieldHoldCnt(unsigned HoldCnt);
+
+/// \returns \p Encoded combined with encoded \p HoldCnt.
+unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt);
+
+/// \returns \p VaSsrc as an encoded Depctr immediate.
+unsigned encodeFieldVaSsrc(unsigned VaSsrc);
+
+/// \returns \p Encoded combined with encoded \p VaSsrc.
+unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
+
 } // namespace DepCtr
 
 namespace Exp {

>From 7ee5f39c6645f751ce76643dff6ab1e8bc9e9ed3 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Fri, 28 Feb 2025 12:35:02 +0100
Subject: [PATCH 5/5] update mask instead of creating new instruction, update
 test, remove unnecessary includes

---
 .../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp   |  15 +-
 .../AMDGPU/merge-consecutive-wait-alus.mir    | 157 +-----------------
 2 files changed, 9 insertions(+), 163 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index e9f1b3bc09059..261f6cfcb0cd8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -16,13 +16,7 @@
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
-#include "llvm-c/Core.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/ilist_iterator.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/CFG.h"
-#include <iterator>
 
 using namespace llvm;
 
@@ -410,15 +404,18 @@ class AMDGPUWaitSGPRHazards {
             auto Preds = MBB.predecessors();
             if (MBB.pred_size() == 1) {
               auto &Pred = *Preds.begin();
-              auto PrevMI = Pred->instr_end();
-              PrevWaitAlu = getPreviousWaitAlu(PrevMI);
+              if (!Pred->empty()) {
+                auto PrevMI = Pred->instr_end();
+                PrevWaitAlu = getPreviousWaitAlu(PrevMI);
+              }
             }
           }
 
           if (PrevWaitAlu != nullptr &&
               PrevWaitAlu->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
             Mask = mergeMasks(Mask, PrevWaitAlu->getOperand(0).getImm());
-            PrevWaitAlu->eraseFromParent();
+            PrevWaitAlu->getOperand(0).setImm(Mask);
+            continue;
           }
           auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
                                TII->get(AMDGPU::S_WAITCNT_DEPCTR))
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
index f9d9c64202cf7..646c0202aeaf5 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -27,11 +27,11 @@ body:             |
   ; CHECK-NEXT:   liveins: $vgpr0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+  ; CHECK-NEXT:   S_WAITCNT_DEPCTR 61946
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   liveins: $sgpr0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_WAITCNT_DEPCTR 61946
   ; CHECK-NEXT:   renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
   bb.0:
     liveins: $vgpr0
@@ -45,169 +45,18 @@ body:             |
     renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
 ...
 ---
-name: skip_implicit_def
-machineFunctionInfo:
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: skip_implicit_def
-    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
-    ; CHECK-NEXT: $sgpr0 = IMPLICIT_DEF
-    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
-    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
-    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
-    S_WAITCNT_DEPCTR 65530
-    $sgpr0 = IMPLICIT_DEF
-    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_kill
-machineFunctionInfo:
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: skip_kill
-    ; CHECK: KILL $sgpr0
-    ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
-    ; CHECK-NEXT: KILL $sgpr0
-    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
-    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
-    KILL $sgpr0
-    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
-    S_WAITCNT_DEPCTR 65530
-    KILL $sgpr0
-    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_cfi
-machineFunctionInfo:
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: skip_cfi
-    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
-    ; CHECK-NEXT: CFI_INSTRUCTION undefined $sgpr0
-    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
-    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
-    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
-    S_WAITCNT_DEPCTR 65530
-    CFI_INSTRUCTION undefined $sgpr0
-    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_eh_label
-machineFunctionInfo:
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: skip_eh_label
-    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
-    ; CHECK-NEXT: EH_LABEL 0
-    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
-    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
-    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
-    S_WAITCNT_DEPCTR 65530
-    EH_LABEL 0
-    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_gc_label
-machineFunctionInfo:
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: skip_gc_label
-    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
-    ; CHECK-NEXT: GC_LABEL 0
-    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
-    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
-    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
-    S_WAITCNT_DEPCTR 65530
-    GC_LABEL 0
-    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_dbg_value
+name: skip_meta_instruction
 machineFunctionInfo:
 body: |
   bb.0:
-    ; CHECK-LABEL: name: skip_dbg_value
+    ; CHECK-LABEL: name: skip_meta_instruction
     ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
-    ; CHECK-NEXT: DBG_VALUE 0
     ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
-    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
-    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
-    S_WAITCNT_DEPCTR 65530
-    DBG_VALUE 0
-    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_dbg_label
-machineFunctionInfo:
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: skip_dbg_label
-    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
-    ; CHECK-NEXT: DBG_LABEL 0
-    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
-    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
-    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
-    S_WAITCNT_DEPCTR 65530
-    DBG_LABEL 0
-    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_lifetime_start
-machineFunctionInfo:
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: skip_lifetime_start
-    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
-    ; CHECK-NEXT: LIFETIME_START 0
-    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
-    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
-    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
-    S_WAITCNT_DEPCTR 65530
-    LIFETIME_START 0
-    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_lifetime_end
-machineFunctionInfo:
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: skip_lifetime_end
-    ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
     ; CHECK-NEXT: LIFETIME_END 0
-    ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
     ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
     renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
     S_WAITCNT_DEPCTR 65530
     LIFETIME_END 0
     renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
 ...
----
-name: merge_consecutive_wait_alus_two_bb_meta
-body:             |
-  ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb_meta
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
-  ; CHECK-NEXT:   EH_LABEL 0
-  ; CHECK-NEXT:   GC_LABEL 0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   liveins: $sgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_WAITCNT_DEPCTR 61946
-  ; CHECK-NEXT:   renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
-  bb.0:
-    liveins: $vgpr0
-
-    renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
-    S_WAITCNT_DEPCTR 65530
-    EH_LABEL 0
-    GC_LABEL 0
 
-  bb.1:
-    liveins: $sgpr0
-
-    renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...



More information about the llvm-commits mailing list