[llvm] [AMDGPU] Merge consecutive wait_alu instruction (PR #128916)
Ana Mihajlovic via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 03:35:51 PST 2025
https://github.com/mihajlovicana updated https://github.com/llvm/llvm-project/pull/128916
>From 2c77fd3998321dde8b0a221fc4ff544b014916dc Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 26 Feb 2025 18:20:55 +0100
Subject: [PATCH 1/5] merge consecutive wait_alu instructions
---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 22 ++++++++++++++
.../AMDGPU/merge-consecutive-wait-alus.mir | 30 +++++++++++++++++++
2 files changed, 52 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 4df55eac5d76b..bb15d12ada650 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -164,6 +164,21 @@ class AMDGPUWaitSGPRHazards {
BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
}
+ unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
+ unsigned Mask = Mask1 & Mask2;
+
+ Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
+ return Mask;
+ }
+
bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
@@ -362,6 +377,13 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
}
if (Emit) {
+ if (MI != MI->getParent()->begin()) {
+ MachineInstr &PrevMI = *std::prev(MI);
+ if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+ Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm());
+ PrevMI.eraseFromParent();
+ }
+ }
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(Mask);
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
new file mode 100644
index 0000000000000..0cd203e6a9bbb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass amdgpu-wait-sgpr-hazards -o - %s | FileCheck %s
+
+
+---
+name: merge_consecutive_wait_alus
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: merge_consecutive_wait_alus
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+
+
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}
>From 2eb2045b009a3332ea68931022e07ccd8f484828 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 26 Feb 2025 18:30:41 +0100
Subject: [PATCH 2/5] update test
---
llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir | 6 ------
1 file changed, 6 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
index 0cd203e6a9bbb..ff3c60d98da11 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -4,12 +4,6 @@
---
name: merge_consecutive_wait_alus
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
>From a6cfd55e53e0a88b4ba0afefd0fdab4d0975b1ff Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Thu, 27 Feb 2025 16:57:00 +0100
Subject: [PATCH 3/5] Added predecessor lookup, updated tests
---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 33 ++-
.../AMDGPU/merge-consecutive-wait-alus.mir | 193 +++++++++++++++++-
2 files changed, 220 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index bb15d12ada650..d1d31e7e7ee68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -16,7 +16,13 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
+#include "llvm-c/Core.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/CFG.h"
+#include <iterator>
using namespace llvm;
@@ -165,6 +171,7 @@ class AMDGPUWaitSGPRHazards {
}
unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
+ //this is enough to clear SA_SDST, VA_VCC, HOLD_CNT, VA_SSRC since they are 1-bit fields
unsigned Mask = Mask1 & Mask2;
Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
@@ -179,6 +186,15 @@ class AMDGPUWaitSGPRHazards {
return Mask;
}
+ MachineInstr* getPreviousWaitAlu(MachineBasicBlock::instr_iterator &MI) {
+ auto PrevMI = std::prev(MI);
+ while (PrevMI != PrevMI->getParent()->instr_begin() &&
+ (PrevMI->isDebugInstr() || PrevMI->isMetaInstruction()))
+ --PrevMI;
+
+ return &(*PrevMI);
+ }
+
bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
@@ -377,13 +393,22 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
}
if (Emit) {
+ MachineInstr* PrevWaitAlu = nullptr;
if (MI != MI->getParent()->begin()) {
- MachineInstr &PrevMI = *std::prev(MI);
- if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
- Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm());
- PrevMI.eraseFromParent();
+ PrevWaitAlu = getPreviousWaitAlu(MI);
+ } else {
+ auto Preds = MBB.predecessors();
+ if (MBB.pred_size() == 1) {
+ auto &Pred = *Preds.begin();
+ auto PrevMI = Pred->instr_end();
+ PrevWaitAlu = getPreviousWaitAlu(PrevMI);
}
}
+
+ if (PrevWaitAlu != nullptr && PrevWaitAlu->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR){
+ Mask = mergeMasks(Mask, PrevWaitAlu->getOperand(0).getImm());
+ PrevWaitAlu->eraseFromParent();
+ }
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(Mask);
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
index ff3c60d98da11..f9d9c64202cf7 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -18,7 +18,196 @@ body: |
S_WAITCNT_DEPCTR 65530
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
...
+---
+name: merge_consecutive_wait_alus_two_bb
+body: |
+ ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ bb.0:
+ liveins: $vgpr0
+
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ bb.1:
+ liveins: $sgpr0
-## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-# CHECK: {{.*}}
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_implicit_def
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: skip_implicit_def
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: $sgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ $sgpr0 = IMPLICIT_DEF
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_kill
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: skip_kill
+ ; CHECK: KILL $sgpr0
+ ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: KILL $sgpr0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ KILL $sgpr0
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ KILL $sgpr0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_cfi
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: skip_cfi
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: CFI_INSTRUCTION undefined $sgpr0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ CFI_INSTRUCTION undefined $sgpr0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_eh_label
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: skip_eh_label
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: EH_LABEL 0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ EH_LABEL 0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_gc_label
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: skip_gc_label
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: GC_LABEL 0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ GC_LABEL 0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_dbg_value
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: skip_dbg_value
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: DBG_VALUE 0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ DBG_VALUE 0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_dbg_label
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: skip_dbg_label
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: DBG_LABEL 0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ DBG_LABEL 0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_lifetime_start
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: skip_lifetime_start
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: LIFETIME_START 0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ LIFETIME_START 0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: skip_lifetime_end
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: skip_lifetime_end
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: LIFETIME_END 0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ LIFETIME_END 0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: merge_consecutive_wait_alus_two_bb_meta
+body: |
+ ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb_meta
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: EH_LABEL 0
+ ; CHECK-NEXT: GC_LABEL 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ bb.0:
+ liveins: $vgpr0
+
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ EH_LABEL 0
+ GC_LABEL 0
+
+ bb.1:
+ liveins: $sgpr0
+
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
>From 2275d5a85d04ce21ea653ed472d61c1376bb42d3 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Thu, 27 Feb 2025 17:37:41 +0100
Subject: [PATCH 4/5] update merge mask function
---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 23 ++++++++----
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 36 +++++++++++++++++++
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 18 ++++++++++
3 files changed, 71 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index d1d31e7e7ee68..e9f1b3bc09059 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -171,9 +171,13 @@ class AMDGPUWaitSGPRHazards {
}
unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
- //this is enough to clear SA_SDST, VA_VCC, HOLD_CNT, VA_SSRC since they are 1-bit fields
- unsigned Mask = Mask1 & Mask2;
-
+ unsigned Mask = 0xffff;
+ Mask = AMDGPU::DepCtr::encodeFieldSaSdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
@@ -183,10 +187,16 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
+ AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
return Mask;
}
- MachineInstr* getPreviousWaitAlu(MachineBasicBlock::instr_iterator &MI) {
+ MachineInstr *getPreviousWaitAlu(MachineBasicBlock::instr_iterator &MI) {
auto PrevMI = std::prev(MI);
while (PrevMI != PrevMI->getParent()->instr_begin() &&
(PrevMI->isDebugInstr() || PrevMI->isMetaInstruction()))
@@ -393,7 +403,7 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
}
if (Emit) {
- MachineInstr* PrevWaitAlu = nullptr;
+ MachineInstr *PrevWaitAlu = nullptr;
if (MI != MI->getParent()->begin()) {
PrevWaitAlu = getPreviousWaitAlu(MI);
} else {
@@ -405,7 +415,8 @@ class AMDGPUWaitSGPRHazards {
}
}
- if (PrevWaitAlu != nullptr && PrevWaitAlu->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR){
+ if (PrevWaitAlu != nullptr &&
+ PrevWaitAlu->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
Mask = mergeMasks(Mask, PrevWaitAlu->getOperand(0).getImm());
PrevWaitAlu->eraseFromParent();
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 6a92e54b69edc..84c16a84f0bcd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -164,6 +164,18 @@ inline unsigned getSaSdstBitWidth() { return 1; }
/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }
+/// \returns VaSsrc width
+inline unsigned getVaSsrcBitWidth() { return 1; }
+
+/// \returns VaSsrc bit shift
+inline unsigned getVaSsrcBitShift() { return 8; }
+
+/// \returns HoldCnt width
+inline unsigned getHoldCntWidth() { return 1; }
+
+/// \returns HoldCnt bit shift
+inline unsigned getHoldCntBitShift() { return 7; }
+
} // end anonymous namespace
namespace llvm {
@@ -1740,6 +1752,14 @@ unsigned decodeFieldVaVcc(unsigned Encoded) {
return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
}
+unsigned decodeFieldVaSsrc(unsigned Encoded) {
+ return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
+}
+
+unsigned decodeFieldHoldCnt(unsigned Encoded) {
+ return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());
+}
+
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}
@@ -1780,6 +1800,22 @@ unsigned encodeFieldVaVcc(unsigned VaVcc) {
return encodeFieldVaVcc(0xffff, VaVcc);
}
+unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
+ return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
+}
+
+unsigned encodeFieldVaSsrc(unsigned VaSsrc) {
+ return encodeFieldVaSsrc(0xffff, VaSsrc); // all-ones 16-bit default, as in encodeFieldVaVcc; 0xfff cleared bits 12-15
+}
+
+unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
+ return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());
+}
+
+unsigned encodeFieldHoldCnt(unsigned HoldCnt) {
+ return encodeFieldHoldCnt(0xffff, HoldCnt); // all-ones 16-bit default, as in encodeFieldVaVcc; 0xfff cleared bits 12-15
+}
+
} // namespace DepCtr
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 103993e6435de..edf0f478252eb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1181,6 +1181,12 @@ unsigned decodeFieldVaSdst(unsigned Encoded);
/// \returns Decoded VaVcc from given immediate \p Encoded.
unsigned decodeFieldVaVcc(unsigned Encoded);
+/// \returns Decoded VaSsrc from given immediate \p Encoded.
+unsigned decodeFieldVaSsrc(unsigned Encoded);
+
+/// \returns Decoded HoldCnt from given immediate \p Encoded.
+unsigned decodeFieldHoldCnt(unsigned Encoded);
+
/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);
@@ -1211,6 +1217,18 @@ unsigned encodeFieldVaVcc(unsigned VaVcc);
/// \returns \p Encoded combined with encoded \p VaVcc.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
+/// \returns \p HoldCnt as an encoded Depctr immediate.
+unsigned encodeFieldHoldCnt(unsigned HoldCnt);
+
+/// \returns \p Encoded combined with encoded \p HoldCnt.
+unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt);
+
+/// \returns \p VaSsrc as an encoded Depctr immediate.
+unsigned encodeFieldVaSsrc(unsigned VaSsrc);
+
+/// \returns \p Encoded combined with encoded \p VaSsrc.
+unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
+
} // namespace DepCtr
namespace Exp {
>From 7ee5f39c6645f751ce76643dff6ab1e8bc9e9ed3 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Fri, 28 Feb 2025 12:35:02 +0100
Subject: [PATCH 5/5] update mask instead of creating new instruction, update
test, remove unnecessary includes
---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 15 +-
.../AMDGPU/merge-consecutive-wait-alus.mir | 157 +-----------------
2 files changed, 9 insertions(+), 163 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index e9f1b3bc09059..261f6cfcb0cd8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -16,13 +16,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
-#include "llvm-c/Core.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/ilist_iterator.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/CFG.h"
-#include <iterator>
using namespace llvm;
@@ -410,15 +404,18 @@ class AMDGPUWaitSGPRHazards {
auto Preds = MBB.predecessors();
if (MBB.pred_size() == 1) {
auto &Pred = *Preds.begin();
- auto PrevMI = Pred->instr_end();
- PrevWaitAlu = getPreviousWaitAlu(PrevMI);
+ if (!Pred->empty()) {
+ auto PrevMI = Pred->instr_end();
+ PrevWaitAlu = getPreviousWaitAlu(PrevMI);
+ }
}
}
if (PrevWaitAlu != nullptr &&
PrevWaitAlu->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
Mask = mergeMasks(Mask, PrevWaitAlu->getOperand(0).getImm());
- PrevWaitAlu->eraseFromParent();
+ PrevWaitAlu->getOperand(0).setImm(Mask);
+ continue;
}
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
index f9d9c64202cf7..646c0202aeaf5 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -27,11 +27,11 @@ body: |
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
bb.0:
liveins: $vgpr0
@@ -45,169 +45,18 @@ body: |
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
...
---
-name: skip_implicit_def
-machineFunctionInfo:
-body: |
- bb.0:
- ; CHECK-LABEL: name: skip_implicit_def
- ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
- ; CHECK-NEXT: $sgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
- ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
- renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
- S_WAITCNT_DEPCTR 65530
- $sgpr0 = IMPLICIT_DEF
- renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_kill
-machineFunctionInfo:
-body: |
- bb.0:
- ; CHECK-LABEL: name: skip_kill
- ; CHECK: KILL $sgpr0
- ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
- ; CHECK-NEXT: KILL $sgpr0
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
- ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
- KILL $sgpr0
- renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
- S_WAITCNT_DEPCTR 65530
- KILL $sgpr0
- renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_cfi
-machineFunctionInfo:
-body: |
- bb.0:
- ; CHECK-LABEL: name: skip_cfi
- ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
- ; CHECK-NEXT: CFI_INSTRUCTION undefined $sgpr0
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
- ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
- renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
- S_WAITCNT_DEPCTR 65530
- CFI_INSTRUCTION undefined $sgpr0
- renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_eh_label
-machineFunctionInfo:
-body: |
- bb.0:
- ; CHECK-LABEL: name: skip_eh_label
- ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
- ; CHECK-NEXT: EH_LABEL 0
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
- ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
- renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
- S_WAITCNT_DEPCTR 65530
- EH_LABEL 0
- renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_gc_label
-machineFunctionInfo:
-body: |
- bb.0:
- ; CHECK-LABEL: name: skip_gc_label
- ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
- ; CHECK-NEXT: GC_LABEL 0
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
- ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
- renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
- S_WAITCNT_DEPCTR 65530
- GC_LABEL 0
- renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_dbg_value
+name: skip_meta_instruction
machineFunctionInfo:
body: |
bb.0:
- ; CHECK-LABEL: name: skip_dbg_value
+ ; CHECK-LABEL: name: skip_meta_instruction
; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
- ; CHECK-NEXT: DBG_VALUE 0
; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
- ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
- renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
- S_WAITCNT_DEPCTR 65530
- DBG_VALUE 0
- renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_dbg_label
-machineFunctionInfo:
-body: |
- bb.0:
- ; CHECK-LABEL: name: skip_dbg_label
- ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
- ; CHECK-NEXT: DBG_LABEL 0
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
- ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
- renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
- S_WAITCNT_DEPCTR 65530
- DBG_LABEL 0
- renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_lifetime_start
-machineFunctionInfo:
-body: |
- bb.0:
- ; CHECK-LABEL: name: skip_lifetime_start
- ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
- ; CHECK-NEXT: LIFETIME_START 0
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
- ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
- renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
- S_WAITCNT_DEPCTR 65530
- LIFETIME_START 0
- renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
----
-name: skip_lifetime_end
-machineFunctionInfo:
-body: |
- bb.0:
- ; CHECK-LABEL: name: skip_lifetime_end
- ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
; CHECK-NEXT: LIFETIME_END 0
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
S_WAITCNT_DEPCTR 65530
LIFETIME_END 0
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
...
----
-name: merge_consecutive_wait_alus_two_bb_meta
-body: |
- ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb_meta
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
- ; CHECK-NEXT: EH_LABEL 0
- ; CHECK-NEXT: GC_LABEL 0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $sgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
- ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
- bb.0:
- liveins: $vgpr0
-
- renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
- S_WAITCNT_DEPCTR 65530
- EH_LABEL 0
- GC_LABEL 0
- bb.1:
- liveins: $sgpr0
-
- renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
-...
More information about the llvm-commits
mailing list