[llvm] [AMDGPU] New AMDGPUInsertSingleUseVDST pass (PR #72388)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 15 05:48:26 PST 2023
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/72388
Add support for emitting GFX11.5 s_singleuse_vdst instructions. This is
a power saving feature whereby the compiler can annotate VALU
instructions whose results are known to have only a single use, so the
hardware can in some cases avoid writing the result back to VGPR RAM.
To begin with the pass is disabled by default because of one missing
feature: we need an exclusion list of opcodes that never qualify as
single-use producers and/or consumers. A future patch will implement
this and enable the pass by default.
>From 0e03b4b6ccaa47ea5deeb3167a92025dc46b50b7 Mon Sep 17 00:00:00 2001
From: Scott Egerton <scott.egerton at amd.com>
Date: Wed, 26 Jul 2023 12:05:25 +0100
Subject: [PATCH] [AMDGPU] New AMDGPUInsertSingleUseVDST pass
Add support for emitting GFX11.5 s_singleuse_vdst instructions. This is
a power saving feature whereby the compiler can annotate VALU
instructions whose results are known to have only a single use, so the
hardware can in some cases avoid writing the result back to VGPR RAM.
To begin with the pass is disabled by default because of one missing
feature: we need an exclusion list of opcodes that never qualify as
single-use producers and/or consumers. A future patch will implement
this and enable the pass by default.
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 3 +
.../AMDGPU/AMDGPUInsertSingleUseVDST.cpp | 120 ++
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 10 +
llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 +
.../CodeGen/AMDGPU/insert-singleuse_vdst.mir | 1023 +++++++++++++++++
5 files changed, 1157 insertions(+)
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
create mode 100644 llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 403014db56171ac..323560a46f31de2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -335,6 +335,9 @@ extern char &SIModeRegisterID;
void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
extern char &AMDGPUInsertDelayAluID;
+void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
+extern char &AMDGPUInsertSingleUseVDSTID;
+
void initializeSIInsertHardClausesPass(PassRegistry &);
extern char &SIInsertHardClausesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
new file mode 100644
index 000000000000000..88a77b22d989127
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -0,0 +1,120 @@
+//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
+/// instructions that produce single-use VGPR values. If the value is forwarded
+/// to the consumer instruction prior to VGPR writeback, the hardware can
+/// then skip (kill) the VGPR write.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
+
+namespace {
+class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
+private:
+ const SIInstrInfo *SII;
+
+public:
+ static char ID;
+
+ AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
+
+ void emitSingleUseVDST(MachineInstr &MI) const {
+ BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
+ .addImm(0x1);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ const auto &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasVGPRSingleUseHintInsts())
+ return false;
+
+ SII = ST.getInstrInfo();
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ bool InstructionEmitted = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits
+
+ // Handle boundaries at the end of basic block separately to avoid
+ // false positives. If they are live at the end of a basic block then
+ // assume it has more uses later on.
+ for (const auto &Liveouts : MBB.liveouts())
+ RegisterUseCount[Liveouts.PhysReg] = 2;
+
+ for (MachineInstr &MI : reverse(MBB.instrs())) {
+ // All registers in all operands need to be single use for an
+ // instruction to be marked as a single use producer.
+ bool AllProducerOperandsAreSingleUse = true;
+
+ for (const auto &Operand : MI.operands()) {
+ if (!Operand.isReg())
+ continue;
+ const auto Reg = Operand.getReg();
+
+ // Count the number of times each register is read.
+ if (Operand.readsReg())
+ RegisterUseCount[Reg]++;
+
+ // Do not attempt to optimise across exec mask changes.
+ if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
+ for (auto &UsedReg : RegisterUseCount)
+ UsedReg.second = 2;
+ }
+
+ // If we are at the point where the register first became live,
+ // check if the operands are single use.
+ if (!MI.modifiesRegister(Reg, TRI))
+ continue;
+ if (RegisterUseCount[Reg] > 1)
+ AllProducerOperandsAreSingleUse = false;
+ // Reset uses count when a register is no longer live.
+ RegisterUseCount.erase(Reg);
+ }
+ if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
+ // TODO: Replace with candidate logging for instruction grouping
+ // later.
+ emitSingleUseVDST(MI);
+ InstructionEmitted = true;
+ }
+ }
+ }
+ return InstructionEmitted;
+ }
+};
+} // namespace
+
+char AMDGPUInsertSingleUseVDST::ID = 0;
+
+char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
+
+INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
+ "AMDGPU Insert SingleUseVDST", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 951ed9420594b19..0c38fa32c6f33a8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -286,6 +286,12 @@ static cl::opt<bool> EnableSIModeRegisterPass(
cl::init(true),
cl::Hidden);
+// Enable GFX11.5+ s_singleuse_vdst insertion
+static cl::opt<bool>
+ EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
+ cl::desc("Enable s_singleuse_vdst insertion"),
+ cl::init(false), cl::Hidden);
+
// Enable GFX11+ s_delay_alu insertion
static cl::opt<bool>
EnableInsertDelayAlu("amdgpu-enable-delay-alu",
@@ -404,6 +410,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
+ initializeAMDGPUInsertSingleUseVDSTPass(*PR);
initializeAMDGPUInsertDelayAluPass(*PR);
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
@@ -1448,6 +1455,9 @@ void GCNPassConfig::addPreEmitPass() {
// cases.
addPass(&PostRAHazardRecognizerID);
+ if (isPassEnabled(EnableInsertSingleUseVDST, CodeGenOptLevel::Less))
+ addPass(&AMDGPUInsertSingleUseVDSTID);
+
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
addPass(&AMDGPUInsertDelayAluID);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 0c0720890794b66..53a33f8210d2a84 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -77,6 +77,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUMacroFusion.cpp
AMDGPUMCInstLower.cpp
AMDGPUIGroupLP.cpp
+ AMDGPUInsertSingleUseVDST.cpp
AMDGPUMIRFormatter.cpp
AMDGPUOpenCLEnqueuedBlockLowering.cpp
AMDGPUPerfHintAnalysis.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
new file mode 100644
index 000000000000000..8480f8d211ebac1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
@@ -0,0 +1,1023 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
+
+---
+name: valu_dep_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: valu_dep_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: valu_dep_3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-LABEL: name: valu_dep_4
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-LABEL: name: valu_dep_5
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr4, $vgpr4, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ $vgpr4 = V_ADD_U32_e32 $vgpr4, $vgpr4, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: multiple_uses_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: multiple_uses_1
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: multiple_uses_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: multiple_uses_2
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: multiple_uses_3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-LABEL: name: multiple_uses_3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+...
+
+---
+name: basic_block_1
+tracksRegLiveness: true
+
+body: |
+ ; CHECK-LABEL: name: basic_block_1
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ successors: %bb.1
+
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: basic_block_2
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: basic_block_2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ successors: %bb.1
+
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = IMPLICIT_DEF
+
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+...
+
+---
+name: basic_block_3
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: basic_block_3
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ successors: %bb.1
+
+ $vgpr0 = IMPLICIT_DEF
+
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: exec_mask_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-LABEL: name: exec_mask_1
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 1234
+ $exec = COPY $vgpr0
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: exec_mask_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: exec_mask_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr5 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 1234
+ $exec = COPY $vgpr0
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr5 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: exec_mask_3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: exec_mask_3
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5678
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 1234
+ $exec = COPY $vgpr0
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 5678
+ $exec = COPY $vgpr0
+ $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 1234
+ $exec = COPY $vgpr0
+ $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trans32_dep_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: trans32_dep_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: trans32_dep_3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-LABEL: name: trans32_dep_4
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_EXP_F32_e32 $vgpr3, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ $vgpr3 = V_EXP_F32_e32 $vgpr3, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: salu_cycle_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: salu_cycle_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_same_trans32_dep_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_1_same_trans32_dep_1
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: trans32_dep_1_only
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: trans32_dep_1_only
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_same_salu_cycle_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: valu_dep_1_same_salu_cycle_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_next_valu_dep_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: valu_dep_1_next_valu_dep_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2_next_valu_dep_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_2_next_valu_dep_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_no_next_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_1_no_next_1
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr2 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+...
+
+---
+name: valu_dep_1_no_next_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_1_no_next_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+ $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+...
+
+---
+name: implicit_cmp_cndmask
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-LABEL: name: implicit_cmp_cndmask
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $vcc, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $vcc, implicit $exec
+ implicit $vcc = V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $vcc, implicit $exec
+...
+
+---
+name: explicit_cmp_cndmask
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-LABEL: name: explicit_cmp_cndmask
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0_sgpr1, implicit $exec
+ $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0_sgpr1, implicit $exec
+...
+
+---
+name: implicit_addc_addc
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vcc
+ ; CHECK-LABEL: name: implicit_addc_addc
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+ $vgpr0 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+ $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+---
+name: explicit_addc_addc
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: explicit_addc_addc
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0, $vcc = V_ADD_CO_U32_e64 $vgpr0, $vgpr0, 0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+ $vgpr0,$vcc = V_ADD_CO_U32_e64 $vgpr0, $vgpr0, 0, implicit $exec
+ $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+---
+name: valu_dep_3_bundle
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: valu_dep_3_bundle
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: BUNDLE {
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ BUNDLE {
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ }
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: if
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: if
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.0:
+ liveins: $vcc
+ S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ bb.1:
+ liveins: $vgpr0, $vgpr1
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.2:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: else
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: else
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.0:
+ liveins: $vcc
+ S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ bb.1:
+ S_BRANCH %bb.3
+ bb.2:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.3:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: if_else
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: if_else
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.0:
+ liveins: $vcc
+ S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ bb.1:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ S_BRANCH %bb.3
+ bb.2:
+ liveins: $vgpr0, $vgpr1
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ bb.3:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: loop_1
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: loop_1
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vcc
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ bb.2:
+...
+
+---
+name: loop_2
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: loop_2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+ bb.1:
+ liveins: $vgpr0, $vcc
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ bb.2:
+...
+
+---
+name: sendmsg_rtn
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: sendmsg_rtn
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $sgpr0 = S_SENDMSG_RTN_B32 128
+ ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_SENDMSG_RTN_B32 128
+ $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: flat_load
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: flat_load
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+...
+
+---
+name: waitcnt_depctr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: waitcnt_depctr
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 4095
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ S_WAITCNT_DEPCTR 4095
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: writelane1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr0
+ ; CHECK-LABEL: name: writelane1
+ ; CHECK: liveins: $vgpr0, $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 1, $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 2, $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 1, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 2, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+...
+
+---
+name: writelane2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr0
+ ; CHECK-LABEL: name: writelane2
+ ; CHECK: liveins: $vgpr0, $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
More information about the llvm-commits
mailing list