[llvm] [AMDGPU] New AMDGPUInsertSingleUseVDST pass (PR #72388)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 22 09:06:43 PST 2023


https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/72388

>From 0e03b4b6ccaa47ea5deeb3167a92025dc46b50b7 Mon Sep 17 00:00:00 2001
From: Scott Egerton <scott.egerton at amd.com>
Date: Wed, 26 Jul 2023 12:05:25 +0100
Subject: [PATCH 1/4] [AMDGPU] New AMDGPUInsertSingleUseVDST pass

Add support for emitting GFX11.5 s_singleuse_vdst instructions. This is
a power saving feature whereby the compiler can annotate VALU
instructions whose results are known to have only a single use, so the
hardware can in some cases avoid writing the result back to VGPR RAM.

To begin with, the pass is disabled by default because of one missing
feature: we need an exclusion list of opcodes that never qualify as
single-use producers and/or consumers. A future patch will implement
this and enable the pass by default.
---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |    3 +
 .../AMDGPU/AMDGPUInsertSingleUseVDST.cpp      |  120 ++
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   10 +
 llvm/lib/Target/AMDGPU/CMakeLists.txt         |    1 +
 .../CodeGen/AMDGPU/insert-singleuse_vdst.mir  | 1023 +++++++++++++++++
 5 files changed, 1157 insertions(+)
 create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
 create mode 100644 llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 403014db56171ac..323560a46f31de2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -335,6 +335,9 @@ extern char &SIModeRegisterID;
 void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
 extern char &AMDGPUInsertDelayAluID;
 
+void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
+extern char &AMDGPUInsertSingleUseVDSTID;
+
 void initializeSIInsertHardClausesPass(PassRegistry &);
 extern char &SIInsertHardClausesID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
new file mode 100644
index 000000000000000..88a77b22d989127
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -0,0 +1,120 @@
+//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
+/// instructions that produce single-use VGPR values. If the value is forwarded
+/// to the consumer instruction prior to VGPR writeback, the hardware can
+/// then skip (kill) the VGPR write.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
+
+namespace {
+class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
+private:
+  const SIInstrInfo *SII; // Assigned in runOnMachineFunction before any use.
+
+public:
+  static char ID;
+
+  AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
+
+  void emitSingleUseVDST(MachineInstr &MI) const { // Inserted before MI.
+    BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
+        .addImm(0x1);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    const auto &ST = MF.getSubtarget<GCNSubtarget>();
+    if (!ST.hasVGPRSingleUseHintInsts())
+      return false; // Subtarget lacks the hint instruction: no changes made.
+
+    SII = ST.getInstrInfo();
+    const auto *TRI = MF.getSubtarget().getRegisterInfo();
+    bool InstructionEmitted = false;
+
+    for (MachineBasicBlock &MBB : MF) {
+      DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits
+
+      // A register that is live out of this block may still be read by a
+      // successor, so seed it with a count of 2: the definition that reaches
+      // the end of the block is then never treated as single-use.
+      for (const auto &Liveouts : MBB.liveouts())
+        RegisterUseCount[Liveouts.PhysReg] = 2;
+
+      for (MachineInstr &MI : reverse(MBB.instrs())) { // Bottom-up walk.
+        // MI is marked as a single-use producer only if every register it
+        // modifies has a recorded use count of at most one.
+        bool AllProducerOperandsAreSingleUse = true;
+
+        for (const auto &Operand : MI.operands()) {
+          if (!Operand.isReg())
+            continue;
+          const auto Reg = Operand.getReg();
+
+          // Count the number of times each register is read.
+          if (Operand.readsReg())
+            RegisterUseCount[Reg]++;
+
+          // Do not attempt to optimise across exec mask changes.
+          if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
+            for (auto &UsedReg : RegisterUseCount)
+              UsedReg.second = 2; // 2 stands for "more than one use".
+          }
+
+          // MI writes Reg: judge it by the reads counted so far. NOTE(review):
+          // MI's own read of Reg is erased too, hiding it from earlier defs.
+          if (!MI.modifiesRegister(Reg, TRI))
+            continue;
+          if (RegisterUseCount[Reg] > 1)
+            AllProducerOperandsAreSingleUse = false;
+          // Reset uses count when a register is no longer live.
+          RegisterUseCount.erase(Reg);
+        }
+        if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
+          // TODO: Replace with candidate logging for instruction grouping
+          // later.
+          emitSingleUseVDST(MI);
+          InstructionEmitted = true;
+        }
+      }
+    }
+    return InstructionEmitted; // True only if a hint instruction was inserted.
+  }
+};
+} // namespace
+
+char AMDGPUInsertSingleUseVDST::ID = 0;
+
+char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
+
+INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
+                "AMDGPU Insert SingleUseVDST", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 951ed9420594b19..0c38fa32c6f33a8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -286,6 +286,12 @@ static cl::opt<bool> EnableSIModeRegisterPass(
   cl::init(true),
   cl::Hidden);
 
+// Enable GFX11.5+ s_singleuse_vdst insertion (hidden option, off by default)
+static cl::opt<bool>
+    EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
+                              cl::desc("Enable s_singleuse_vdst insertion"),
+                              cl::init(false), cl::Hidden);
+
 // Enable GFX11+ s_delay_alu insertion
 static cl::opt<bool>
     EnableInsertDelayAlu("amdgpu-enable-delay-alu",
@@ -404,6 +410,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
   initializeAMDGPUUnifyMetadataPass(*PR);
   initializeSIAnnotateControlFlowPass(*PR);
+  initializeAMDGPUInsertSingleUseVDSTPass(*PR);
   initializeAMDGPUInsertDelayAluPass(*PR);
   initializeSIInsertHardClausesPass(*PR);
   initializeSIInsertWaitcntsPass(*PR);
@@ -1448,6 +1455,9 @@ void GCNPassConfig::addPreEmitPass() {
   // cases.
   addPass(&PostRAHazardRecognizerID);
 
+  if (isPassEnabled(EnableInsertSingleUseVDST, CodeGenOptLevel::Less))
+    addPass(&AMDGPUInsertSingleUseVDSTID);
+
   if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
     addPass(&AMDGPUInsertDelayAluID);
 
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 0c0720890794b66..53a33f8210d2a84 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -77,6 +77,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUMacroFusion.cpp
   AMDGPUMCInstLower.cpp
   AMDGPUIGroupLP.cpp
+  AMDGPUInsertSingleUseVDST.cpp
   AMDGPUMIRFormatter.cpp
   AMDGPUOpenCLEnqueuedBlockLowering.cpp
   AMDGPUPerfHintAnalysis.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
new file mode 100644
index 000000000000000..8480f8d211ebac1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
@@ -0,0 +1,1023 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
+
+---
+name: valu_dep_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: valu_dep_1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: valu_dep_2
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_3
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-LABEL: name: valu_dep_3
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-LABEL: name: valu_dep_4
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_5
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    ; CHECK-LABEL: name: valu_dep_5
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr4, $vgpr4, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    $vgpr4 = V_ADD_U32_e32 $vgpr4, $vgpr4, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: multiple_uses_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-LABEL: name: multiple_uses_1
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: multiple_uses_2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-LABEL: name: multiple_uses_2
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: multiple_uses_3
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-LABEL: name: multiple_uses_3
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+...
+
+---
+name: basic_block_1
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: basic_block_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    successors: %bb.1
+
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.1:
+  liveins: $vgpr0, $vgpr1, $vgpr2
+
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: basic_block_2
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: basic_block_2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = IMPLICIT_DEF
+  ; CHECK-NEXT:   $vgpr1 = IMPLICIT_DEF
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    successors: %bb.1
+
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  bb.1:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+...
+
+---
+name: basic_block_3
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: basic_block_3
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = IMPLICIT_DEF
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    successors: %bb.1
+
+    $vgpr0 = IMPLICIT_DEF
+
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.1:
+  liveins: $vgpr0, $vgpr1, $vgpr2
+
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: exec_mask_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-LABEL: name: exec_mask_1
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+    ; CHECK-NEXT: $exec = COPY $vgpr0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    %9:_(s64) = G_CONSTANT i64 1234
+    $exec = COPY $vgpr0
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: exec_mask_2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: exec_mask_2
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+    ; CHECK-NEXT: $exec = COPY $vgpr0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr5 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    %9:_(s64) = G_CONSTANT i64 1234
+    $exec = COPY $vgpr0
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: exec_mask_3
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: exec_mask_3
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+    ; CHECK-NEXT: $exec = COPY $vgpr0
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5678
+    ; CHECK-NEXT: $exec = COPY $vgpr0
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+    ; CHECK-NEXT: $exec = COPY $vgpr0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    %9:_(s64) = G_CONSTANT i64 1234
+    $exec = COPY $vgpr0
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    %9:_(s64) = G_CONSTANT i64 5678
+    $exec = COPY $vgpr0
+    $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+    %9:_(s64) = G_CONSTANT i64 1234
+    $exec = COPY $vgpr0
+    $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: trans32_dep_1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: trans32_dep_2
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_3
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-LABEL: name: trans32_dep_3
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-LABEL: name: trans32_dep_4
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr3 = V_EXP_F32_e32 $vgpr3, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+    $vgpr3 = V_EXP_F32_e32 $vgpr3, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: salu_cycle_1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: salu_cycle_2
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_same_trans32_dep_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: valu_dep_1_same_trans32_dep_1
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: trans32_dep_1_only
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: trans32_dep_1_only
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_same_salu_cycle_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: valu_dep_1_same_salu_cycle_1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_next_valu_dep_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: valu_dep_1_next_valu_dep_1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2_next_valu_dep_2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: valu_dep_2_next_valu_dep_2
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_no_next_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: valu_dep_1_no_next_1
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+    $vgpr2 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+...
+
+---
+name: valu_dep_1_no_next_2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: valu_dep_1_no_next_2
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+    $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+...
+
+---
+name: implicit_cmp_cndmask
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    ; CHECK-LABEL: name: implicit_cmp_cndmask
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $vcc, implicit $exec
+    implicit $vcc = V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $vcc, implicit $exec
+...
+
+---
+name: explicit_cmp_cndmask
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    ; CHECK-LABEL: name: explicit_cmp_cndmask
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0_sgpr1, implicit $exec
+    $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0_sgpr1, implicit $exec
+...
+
+---
+name: implicit_addc_addc
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vcc
+    ; CHECK-LABEL: name: implicit_addc_addc
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vcc
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr0 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+---
+name: explicit_addc_addc
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: explicit_addc_addc
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0, $vcc = V_ADD_CO_U32_e64 $vgpr0, $vgpr0, 0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr0,$vcc = V_ADD_CO_U32_e64 $vgpr0, $vgpr0, 0, implicit $exec
+    $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+---
+name: valu_dep_3_bundle
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-LABEL: name: valu_dep_3_bundle
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: BUNDLE {
+    ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    BUNDLE {
+      $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+      $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    }
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: if
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: if
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.2, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.0:
+    liveins: $vcc
+    S_CBRANCH_VCCZ %bb.2, implicit $vcc
+  bb.1:
+    liveins: $vgpr0, $vgpr1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.2:
+    liveins: $vgpr0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: else
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: else
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.2, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.0:
+    liveins: $vcc
+    S_CBRANCH_VCCZ %bb.2, implicit $vcc
+  bb.1:
+    S_BRANCH %bb.3
+  bb.2:
+    liveins: $vgpr0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.3:
+    liveins: $vgpr0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: if_else
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: if_else
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.2, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.0:
+    liveins: $vcc
+    S_CBRANCH_VCCZ %bb.2, implicit $vcc
+  bb.1:
+    liveins: $vgpr0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    S_BRANCH %bb.3
+  bb.2:
+    liveins: $vgpr0, $vgpr1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+  bb.3:
+    liveins: $vgpr0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: loop_1
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: loop_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vcc
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    S_CBRANCH_VCCZ %bb.1, implicit $vcc
+  bb.2:
+...
+
+---
+name: loop_2
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: loop_2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  bb.0:
+  bb.1:
+    liveins: $vgpr0, $vcc
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    S_CBRANCH_VCCZ %bb.1, implicit $vcc
+  bb.2:
+...
+
+---
+name: sendmsg_rtn
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: sendmsg_rtn
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: $sgpr0 = S_SENDMSG_RTN_B32 128
+    ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0 = S_SENDMSG_RTN_B32 128
+    $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: flat_load
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-LABEL: name: flat_load
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    $vgpr0 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+...
+
+---
+name: waitcnt_depctr
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: waitcnt_depctr
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: S_WAITCNT_DEPCTR 4095
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    S_WAITCNT_DEPCTR 4095
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: writelane1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+    ; CHECK-LABEL: name: writelane1
+    ; CHECK: liveins: $vgpr0, $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 1, $vgpr0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 2, $vgpr0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 $sgpr0, 1, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 $sgpr0, 2, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+...
+
+---
+name: writelane2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+    ; CHECK-LABEL: name: writelane2
+    ; CHECK: liveins: $vgpr0, $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...

>From e8628203e307c0c3897c62a81e963cc3658d194a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 17 Nov 2023 13:58:36 +0000
Subject: [PATCH 2/4] fixups

---
 llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
index 88a77b22d989127..93ed77bb6f7efe8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -48,6 +48,8 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
   AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
 
   void emitSingleUseVDST(MachineInstr &MI) const {
+    // Insert S_SINGLEUSE_VDST with immediate 1 directly before MI, marking MI
+    // as a single-use producer:  s_singleuse_vdst { supr0: 1 }
     BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
         .addImm(0x1);
   }
@@ -58,7 +60,7 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
       return false;
 
     SII = ST.getInstrInfo();
-    const auto *TRI = MF.getSubtarget().getRegisterInfo();
+    const auto *TRI = &SII->getRegisterInfo();
     bool InstructionEmitted = false;
 
     for (MachineBasicBlock &MBB : MF) {

>From 1294a0fd206c9a7b41b79f6b2ef388388ef5915e Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 20 Nov 2023 13:46:49 +0000
Subject: [PATCH 3/4] trim liveins

---
 .../CodeGen/AMDGPU/insert-singleuse_vdst.mir  | 86 ++++++++-----------
 1 file changed, 38 insertions(+), 48 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
index 8480f8d211ebac1..5622e30eee8ea6b 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
@@ -121,9 +121,9 @@ name: multiple_uses_1
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2
+    liveins: $vgpr0
     ; CHECK-LABEL: name: multiple_uses_1
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     ; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -140,9 +140,9 @@ name: multiple_uses_2
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2
+    liveins: $vgpr0
     ; CHECK-LABEL: name: multiple_uses_2
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     ; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -167,9 +167,9 @@ name: multiple_uses_3
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    liveins: $vgpr0
     ; CHECK-LABEL: name: multiple_uses_3
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     ; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -192,14 +192,16 @@ body: |
   ; CHECK-LABEL: name: basic_block_1
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT:   liveins: $vgpr0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT:   liveins: $vgpr0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
@@ -207,14 +209,14 @@ body: |
   ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2
+    liveins: $vgpr0
     successors: %bb.1
 
     $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   bb.1:
-  liveins: $vgpr0, $vgpr1, $vgpr2
+    liveins: $vgpr0
 
     $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
@@ -228,21 +230,19 @@ body: |
   ; CHECK-LABEL: name: basic_block_2
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vgpr0 = IMPLICIT_DEF
   ; CHECK-NEXT:   $vgpr1 = IMPLICIT_DEF
   ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT:   liveins: $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
   ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     successors: %bb.1
 
     $vgpr0 = IMPLICIT_DEF
@@ -250,7 +250,7 @@ body: |
 
     $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
   bb.1:
-  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    liveins: $vgpr2
 
     $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
     $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
@@ -263,15 +263,15 @@ body: |
   ; CHECK-LABEL: name: basic_block_3
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vgpr0 = IMPLICIT_DEF
   ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
   ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
@@ -279,7 +279,6 @@ body: |
   ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2
     successors: %bb.1
 
     $vgpr0 = IMPLICIT_DEF
@@ -288,7 +287,7 @@ body: |
     $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   bb.1:
-  liveins: $vgpr0, $vgpr1, $vgpr2
+    liveins: $vgpr0, $vgpr1
 
     $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
     $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
@@ -300,9 +299,9 @@ name: exec_mask_1
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    liveins: $vgpr0
     ; CHECK-LABEL: name: exec_mask_1
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
@@ -325,9 +324,9 @@ name: exec_mask_2
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1
+    liveins: $vgpr0
     ; CHECK-LABEL: name: exec_mask_2
-    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
@@ -356,18 +355,18 @@ name: exec_mask_3
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1
+    liveins: $vgpr0
     ; CHECK-LABEL: name: exec_mask_3
-    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
     ; CHECK-NEXT: $exec = COPY $vgpr0
     ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5678
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5678
     ; CHECK-NEXT: $exec = COPY $vgpr0
     ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
     ; CHECK-NEXT: $exec = COPY $vgpr0
     ; CHECK-NEXT: S_SINGLEUSE_VDST 1
     ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
@@ -611,9 +610,9 @@ name: valu_dep_1_no_next_1
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1
+    liveins: $vgpr0
     ; CHECK-LABEL: name: valu_dep_1_no_next_1
-    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
     ; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -653,9 +652,9 @@ name: implicit_cmp_cndmask
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
     ; CHECK-LABEL: name: implicit_cmp_cndmask
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: S_SINGLEUSE_VDST 1
     ; CHECK-NEXT: V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $vcc, implicit $exec
@@ -670,9 +669,9 @@ name: explicit_cmp_cndmask
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
     ; CHECK-LABEL: name: explicit_cmp_cndmask
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: S_SINGLEUSE_VDST 1
     ; CHECK-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
@@ -750,13 +749,13 @@ body: |
   ; CHECK-LABEL: name: if
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; CHECK-NEXT:   liveins: $vcc
+  ; CHECK-NEXT:   liveins: $vgpr0, $vcc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.2, implicit $vcc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   ; CHECK-NEXT: {{  $}}
@@ -766,10 +765,10 @@ body: |
   ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   bb.0:
-    liveins: $vcc
+    liveins: $vgpr0, $vcc
     S_CBRANCH_VCCZ %bb.2, implicit $vcc
   bb.1:
-    liveins: $vgpr0, $vgpr1
+    liveins: $vgpr0
     $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
   bb.2:
     liveins: $vgpr0
@@ -923,11 +922,8 @@ name: sendmsg_rtn
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0
     ; CHECK-LABEL: name: sendmsg_rtn
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK: S_SINGLEUSE_VDST 1
     ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
     ; CHECK-NEXT: $sgpr0 = S_SENDMSG_RTN_B32 128
     ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
@@ -944,11 +940,8 @@ name: flat_load
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2
     ; CHECK-LABEL: name: flat_load
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK: S_SINGLEUSE_VDST 1
     ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
     ; CHECK-NEXT: S_SINGLEUSE_VDST 1
     ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
@@ -968,11 +961,8 @@ name: waitcnt_depctr
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0
     ; CHECK-LABEL: name: waitcnt_depctr
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+    ; CHECK: S_SINGLEUSE_VDST 1
     ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
     ; CHECK-NEXT: S_WAITCNT_DEPCTR 4095
     ; CHECK-NEXT: S_SINGLEUSE_VDST 1

>From d98ffc99e7fb17407b58df183c01441f6985ddb3 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 20 Nov 2023 17:01:20 +0000
Subject: [PATCH 4/4] rewrite tests

---
 .../CodeGen/AMDGPU/insert-singleuse-vdst.mir  |  626 ++++++++++
 .../CodeGen/AMDGPU/insert-singleuse_vdst.mir  | 1013 -----------------
 2 files changed, 626 insertions(+), 1013 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
 delete mode 100644 llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir

diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
new file mode 100644
index 000000000000000..f5b99c4c4d2a078
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
@@ -0,0 +1,626 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
+
+# One single-use producer.
+---
+name: one_producer
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: one_producer
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr2
+...
+
+# One single-use producer of a 64-bit value.
+---
+name: one_producer_64bit
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: one_producer_64bit
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0_vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
+  ; CHECK-NEXT:   $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr4_vgpr5
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
+    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
+  bb.1:
+    liveins: $vgpr4_vgpr5
+...
+
+# Two consecutive single-use producers.
+---
+name: two_producers
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: two_producers
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr3
+...
+
+# Redefinitions of v0.
+---
+name: redefinitions
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: redefinitions
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+...
+
+# One producer with no consumers.
+---
+name: no_consumer
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: no_consumer
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.1:
+...
+
+# One consumer with two uses of the same value.
+---
+name: one_consumer_two_uses
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: one_consumer_two_uses
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr2
+...
+
+# A longer example.
+---
+name: longer_example
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: longer_example
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr16, $vgpr18
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19
+    $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode
+    $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode
+    $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode
+    $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode
+    $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode
+    $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode
+    $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
+    $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
+    $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode
+    $vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode
+  bb.1:
+    liveins: $vgpr16, $vgpr18
+...
+
+# Multiple uses of v0.
+---
+name: multiple_uses_1
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: multiple_uses_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr1, $vgpr2
+...
+
+# Multiple uses of v0 and redefinitions of v1 and v2.
+---
+name: multiple_uses_2
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: multiple_uses_2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr1, $vgpr2
+...
+
+# Multiple uses of all but v1.
+---
+name: multiple_uses_3
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: multiple_uses_3
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec
+  bb.1:
+    liveins: $vgpr2, $vgpr3
+...
+
+# Results are live-in to another basic block.
+---
+name: basic_block_1
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: basic_block_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.2:
+    liveins: $vgpr1, $vgpr2
+...
+
+# Result v2 has multiple uses in another basic block.
+---
+name: basic_block_2
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: basic_block_2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
+  bb.1:
+    liveins: $vgpr2
+    $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+  bb.2:
+    liveins: $vgpr3
+...
+
+# Results are redefined in another basic block.
+---
+name: basic_block_3
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: basic_block_3
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  bb.2:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+...
+
+# Exec modified between producer and consumer.
+---
+name: exec_mask
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: exec_mask
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr1_vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   $exec = COPY $vgpr1_vgpr2
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr1_vgpr2
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $exec = COPY $vgpr1_vgpr2
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr0
+...
+
+# Exec_lo modified between producer and consumer.
+---
+name: exec_mask_lo
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: exec_mask_lo
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   $exec_lo = COPY $vgpr1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $exec_lo = COPY $vgpr1
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr0
+...
+
+# Exec_hi modified between producer and consumer.
+---
+name: exec_mask_hi
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: exec_mask_hi
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   $exec_hi = COPY $vgpr1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $exec_hi = COPY $vgpr1
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr0
+...
+
+# Write 32-bit vgpr and then read from low 16 bits.
+---
+name: write_full_read_lo
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: write_full_read_lo
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1_lo16
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
+  bb.1:
+    liveins: $vgpr1_lo16
+...
+
+# Write 32-bit vgpr and then read from high 16 bits.
+---
+name: write_full_read_hi
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: write_full_read_hi
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1_hi16
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
+  bb.1:
+    liveins: $vgpr1_hi16
+...
+
+# Write 32-bit vgpr and then read from both halves.
+---
+name: write_full_read_both
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: write_full_read_both
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
+    $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
+  bb.1:
+    liveins: $vgpr1
+...
+
+# Write 32-bit vgpr and then read from both halves in the same instruction.
+---
+name: write_full_read_both_same_instruction
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: write_full_read_both_same_instruction
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1_lo16 = V_ADD_F16_t16_e32 $vgpr0_lo16, $vgpr0_hi16, implicit $mode, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1_lo16
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1_lo16 = V_ADD_F16_t16_e32 $vgpr0_lo16, $vgpr0_hi16, implicit $mode, implicit $exec
+  bb.1:
+    liveins: $vgpr1_lo16
+...
+
+# Write low 16 bits and then read 32-bit vgpr.
+---
+name: write_lo_read_full
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: write_lo_read_full
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr1
+...
+
+# Write high 16 bits and then read 32-bit vgpr.
+---
+name: write_hi_read_full
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: write_hi_read_full
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
deleted file mode 100644
index 5622e30eee8ea6b..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
+++ /dev/null
@@ -1,1013 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
-# RUN: llc -march=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
-
----
-name: valu_dep_1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: valu_dep_1
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: valu_dep_2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: valu_dep_2
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: valu_dep_3
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2
-    ; CHECK-LABEL: name: valu_dep_3
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: valu_dep_4
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-    ; CHECK-LABEL: name: valu_dep_4
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: valu_dep_5
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
-    ; CHECK-LABEL: name: valu_dep_5
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr4, $vgpr4, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
-    $vgpr4 = V_ADD_U32_e32 $vgpr4, $vgpr4, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: multiple_uses_1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: multiple_uses_1
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: multiple_uses_2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: multiple_uses_2
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: multiple_uses_3
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: multiple_uses_3
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr3 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
-...
-
----
-name: basic_block_1
-tracksRegLiveness: true
-
-body: |
-  ; CHECK-LABEL: name: basic_block_1
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  bb.0:
-    liveins: $vgpr0
-    successors: %bb.1
-
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  bb.1:
-    liveins: $vgpr0
-
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: basic_block_2
-tracksRegLiveness: true
-body: |
-  ; CHECK-LABEL: name: basic_block_2
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = IMPLICIT_DEF
-  ; CHECK-NEXT:   $vgpr1 = IMPLICIT_DEF
-  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   liveins: $vgpr2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-  bb.0:
-    successors: %bb.1
-
-    $vgpr0 = IMPLICIT_DEF
-    $vgpr1 = IMPLICIT_DEF
-
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-  bb.1:
-    liveins: $vgpr2
-
-    $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-...
-
----
-name: basic_block_3
-tracksRegLiveness: true
-body: |
-  ; CHECK-LABEL: name: basic_block_3
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = IMPLICIT_DEF
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-  bb.0:
-    successors: %bb.1
-
-    $vgpr0 = IMPLICIT_DEF
-
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  bb.1:
-    liveins: $vgpr0, $vgpr1
-
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-...
-
----
-name: exec_mask_1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: exec_mask_1
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
-    ; CHECK-NEXT: $exec = COPY $vgpr0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    %9:_(s64) = G_CONSTANT i64 1234
-    $exec = COPY $vgpr0
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-...
-
----
-name: exec_mask_2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: exec_mask_2
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
-    ; CHECK-NEXT: $exec = COPY $vgpr0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr5 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    %9:_(s64) = G_CONSTANT i64 1234
-    $exec = COPY $vgpr0
-    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr5 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-...
-
----
-name: exec_mask_3
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: exec_mask_3
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
-    ; CHECK-NEXT: $exec = COPY $vgpr0
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5678
-    ; CHECK-NEXT: $exec = COPY $vgpr0
-    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
-    ; CHECK-NEXT: $exec = COPY $vgpr0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    %9:_(s64) = G_CONSTANT i64 1234
-    $exec = COPY $vgpr0
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    %9:_(s64) = G_CONSTANT i64 5678
-    $exec = COPY $vgpr0
-    $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
-    %9:_(s64) = G_CONSTANT i64 1234
-    $exec = COPY $vgpr0
-    $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
-...
-
----
-name: trans32_dep_1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: trans32_dep_1
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: trans32_dep_2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: trans32_dep_2
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: trans32_dep_3
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2
-    ; CHECK-LABEL: name: trans32_dep_3
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
-    $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: trans32_dep_4
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-    ; CHECK-LABEL: name: trans32_dep_4
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr3 = V_EXP_F32_e32 $vgpr3, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
-    $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
-    $vgpr3 = V_EXP_F32_e32 $vgpr3, implicit $exec, implicit $mode
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: salu_cycle_1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: salu_cycle_1
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
-    $sgpr0 = S_MOV_B32 0
-    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
-...
-
----
-name: salu_cycle_2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: salu_cycle_2
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
-    $sgpr0 = S_MOV_B32 0
-    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
-...
-
----
-name: valu_dep_1_same_trans32_dep_1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: valu_dep_1_same_trans32_dep_1
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
-    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-...
-
----
-name: trans32_dep_1_only
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: trans32_dep_1_only
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
-...
-
----
-name: valu_dep_1_same_salu_cycle_1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: valu_dep_1_same_salu_cycle_1
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $sgpr0 = S_MOV_B32 0
-    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
-...
-
----
-name: valu_dep_1_next_valu_dep_1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: valu_dep_1_next_valu_dep_1
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: valu_dep_2_next_valu_dep_2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: valu_dep_2_next_valu_dep_2
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-...
-
----
-name: valu_dep_1_no_next_1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0
-    ; CHECK-LABEL: name: valu_dep_1_no_next_1
-    ; CHECK: liveins: $vgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-    $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-    $vgpr1 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-    $vgpr2 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-...
-
----
-name: valu_dep_1_no_next_2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: valu_dep_1_no_next_2
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-    $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-    $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
-    $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
-    $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
-...
-
----
-name: implicit_cmp_cndmask
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
-    ; CHECK-LABEL: name: implicit_cmp_cndmask
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $vcc, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $vcc, implicit $exec
-    implicit $vcc = V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $vcc, implicit $exec
-...
-
----
-name: explicit_cmp_cndmask
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
-    ; CHECK-LABEL: name: explicit_cmp_cndmask
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0_sgpr1, implicit $exec
-    $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
-    $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0_sgpr1, implicit $exec
-...
-
----
-name: implicit_addc_addc
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vcc
-    ; CHECK-LABEL: name: implicit_addc_addc
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vcc
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
-    $vgpr0 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
-    $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
-...
-
----
-name: explicit_addc_addc
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: explicit_addc_addc
-    ; CHECK: liveins: $vgpr0, $vgpr1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0, $vcc = V_ADD_CO_U32_e64 $vgpr0, $vgpr0, 0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
-    $vgpr0,$vcc = V_ADD_CO_U32_e64 $vgpr0, $vgpr0, 0, implicit $exec
-    $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
-...
-
----
-name: valu_dep_3_bundle
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2
-    ; CHECK-LABEL: name: valu_dep_3_bundle
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    ; CHECK-NEXT: BUNDLE {
-    ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    ; CHECK-NEXT: }
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    BUNDLE {
-      $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-      $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    }
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: if
-tracksRegLiveness: true
-body: |
-  ; CHECK-LABEL: name: if
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.2, implicit $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  bb.0:
-    liveins: $vgpr0, $vcc
-    S_CBRANCH_VCCZ %bb.2, implicit $vcc
-  bb.1:
-    liveins: $vgpr0
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  bb.2:
-    liveins: $vgpr0
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: else
-tracksRegLiveness: true
-body: |
-  ; CHECK-LABEL: name: else
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; CHECK-NEXT:   liveins: $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.2, implicit $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_BRANCH %bb.3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  bb.0:
-    liveins: $vcc
-    S_CBRANCH_VCCZ %bb.2, implicit $vcc
-  bb.1:
-    S_BRANCH %bb.3
-  bb.2:
-    liveins: $vgpr0
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  bb.3:
-    liveins: $vgpr0
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: if_else
-tracksRegLiveness: true
-body: |
-  ; CHECK-LABEL: name: if_else
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; CHECK-NEXT:   liveins: $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.2, implicit $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_BRANCH %bb.3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  bb.0:
-    liveins: $vcc
-    S_CBRANCH_VCCZ %bb.2, implicit $vcc
-  bb.1:
-    liveins: $vgpr0
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    S_BRANCH %bb.3
-  bb.2:
-    liveins: $vgpr0, $vgpr1
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
-  bb.3:
-    liveins: $vgpr0
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: loop_1
-tracksRegLiveness: true
-body: |
-  ; CHECK-LABEL: name: loop_1
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
-  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.1, implicit $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  bb.0:
-    liveins: $vgpr0
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  bb.1:
-    liveins: $vgpr0, $vcc
-    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    S_CBRANCH_VCCZ %bb.1, implicit $vcc
-  bb.2:
-...
-
----
-name: loop_2
-tracksRegLiveness: true
-body: |
-  ; CHECK-LABEL: name: loop_2
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_CBRANCH_VCCZ %bb.1, implicit $vcc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  bb.0:
-  bb.1:
-    liveins: $vgpr0, $vcc
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    S_CBRANCH_VCCZ %bb.1, implicit $vcc
-  bb.2:
-...
-
----
-name: sendmsg_rtn
-tracksRegLiveness: true
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: sendmsg_rtn
-    ; CHECK: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-    ; CHECK-NEXT: $sgpr0 = S_SENDMSG_RTN_B32 128
-    ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-    $sgpr0 = S_SENDMSG_RTN_B32 128
-    $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: flat_load
-tracksRegLiveness: true
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: flat_load
-    ; CHECK: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
-    ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
-    ; CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
-    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
-    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
-    $vgpr0 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
-...
-
----
-name: waitcnt_depctr
-tracksRegLiveness: true
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: waitcnt_depctr
-    ; CHECK: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-    ; CHECK-NEXT: S_WAITCNT_DEPCTR 4095
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-    S_WAITCNT_DEPCTR 4095
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...
-
----
-name: writelane1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $sgpr0
-    ; CHECK-LABEL: name: writelane1
-    ; CHECK: liveins: $vgpr0, $sgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 1, $vgpr0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 2, $vgpr0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
-    $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
-    $vgpr0 = V_WRITELANE_B32 $sgpr0, 1, $vgpr0
-    $vgpr0 = V_WRITELANE_B32 $sgpr0, 2, $vgpr0
-    $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
-...
-
----
-name: writelane2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $sgpr0
-    ; CHECK-LABEL: name: writelane2
-    ; CHECK: liveins: $vgpr0, $sgpr0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
-    ; CHECK-NEXT: S_SINGLEUSE_VDST 1
-    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-    $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
-    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-...



More information about the llvm-commits mailing list