[llvm] [AMDGPU] New AMDGPUInsertSingleUseVDST pass (PR #72388)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 20 05:47:42 PST 2023
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/72388
>From 0e03b4b6ccaa47ea5deeb3167a92025dc46b50b7 Mon Sep 17 00:00:00 2001
From: Scott Egerton <scott.egerton at amd.com>
Date: Wed, 26 Jul 2023 12:05:25 +0100
Subject: [PATCH 1/3] [AMDGPU] New AMDGPUInsertSingleUseVDST pass
Add support for emitting GFX11.5 s_singleuse_vdst instructions. This is
a power saving feature whereby the compiler can annotate VALU
instructions whose results are known to have only a single use, so the
hardware can in some cases avoid writing the result back to VGPR RAM.
To begin with the pass is disabled by default because of one missing
feature: we need an exclusion list of opcodes that never qualify as
single-use producers and/or consumers. A future patch will implement
this and enable the pass by default.
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 3 +
.../AMDGPU/AMDGPUInsertSingleUseVDST.cpp | 120 ++
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 10 +
llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 +
.../CodeGen/AMDGPU/insert-singleuse_vdst.mir | 1023 +++++++++++++++++
5 files changed, 1157 insertions(+)
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
create mode 100644 llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 403014db56171ac..323560a46f31de2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -335,6 +335,9 @@ extern char &SIModeRegisterID;
void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
extern char &AMDGPUInsertDelayAluID;
+void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
+extern char &AMDGPUInsertSingleUseVDSTID;
+
void initializeSIInsertHardClausesPass(PassRegistry &);
extern char &SIInsertHardClausesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
new file mode 100644
index 000000000000000..88a77b22d989127
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -0,0 +1,120 @@
+//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
+/// instructions that produce single-use VGPR values. If the value is forwarded
+/// to the consumer instruction prior to VGPR writeback, the hardware can
+/// then skip (kill) the VGPR write.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
+
+namespace {
+/// Machine function pass that walks every basic block bottom-up, counts how
+/// often each register is read below its definition, and inserts an
+/// S_SINGLEUSE_VDST hint in front of each VALU instruction none of whose
+/// written registers is read more than once.
+class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
+private:
+  // Instruction info of the function being processed. Assigned in
+  // runOnMachineFunction before any instruction is emitted.
+  const SIInstrInfo *SII = nullptr;
+
+public:
+  static char ID;
+
+  AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
+
+  /// Insert "S_SINGLEUSE_VDST 1" immediately before \p MI, in the same block.
+  void emitSingleUseVDST(MachineInstr &MI) const {
+    BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
+        .addImm(0x1);
+  }
+
+  /// Scan \p MF and mark single-use VALU producers.
+  ///
+  /// Does nothing on subtargets without the VGPR single-use hint instructions.
+  /// \returns true if at least one S_SINGLEUSE_VDST was inserted.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    const auto &ST = MF.getSubtarget<GCNSubtarget>();
+    if (!ST.hasVGPRSingleUseHintInsts())
+      return false;
+
+    SII = ST.getInstrInfo();
+    const auto *TRI = MF.getSubtarget().getRegisterInfo();
+    bool InstructionEmitted = false;
+
+    for (MachineBasicBlock &MBB : MF) {
+      // Number of reads seen so far (i.e. further down the block) for each
+      // register. Only "more than one" matters, so 2 is used as a saturated
+      // "multiple uses" marker below.
+      // NOTE(review): operands are llvm::Register values while the key type
+      // is MCPhysReg; presumably only physical registers reach this pass --
+      // confirm.
+      DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits
+
+      // Handle boundaries at the end of basic block separately to avoid
+      // false positives. If they are live at the end of a basic block then
+      // assume it has more uses later on.
+      for (const auto &Liveouts : MBB.liveouts())
+        RegisterUseCount[Liveouts.PhysReg] = 2;
+
+      for (MachineInstr &MI : reverse(MBB.instrs())) {
+        // Debug instructions must not influence code generation: counting
+        // their register operands as reads would make the inserted hints
+        // depend on whether debug info is present.
+        if (MI.isDebugInstr())
+          continue;
+
+        // Invariant across the operand scan below, so query it only once.
+        const bool ModifiesExec = MI.modifiesRegister(AMDGPU::EXEC, TRI);
+
+        // All registers in all operands need to be single use for an
+        // instruction to be marked as a single use producer.
+        bool AllProducerOperandsAreSingleUse = true;
+
+        for (const auto &Operand : MI.operands()) {
+          if (!Operand.isReg())
+            continue;
+          const auto Reg = Operand.getReg();
+
+          // Count the number of times each register is read.
+          if (Operand.readsReg())
+            RegisterUseCount[Reg]++;
+
+          // Do not attempt to optimise across exec mask changes: treat every
+          // register tracked so far as having multiple uses.
+          if (ModifiesExec) {
+            for (auto &UsedReg : RegisterUseCount)
+              UsedReg.second = 2;
+          }
+
+          // If we are at the point where the register first became live,
+          // check if the operands are single use.
+          if (!MI.modifiesRegister(Reg, TRI))
+            continue;
+          if (RegisterUseCount[Reg] > 1)
+            AllProducerOperandsAreSingleUse = false;
+          // Reset uses count when a register is no longer live.
+          RegisterUseCount.erase(Reg);
+        }
+        if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
+          // TODO: Replace with candidate logging for instruction grouping
+          // later.
+          emitSingleUseVDST(MI);
+          InstructionEmitted = true;
+        }
+      }
+    }
+    return InstructionEmitted;
+  }
+};
+} // namespace
+
+char AMDGPUInsertSingleUseVDST::ID = 0;
+
+char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
+
+INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
+ "AMDGPU Insert SingleUseVDST", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 951ed9420594b19..0c38fa32c6f33a8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -286,6 +286,12 @@ static cl::opt<bool> EnableSIModeRegisterPass(
cl::init(true),
cl::Hidden);
+// Enable GFX11.5+ s_singleuse_vdst insertion (hidden option, off by default).
+static cl::opt<bool>
+ EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
+ cl::desc("Enable s_singleuse_vdst insertion"),
+ cl::init(false), cl::Hidden);
+
// Enable GFX11+ s_delay_alu insertion
static cl::opt<bool>
EnableInsertDelayAlu("amdgpu-enable-delay-alu",
@@ -404,6 +410,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
+ initializeAMDGPUInsertSingleUseVDSTPass(*PR);
initializeAMDGPUInsertDelayAluPass(*PR);
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
@@ -1448,6 +1455,9 @@ void GCNPassConfig::addPreEmitPass() {
// cases.
addPass(&PostRAHazardRecognizerID);
+ if (isPassEnabled(EnableInsertSingleUseVDST, CodeGenOptLevel::Less))
+ addPass(&AMDGPUInsertSingleUseVDSTID);
+
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
addPass(&AMDGPUInsertDelayAluID);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 0c0720890794b66..53a33f8210d2a84 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -77,6 +77,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUMacroFusion.cpp
AMDGPUMCInstLower.cpp
AMDGPUIGroupLP.cpp
+ AMDGPUInsertSingleUseVDST.cpp
AMDGPUMIRFormatter.cpp
AMDGPUOpenCLEnqueuedBlockLowering.cpp
AMDGPUPerfHintAnalysis.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
new file mode 100644
index 000000000000000..8480f8d211ebac1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
@@ -0,0 +1,1023 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
+
+---
+name: valu_dep_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: valu_dep_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: valu_dep_3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-LABEL: name: valu_dep_4
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-LABEL: name: valu_dep_5
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr4, $vgpr4, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ $vgpr4 = V_ADD_U32_e32 $vgpr4, $vgpr4, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: multiple_uses_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: multiple_uses_1
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: multiple_uses_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: multiple_uses_2
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: multiple_uses_3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-LABEL: name: multiple_uses_3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+...
+
+---
+name: basic_block_1
+tracksRegLiveness: true
+
+body: |
+ ; CHECK-LABEL: name: basic_block_1
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ successors: %bb.1
+
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: basic_block_2
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: basic_block_2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ successors: %bb.1
+
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = IMPLICIT_DEF
+
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+...
+
+---
+name: basic_block_3
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: basic_block_3
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ successors: %bb.1
+
+ $vgpr0 = IMPLICIT_DEF
+
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: exec_mask_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-LABEL: name: exec_mask_1
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 1234
+ $exec = COPY $vgpr0
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: exec_mask_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: exec_mask_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr5 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 1234
+ $exec = COPY $vgpr0
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr5 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: exec_mask_3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: exec_mask_3
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5678
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+ ; CHECK-NEXT: $exec = COPY $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 1234
+ $exec = COPY $vgpr0
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 5678
+ $exec = COPY $vgpr0
+ $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
+ %9:_(s64) = G_CONSTANT i64 1234
+ $exec = COPY $vgpr0
+ $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trans32_dep_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: trans32_dep_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: trans32_dep_3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-LABEL: name: trans32_dep_4
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr3 = V_EXP_F32_e32 $vgpr3, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ $vgpr3 = V_EXP_F32_e32 $vgpr3, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: salu_cycle_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: salu_cycle_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_same_trans32_dep_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_1_same_trans32_dep_1
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: trans32_dep_1_only
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: trans32_dep_1_only
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_same_salu_cycle_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: valu_dep_1_same_salu_cycle_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_next_valu_dep_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: valu_dep_1_next_valu_dep_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2_next_valu_dep_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_2_next_valu_dep_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_no_next_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_1_no_next_1
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr2 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+...
+
+---
+name: valu_dep_1_no_next_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: valu_dep_1_no_next_2
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+ $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
+...
+
+---
+name: implicit_cmp_cndmask
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-LABEL: name: implicit_cmp_cndmask
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $vcc, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $vcc, implicit $exec
+ implicit $vcc = V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $vcc, implicit $exec
+...
+
+---
+name: explicit_cmp_cndmask
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-LABEL: name: explicit_cmp_cndmask
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0_sgpr1, implicit $exec
+ $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0_sgpr1, implicit $exec
+...
+
+---
+name: implicit_addc_addc
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vcc
+ ; CHECK-LABEL: name: implicit_addc_addc
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+ $vgpr0 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+ $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+---
+name: explicit_addc_addc
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: explicit_addc_addc
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0, $vcc = V_ADD_CO_U32_e64 $vgpr0, $vgpr0, 0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+ $vgpr0,$vcc = V_ADD_CO_U32_e64 $vgpr0, $vgpr0, 0, implicit $exec
+ $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+---
+name: valu_dep_3_bundle
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: valu_dep_3_bundle
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: BUNDLE {
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ BUNDLE {
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ }
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: if
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: if
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.0:
+ liveins: $vcc
+ S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ bb.1:
+ liveins: $vgpr0, $vgpr1
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.2:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: else
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: else
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.0:
+ liveins: $vcc
+ S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ bb.1:
+ S_BRANCH %bb.3
+ bb.2:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.3:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: if_else
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: if_else
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.0:
+ liveins: $vcc
+ S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ bb.1:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ S_BRANCH %bb.3
+ bb.2:
+ liveins: $vgpr0, $vgpr1
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ bb.3:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: loop_1
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: loop_1
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+ liveins: $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vcc
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ bb.2:
+...
+
+---
+name: loop_2
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: loop_2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+ bb.1:
+ liveins: $vgpr0, $vcc
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ bb.2:
+...
+
+---
+name: sendmsg_rtn
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: sendmsg_rtn
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $sgpr0 = S_SENDMSG_RTN_B32 128
+ ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_SENDMSG_RTN_B32 128
+ $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: flat_load
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: flat_load
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ $vgpr0 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+...
+
+---
+name: waitcnt_depctr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: waitcnt_depctr
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 4095
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ S_WAITCNT_DEPCTR 4095
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: writelane1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr0
+ ; CHECK-LABEL: name: writelane1
+ ; CHECK: liveins: $vgpr0, $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 1, $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 2, $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 1, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 2, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+...
+
+---
+name: writelane2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr0
+ ; CHECK-LABEL: name: writelane2
+ ; CHECK: liveins: $vgpr0, $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
>From e8628203e307c0c3897c62a81e963cc3658d194a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 17 Nov 2023 13:58:36 +0000
Subject: [PATCH 2/3] fixups
---
llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
index 88a77b22d989127..93ed77bb6f7efe8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -48,6 +48,8 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
void emitSingleUseVDST(MachineInstr &MI) const {
+ // Mark the following instruction as a single-use producer:
+ // s_singleuse_vdst { supr0: 1 }
BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
.addImm(0x1);
}
@@ -58,7 +60,7 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
return false;
SII = ST.getInstrInfo();
- const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ const auto *TRI = &SII->getRegisterInfo();
bool InstructionEmitted = false;
for (MachineBasicBlock &MBB : MF) {
>From 1294a0fd206c9a7b41b79f6b2ef388388ef5915e Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 20 Nov 2023 13:46:49 +0000
Subject: [PATCH 3/3] trim liveins
---
.../CodeGen/AMDGPU/insert-singleuse_vdst.mir | 86 ++++++++-----------
1 file changed, 38 insertions(+), 48 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
index 8480f8d211ebac1..5622e30eee8ea6b 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse_vdst.mir
@@ -121,9 +121,9 @@ name: multiple_uses_1
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2
+ liveins: $vgpr0
; CHECK-LABEL: name: multiple_uses_1
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -140,9 +140,9 @@ name: multiple_uses_2
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2
+ liveins: $vgpr0
; CHECK-LABEL: name: multiple_uses_2
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -167,9 +167,9 @@ name: multiple_uses_3
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ liveins: $vgpr0
; CHECK-LABEL: name: multiple_uses_3
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -192,14 +192,16 @@ body: |
; CHECK-LABEL: name: basic_block_1
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -207,14 +209,14 @@ body: |
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2
+ liveins: $vgpr0
successors: %bb.1
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
$vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
bb.1:
- liveins: $vgpr0, $vgpr1, $vgpr2
+ liveins: $vgpr0
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
@@ -228,21 +230,19 @@ body: |
; CHECK-LABEL: name: basic_block_2
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: liveins: $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
successors: %bb.1
$vgpr0 = IMPLICIT_DEF
@@ -250,7 +250,7 @@ body: |
$vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
bb.1:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ liveins: $vgpr2
$vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
$vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
@@ -263,15 +263,15 @@ body: |
; CHECK-LABEL: name: basic_block_3
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -279,7 +279,6 @@ body: |
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2
successors: %bb.1
$vgpr0 = IMPLICIT_DEF
@@ -288,7 +287,7 @@ body: |
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
$vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
bb.1:
- liveins: $vgpr0, $vgpr1, $vgpr2
+ liveins: $vgpr0, $vgpr1
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
@@ -300,9 +299,9 @@ name: exec_mask_1
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ liveins: $vgpr0
; CHECK-LABEL: name: exec_mask_1
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
@@ -325,9 +324,9 @@ name: exec_mask_2
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1
+ liveins: $vgpr0
; CHECK-LABEL: name: exec_mask_2
- ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
@@ -356,18 +355,18 @@ name: exec_mask_3
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1
+ liveins: $vgpr0
; CHECK-LABEL: name: exec_mask_3
- ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
; CHECK-NEXT: $exec = COPY $vgpr0
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 5678
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5678
; CHECK-NEXT: $exec = COPY $vgpr0
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr0, implicit $exec
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1234
; CHECK-NEXT: $exec = COPY $vgpr0
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr0, implicit $exec
@@ -611,9 +610,9 @@ name: valu_dep_1_no_next_1
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1
+ liveins: $vgpr0
; CHECK-LABEL: name: valu_dep_1_no_next_1
- ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
; CHECK-NEXT: S_SINGLEUSE_VDST 1
@@ -653,9 +652,9 @@ name: implicit_cmp_cndmask
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
; CHECK-LABEL: name: implicit_cmp_cndmask
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $vcc, implicit $exec
@@ -670,9 +669,9 @@ name: explicit_cmp_cndmask
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
; CHECK-LABEL: name: explicit_cmp_cndmask
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
@@ -750,13 +749,13 @@ body: |
; CHECK-LABEL: name: if
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK-NEXT: liveins: $vcc
+ ; CHECK-NEXT: liveins: $vgpr0, $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
@@ -766,10 +765,10 @@ body: |
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
bb.0:
- liveins: $vcc
+ liveins: $vgpr0, $vcc
S_CBRANCH_VCCZ %bb.2, implicit $vcc
bb.1:
- liveins: $vgpr0, $vgpr1
+ liveins: $vgpr0
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
bb.2:
liveins: $vgpr0
@@ -923,11 +922,8 @@ name: sendmsg_rtn
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0
; CHECK-LABEL: name: sendmsg_rtn
- ; CHECK: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $sgpr0 = S_SENDMSG_RTN_B32 128
; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
@@ -944,11 +940,8 @@ name: flat_load
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: flat_load
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
@@ -968,11 +961,8 @@ name: waitcnt_depctr
tracksRegLiveness: true
body: |
bb.0:
- liveins: $vgpr0
; CHECK-LABEL: name: waitcnt_depctr
- ; CHECK: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: S_WAITCNT_DEPCTR 4095
; CHECK-NEXT: S_SINGLEUSE_VDST 1
More information about the llvm-commits mailing list