[llvm] [AMDGPU] GCNRegPressure printing pass for testing. (PR #70031)
Valery Pykhtin via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 25 14:17:15 PDT 2023
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/70031
>From 89aec40e7e5c63e6c828d3e25d351f1f58b52c44 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Thu, 19 Oct 2023 10:07:44 +0200
Subject: [PATCH 1/3] [AMDGPU] GCNRegPressure printing pass for testing.
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 3 +
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 62 +++
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 17 +
.../CodeGen/AMDGPU/sched-crash-dbg-value.mir | 416 +++++++++++++++++-
5 files changed, 497 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 97a413296c55e55..2c29710f8c8cb46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -191,6 +191,9 @@ extern char &AMDGPUImageIntrinsicOptimizerID;
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;
+void initializeGCNRegPressurePrinterPass(PassRegistry &);
+extern char &GCNRegPressurePrinterID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dc7321cd5de9fcd..375df27206f7b41 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -428,6 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGCNPreRAOptimizationsPass(*PR);
initializeGCNPreRALongBranchRegPass(*PR);
initializeGCNRewritePartialRegUsesPass(*PR);
+ initializeGCNRegPressurePrinterPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1ca0f3b6e06b823..cd939a2b9f373e6 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "GCNRegPressure.h"
+#include "AMDGPU.h"
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
@@ -487,3 +488,64 @@ LLVM_DUMP_METHOD
void GCNRegPressure::dump() const { dbgs() << print(*this); }
#endif
+
+char llvm::GCNRegPressurePrinter::ID = 0;
+char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID; // handle declared extern in AMDGPU.h
+// Registers the pass under the name "amdgpu-print-rp" (the name the MIR test passes to --run-pass); the description string is left empty.
+INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
+
+bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { // Prints SGPR/VGPR pressure around every instruction of MF to dbgs(); always returns false.
+ if (skipFunction(MF.getFunction()))
+ return false;
+ // The upward pressure tracker is constructed from the LiveIntervals analysis.
+ const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ GCNUpwardRPTracker RPT(LIS);
+ // All output below goes to the debug stream.
+ auto &OS = dbgs();
+ // Open a YAML-style document: the function name followed by a "body: |" block.
+ OS << "---\nname: " << MF.getName() << "\nbody: |\n";
+ // Sample buffers shared by all blocks; each is filled in reverse instruction order.
+ SmallVector<GCNRegPressure, 16> RPAtInstr;
+ SmallVector<GCNRegPressure, 16> RPAfterInstr;
+
+ for (auto &MBB : MF) {
+ if (MBB.empty())
+ continue;
+
+ RPAtInstr.clear();
+ RPAfterInstr.clear();
+ // One "at" sample per instruction, plus one extra "after" sample taken before the walk starts.
+ RPAtInstr.reserve(MBB.size());
+ RPAfterInstr.reserve(MBB.size() + 1);
+ // Reset the tracker at the block's last instruction, then recede over the block bottom-up.
+ RPT.reset(MBB.instr_back());
+ RPAfterInstr.push_back(RPT.getPressure()); // sample taken before any recede
+ for (auto &MI : reverse(MBB)) {
+ RPT.recede(MI);
+ RPAtInstr.push_back(RPT.moveMaxPressure()); // max pressure since the previous sample (moveMaxPressure resets it)
+ RPAfterInstr.push_back(RPT.getPressure()); // current pressure once MI has been receded over
+ }
+
+ auto printRP = [&](const GCNRegPressure &RP) {
+ // Leading spaces are important for YAML syntax here
+ OS << " " << format("%-5d", RP.getSGPRNum()) << ' '
+ << format("%-5d", RP.getVGPRNum(false));
+ };
+ // Print in program order: the samples pushed last belong to the first instruction, so index from the back.
+ MBB.printName(OS);
+ OS << ":\n";
+ OS << " SGPR VGPR\n";
+ unsigned I = RPAfterInstr.size() - 1; // last sample pushed, i.e. the one taken above the first instruction
+ printRP(RPAfterInstr[I]);
+ OS << '\n';
+ for (auto &MI : MBB) {
+ printRP(RPAtInstr[--I]); // pressure pair printed on the same line as the instruction text
+ OS << " ";
+ MI.print(OS);
+ printRP(RPAfterInstr[I]); // pressure pair printed on its own line below the instruction
+ OS << '\n';
+ }
+ }
+ OS << "...\n";
+ return false;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 72e18acc1b8e494..f2256f68c2c7037 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,6 +128,8 @@ class GCNRPTracker {
void clearMaxPressure() { MaxPressure.clear(); }
+ GCNRegPressure getPressure() const { return CurPressure; } // returns a copy of the tracker's current pressure
+
// returns MaxPressure, resetting it
decltype(MaxPressure) moveMaxPressure() {
auto Res = MaxPressure;
@@ -277,6 +279,21 @@ Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedL,
const TargetRegisterInfo *TRI);
+struct GCNRegPressurePrinter : public MachineFunctionPass { // Machine-function pass that prints register pressure; registered as "amdgpu-print-rp".
+ static char ID; // pass identifier handed to the MachineFunctionPass constructor
+
+public:
+ GCNRegPressurePrinter() : MachineFunctionPass(ID) {}
+ // Defined in GCNRegPressure.cpp; writes the per-instruction pressure listing.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ // Requires LiveIntervals and declares that all analyses are preserved.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index f8c7be8e414ca15..c00d0702b73b663 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,4 +1,6 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
--- |
%struct.widget.0 = type { float, i32, i32 }
@@ -171,8 +173,6 @@
...
---
-# CHECK: name: sched_dbg_value_crash
-# CHECK: DBG_VALUE %99, $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
name: sched_dbg_value_crash
alignment: 1
@@ -198,6 +198,418 @@ body: |
bb.0.bb:
liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
+ ; CHECK-LABEL: name: sched_dbg_value_crash
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 24, 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[DEF]], [[COPY2]], implicit-def dead $vcc, implicit $exec
+ ; CHECK-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], 12, [[S_LOAD_DWORDX2_IMM]], 0, implicit $exec
+ ; CHECK-NEXT: dead [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 32, 0
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY1]], 4, 0
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 4, 0, implicit $exec
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MAD_I64_I32_e64_2:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_3:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[GLOBAL_LOAD_DWORD]], [[DEF1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 0, 0
+ ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM1:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 4, 0
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_MAD_I64_I32_e64_2]], 32, 0, implicit $exec
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM5:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sreg_64_xexec = S_MOV_B32 0
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32_xm0 = S_LSHR_B32 [[S_LOAD_DWORDX2_IMM3]].sub0, 16, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_LOAD_DWORD_IMM]], 2, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_LOAD_DWORDX2_IMM5]].sub0, [[S_LSHL_B64_]].sub0, implicit-def $scc
+ ; CHECK-NEXT: dead undef [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_LOAD_DWORDX2_IMM5]].sub1, [[S_LSHL_B64_]].sub1, implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LSHL_B64_1:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_LOAD_DWORD_IMM]], 2, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 0, [[S_LSHL_B64_1]].sub0, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sgpr_32 = S_ADDC_U32 [[S_LOAD_DWORDX2_IMM5]].sub1, [[S_LSHL_B64_1]].sub1, implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_]]
+ ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[S_LOAD_DWORDX2_IMM5]].sub0, [[DEF2]].sub0, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sgpr_32 = S_ADDC_U32 [[S_LOAD_DWORDX2_IMM5]].sub1, [[DEF2]].sub1, implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[V_ASHRREV_I32_e32_:%[0-9]+]].sub1:vreg_64 = V_ASHRREV_I32_e32 31, [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e32_:%[0-9]+]].sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 3, [[V_ASHRREV_I32_e32_]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 0, [[V_LSHLREV_B64_e64_]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[V_LSHLREV_B64_e64_]].sub1, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
+ ; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORD1:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[V_MAD_I64_I32_e64_2]], 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_MAD_I64_I32_e64_2]], 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_2:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_U32_2]], [[DEF4]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 0, [[DEF4]].sub1, [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_1]]
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_4:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_U32_2]], [[DEF3]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_4:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_2:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY6]], [[DEF3]].sub1, [[V_ADD_CO_U32_e64_5]], 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[V_ADD_CO_U32_e64_4]], 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX4_1:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[V_ADD_CO_U32_e64_2]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: dead [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[DEF7:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead undef [[S_LOAD_DWORD_IMM1:%[0-9]+]].sub1:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[DEF10:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead undef [[DEF10:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 0, [[DEF11]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MUL_LO_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 0, [[COPY2]], implicit $exec
+ ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[DEF20]], 0, [[DEF20]], 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, [[V_MUL_LO_I32_e64_]], implicit-def dead $vcc, implicit $exec
+ ; CHECK-NEXT: dead [[V_ADD_CO_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, [[V_ADD_CO_U32_e32_1]], implicit-def dead $vcc, implicit $exec
+ ; CHECK-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vcc = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_DIV_FMAS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, [[DEF22]], 0, [[V_FMA_F32_e64_1]], 0, [[DEF21]], 0, 0, implicit $vcc, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_3:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, [[DEF20]], 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_5:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[DEF20]], 0, [[DEF20]], 0, [[DEF30]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_RCP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_6:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_7:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[DEF32:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF34:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF35:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF36:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF37:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF38:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF39:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF40:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: DBG_VALUE [[DEF28]], $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
+ ; CHECK-NEXT: $vgpr1_vgpr2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 0, [[V_FMA_F32_e64_1]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_MUL_LO_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 [[COPY3]], [[S_LOAD_DWORDX2_IMM3]].sub1, implicit $exec
+ ; CHECK-NEXT: dead [[V_MAD_I64_I32_e64_4:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_5:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], 48, [[S_LOAD_DWORDX2_IMM1]], 0, implicit $exec
+ ; CHECK-NEXT: dead [[S_MUL_I32_:%[0-9]+]]:sreg_32_xm0 = S_MUL_I32 [[S_LSHR_B32_]], [[S_LOAD_DWORDX2_IMM3]].sub1
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_]]
+ ; CHECK-NEXT: dead [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_]]
+ ; CHECK-NEXT: dead [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_1]]
+ ; CHECK-NEXT: dead [[V_DIV_FIXUP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, [[V_DIV_FMAS_F32_e64_]], 0, [[DEF20]], 0, [[DEF30]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]].sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_1:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 3, [[DEF7]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_6:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_U32_1]], [[V_LSHLREV_B64_e64_1]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_6:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY7]], [[V_LSHLREV_B64_e64_1]].sub1, [[V_ADD_CO_U32_e64_7]], 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF15]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF14]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF13]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF12]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_2:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 3, [[GLOBAL_LOAD_DWORD1]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ASHRREV_I32_e32_1:%[0-9]+]].sub1:vreg_64 = V_ASHRREV_I32_e32 31, [[GLOBAL_LOAD_DWORDX2_2]].sub0, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ASHRREV_I32_e32_1:%[0-9]+]].sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_2]].sub0
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_8:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_9:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 0, [[V_LSHLREV_B64_e64_2]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_8:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_4:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 0, [[V_LSHLREV_B64_e64_2]].sub1, [[V_ADD_CO_U32_e64_9]], 0, implicit $exec
+ ; CHECK-NEXT: $sgpr4 = COPY $sgpr101
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF38]]
+ ; CHECK-NEXT: $vgpr3 = COPY [[DEF40]]
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL [[SI_PC_ADD_REL_OFFSET]], @func, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
+ ; CHECK-NEXT: dead [[V_MAD_I64_I32_e64_6:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_7:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], [[DEF1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; RP-LABEL: name: sched_dbg_value_crash
+ ; RP: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
+ ; RP-NEXT: 4 0
+ ; RP-NEXT: 4 0 dead %2:vgpr_32 = COPY $vgpr2
+ ; RP-NEXT: 4 0
+ ; RP-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
+ ; RP-NEXT: 4 1
+ ; RP-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RP-NEXT: 6 2
+ ; RP-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RP-NEXT: 8 2
+ ; RP-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RP-NEXT: 10 2
+ ; RP-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
+ ; RP-NEXT: 12 2
+ ; RP-NEXT: 12 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
+ ; RP-NEXT: 10 2
+ ; RP-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
+ ; RP-NEXT: 10 2
+ ; RP-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
+ ; RP-NEXT: 10 2
+ ; RP-NEXT: 10 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
+ ; RP-NEXT: 9 2
+ ; RP-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
+ ; RP-NEXT: 9 3
+ ; RP-NEXT: 9 3 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
+ ; RP-NEXT: 8 2
+ ; RP-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RP-NEXT: 8 2
+ ; RP-NEXT: 8 2 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RP-NEXT: 8 1
+ ; RP-NEXT: 8 1 dead %17:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 8 1
+ ; RP-NEXT: 8 1 dead %18:sreg_64 = S_MOV_B64 0
+ ; RP-NEXT: 8 1
+ ; RP-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; RP-NEXT: 9 1
+ ; RP-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RP-NEXT: 8 1
+ ; RP-NEXT: 8 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 6 3
+ ; RP-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
+ ; RP-NEXT: 6 2
+ ; RP-NEXT: 6 2 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 4 2 dead %26:vreg_128 = IMPLICIT_DEF
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
+ ; RP-NEXT: 5 2
+ ; RP-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
+ ; RP-NEXT: 6 2
+ ; RP-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RP-NEXT: 7 2
+ ; RP-NEXT: 7 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
+ ; RP-NEXT: 6 2
+ ; RP-NEXT: 6 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RP-NEXT: 5 2
+ ; RP-NEXT: 5 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
+ ; RP-NEXT: 3 2
+ ; RP-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 4 3
+ ; RP-NEXT: 4 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RP-NEXT: 4 4
+ ; RP-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
+ ; RP-NEXT: 4 6
+ ; RP-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
+ ; RP-NEXT: 4 7
+ ; RP-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
+ ; RP-NEXT: 4 7
+ ; RP-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
+ ; RP-NEXT: 4 7
+ ; RP-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 6 7
+ ; RP-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
+ ; RP-NEXT: 6 8
+ ; RP-NEXT: 6 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 4 7
+ ; RP-NEXT: 4 7 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
+ ; RP-NEXT: 4 5
+ ; RP-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 4 6
+ ; RP-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
+ ; RP-NEXT: 4 6
+ ; RP-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
+ ; RP-NEXT: 4 6
+ ; RP-NEXT: 6 6 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 5 5
+ ; RP-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
+ ; RP-NEXT: 5 6
+ ; RP-NEXT: 5 6 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 3 4
+ ; RP-NEXT: 3 4 dead %51:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 3 4
+ ; RP-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
+ ; RP-NEXT: 3 5
+ ; RP-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 3 6
+ ; RP-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
+ ; RP-NEXT: 3 6
+ ; RP-NEXT: 5 6 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 5 5
+ ; RP-NEXT: 5 5 dead %56:vgpr_32 = COPY %33:sgpr_32
+ ; RP-NEXT: 4 5
+ ; RP-NEXT: 4 5 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 dead %58:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 4 4
+ ; RP-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
+ ; RP-NEXT: 3 4
+ ; RP-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 5 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
+ ; RP-NEXT: 2 3
+ ; RP-NEXT: 2 3 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
+ ; RP-NEXT: 2 3
+ ; RP-NEXT: 2 3 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 4 4
+ ; RP-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
+ ; RP-NEXT: 4 5
+ ; RP-NEXT: 4 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 2 undef %70.sub1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 2 dead %70.sub0:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 3 4
+ ; RP-NEXT: 3 4 dead %74:vgpr_32 = COPY %61:sgpr_32
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %83:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %84:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %85:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %86:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %94:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %95:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %96:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %97:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %98:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %99:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %100:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %105:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 dead %110:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 7 $vcc = IMPLICIT_DEF
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %115:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %116:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %117:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %118:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %119:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %121:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 0 DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 $sgpr4 = COPY $sgpr101
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
+ ; RP-NEXT: 2 3
+ ; RP-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
+ ; RP-NEXT: 2 3
+ ; RP-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 0 S_ENDPGM 0
+ ; RP-NEXT: 0 0
%4:sgpr_64 = COPY $sgpr6_sgpr7
%3:sgpr_64 = COPY $sgpr4_sgpr5
%2:vgpr_32 = COPY $vgpr2
>From a9a33cd965744091b9b0af950e50e2f955ac2534 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Tue, 24 Oct 2023 11:25:29 +0200
Subject: [PATCH 2/3] Various improvements: * Added printing of live-in,
live-out and live-through sets. * Empty BB aren't skipped now.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 67 +++++----
...ched-assert-onlydbg-value-empty-region.mir | 142 +++++++++++++++---
.../CodeGen/AMDGPU/sched-crash-dbg-value.mir | 5 +-
3 files changed, 168 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index cd939a2b9f373e6..1db4e9dd151d847 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -498,54 +498,67 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
GCNUpwardRPTracker RPT(LIS);
auto &OS = dbgs();
+// Leading spaces are important for YAML syntax.
+#define PFX " "
+
OS << "---\nname: " << MF.getName() << "\nbody: |\n";
- SmallVector<GCNRegPressure, 16> RPAtInstr;
- SmallVector<GCNRegPressure, 16> RPAfterInstr;
+ auto printRP = [](const GCNRegPressure &RP) {
+ return Printable([&RP](raw_ostream &OS) {
+ OS << format(PFX " %-5d", RP.getSGPRNum())
+ << format(" %-5d", RP.getVGPRNum(false));
+ });
+ };
+
+ // Register pressure before and at an instruction (in program order).
+ SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
for (auto &MBB : MF) {
- if (MBB.empty())
+ OS << PFX;
+ MBB.printName(OS);
+ OS << ":\n";
+
+ if (MBB.empty()) {
+ SlotIndex MBBSI = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
+ GCNRPTracker::LiveRegSet LRThrough = getLiveRegs(MBBSI, LIS, MRI);
+ GCNRegPressure RP = getRegPressure(MRI, LRThrough);
+ OS << PFX " Live-through:" << llvm::print(LRThrough, MRI);
+ OS << PFX " SGPR VGPR\n" << printRP(RP) << '\n';
continue;
+ }
- RPAtInstr.clear();
- RPAfterInstr.clear();
+ RPT.reset(MBB.instr_back());
+ RPT.moveMaxPressure(); // Clear max pressure.
- RPAtInstr.reserve(MBB.size());
- RPAfterInstr.reserve(MBB.size() + 1);
+ GCNRPTracker::LiveRegSet LRAtMBBEnd = RPT.getLiveRegs();
+ GCNRegPressure RPAtMBBEnd = RPT.getPressure();
- RPT.reset(MBB.instr_back());
- RPAfterInstr.push_back(RPT.getPressure());
+ RP.clear();
+ RP.reserve(MBB.size());
for (auto &MI : reverse(MBB)) {
RPT.recede(MI);
- RPAtInstr.push_back(RPT.moveMaxPressure());
- RPAfterInstr.push_back(RPT.getPressure());
+ RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
}
- auto printRP = [&](const GCNRegPressure &RP) {
- // Leading spaces are important for YAML syntax here
- OS << " " << format("%-5d", RP.getSGPRNum()) << ' '
- << format("%-5d", RP.getVGPRNum(false));
- };
-
- MBB.printName(OS);
- OS << ":\n";
- OS << " SGPR VGPR\n";
- unsigned I = RPAfterInstr.size() - 1;
- printRP(RPAfterInstr[I]);
- OS << '\n';
+ OS << PFX " Live-in:" << llvm::print(RPT.getLiveRegs(), MRI);
+ OS << PFX " SGPR VGPR\n";
+ auto I = RP.rbegin();
for (auto &MI : MBB) {
- printRP(RPAtInstr[--I]);
- OS << " ";
+ auto &[RPBeforeInstr, RPAtInstr] = *I++;
+ OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
MI.print(OS);
- printRP(RPAfterInstr[I]);
- OS << '\n';
}
+ OS << printRP(RPAtMBBEnd) << '\n';
+ OS << PFX " Live-out:" << llvm::print(LRAtMBBEnd, MRI);
}
OS << "...\n";
return false;
+
+#undef PFX
}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index e4f56cc328e4782..138c8e785dec280 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
# The sequence of DBG_VALUEs forms a scheduling region with 0 real
# instructions. The RegPressure tracker would end up skipping over any
@@ -27,33 +28,33 @@ body: |
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ; CHECK-NEXT: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_F32_e32_:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_F32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec
- ; CHECK-NEXT: undef %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
- ; CHECK-NEXT: undef %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
- ; CHECK-NEXT: %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
- ; CHECK-NEXT: undef %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_F32_e32_1:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[V_ADD_F32_e32_1]], [[V_MOV_B32_e32_]], 32, 0, implicit $exec
+ ; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
- ; CHECK-NEXT: %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
- ; CHECK-NEXT: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
- ; CHECK-NEXT: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD3:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[GLOBAL_LOAD_DWORD2]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD4:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]]
- ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_1]]
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_2]], 0, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -65,9 +66,114 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
; CHECK-NEXT: S_NOP 0, implicit [[DEF8]]
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; RP-LABEL: name: only_dbg_value_sched_region
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RP-NEXT: 0 8
+ ; RP-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RP-NEXT: 0 9
+ ; RP-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 8
+ ; RP-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 9
+ ; RP-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 11
+ ; RP-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 12
+ ; RP-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 13
+ ; RP-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 14
+ ; RP-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 16
+ ; RP-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 18
+ ; RP-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 19
+ ; RP-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RP-NEXT: 0 20
+ ; RP-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RP-NEXT: 0 21
+ ; RP-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 20
+ ; RP-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 20
+ ; RP-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RP-NEXT: 0 16
+ ; RP-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 15
+ ; RP-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 14
+ ; RP-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 12
+ ; RP-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 10
+ ; RP-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 10
+ ; RP-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RP-NEXT: 0 9
+ ; RP-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 0 DBG_VALUE
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 0 DBG_VALUE
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 0 DBG_VALUE
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: bb.3:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 0 S_ENDPGM 0
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
bb.0:
liveins: $vgpr0
@@ -111,6 +217,8 @@ body: |
DBG_VALUE
S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ bb.3:
+
bb.2:
S_NOP 0, implicit %0
S_NOP 0, implicit %16
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index c00d0702b73b663..3f33e795e1e6ad0 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -197,7 +197,6 @@ constants:
body: |
bb.0.bb:
liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
-
; CHECK-LABEL: name: sched_dbg_value_crash
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
; CHECK-NEXT: {{ $}}
@@ -338,7 +337,8 @@ body: |
; CHECK-NEXT: S_ENDPGM 0
;
; RP-LABEL: name: sched_dbg_value_crash
- ; RP: SGPR VGPR
+ ; RP: Live-in:
+ ; RP-NEXT: SGPR VGPR
; RP-NEXT: 0 0
; RP-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
; RP-NEXT: 2 0
@@ -610,6 +610,7 @@ body: |
; RP-NEXT: 0 0
; RP-NEXT: 0 0 S_ENDPGM 0
; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
%4:sgpr_64 = COPY $sgpr6_sgpr7
%3:sgpr_64 = COPY $sgpr4_sgpr5
%2:vgpr_32 = COPY $vgpr2
>From 814ccb9546ef681b8f231bb0569eb050b5b72995 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Wed, 25 Oct 2023 23:16:42 +0200
Subject: [PATCH 3/3] Added: * downward tracker * skip dbg values
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 64 +-
...ched-assert-onlydbg-value-empty-region.mir | 299 ++++---
.../CodeGen/AMDGPU/sched-crash-dbg-value.mir | 827 ++++++++++++------
3 files changed, 797 insertions(+), 393 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1db4e9dd151d847..d2f8f4cf5dcc581 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -489,6 +489,11 @@ void GCNRegPressure::dump() const { dbgs() << print(*this); }
#endif
+static cl::opt<bool> UseDownwardTracker(
+ "amdgpu-print-rp-downward",
+ cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"),
+ cl::init(false), cl::Hidden);
+
char llvm::GCNRegPressurePrinter::ID = 0;
char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
@@ -500,8 +505,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
- GCNUpwardRPTracker RPT(LIS);
-
+
auto &OS = dbgs();
// Leading spaces are important for YAML syntax.
@@ -520,6 +524,9 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
for (auto &MBB : MF) {
+ RP.clear();
+ RP.reserve(MBB.size());
+
OS << PFX;
MBB.printName(OS);
OS << ":\n";
@@ -533,25 +540,52 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- RPT.reset(MBB.instr_back());
- RPT.moveMaxPressure(); // Clear max pressure.
+ GCNRPTracker::LiveRegSet LRAtMBBBegin, LRAtMBBEnd;
+ GCNRegPressure RPAtMBBEnd;
+
+ if (UseDownwardTracker) {
+ GCNDownwardRPTracker RPT(LIS);
+ RPT.reset(MBB.instr_front());
- GCNRPTracker::LiveRegSet LRAtMBBEnd = RPT.getLiveRegs();
- GCNRegPressure RPAtMBBEnd = RPT.getPressure();
+ LRAtMBBBegin = RPT.getLiveRegs();
- RP.clear();
- RP.reserve(MBB.size());
- for (auto &MI : reverse(MBB)) {
- RPT.recede(MI);
- RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+ while (!RPT.advanceBeforeNext()) {
+ GCNRegPressure RPBeforeMI = RPT.getPressure();
+ RPT.advanceToNext();
+ RP.emplace_back(RPBeforeMI, RPT.getPressure());
+ }
+
+ LRAtMBBEnd = RPT.getLiveRegs();
+ RPAtMBBEnd = RPT.getPressure();
+
+ } else {
+ GCNUpwardRPTracker RPT(LIS);
+ RPT.reset(MBB.instr_back());
+ RPT.moveMaxPressure(); // Clear max pressure.
+
+ LRAtMBBEnd = RPT.getLiveRegs();
+ RPAtMBBEnd = RPT.getPressure();
+
+ for (auto &MI : reverse(MBB)) {
+ RPT.recede(MI);
+ if (!MI.isDebugInstr())
+ RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+ }
+
+ LRAtMBBBegin = RPT.getLiveRegs();
}
- OS << PFX " Live-in:" << llvm::print(RPT.getLiveRegs(), MRI);
+ OS << PFX " Live-in:" << llvm::print(LRAtMBBBegin, MRI);
OS << PFX " SGPR VGPR\n";
- auto I = RP.rbegin();
+ int I = 0;
for (auto &MI : MBB) {
- auto &[RPBeforeInstr, RPAtInstr] = *I++;
- OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
+ if (!MI.isDebugInstr()) {
+ auto &[RPBeforeInstr, RPAtInstr] =
+ RP[UseDownwardTracker ? I : (RP.size() - 1 - I)];
+ ++I;
+ OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
+ } else
+ OS << PFX " ";
MI.print(OS);
}
OS << printRP(RPAtMBBEnd) << '\n';
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index 138c8e785dec280..c780f091012e5b5 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
-
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
# The sequence of DBG_VALUEs forms a scheduling region with 0 real
# instructions. The RegPressure tracker would end up skipping over any
# debug instructions, so it would point to the instruction
@@ -74,106 +74,201 @@ body: |
; CHECK-NEXT: S_NOP 0, implicit [[DEF8]]
; CHECK-NEXT: S_ENDPGM 0
;
- ; RP-LABEL: name: only_dbg_value_sched_region
- ; RP: bb.0:
- ; RP-NEXT: Live-in:
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 0
- ; RP-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
- ; RP-NEXT: 0 1
- ; RP-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
- ; RP-NEXT: 0 6
- ; RP-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
- ; RP-NEXT: 0 8
- ; RP-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
- ; RP-NEXT: 0 9
- ; RP-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
- ; RP-NEXT: 0 8
- ; RP-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 6
- ; RP-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 9
- ; RP-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 11
- ; RP-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 12
- ; RP-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 13
- ; RP-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 14
- ; RP-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 16
- ; RP-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 18
- ; RP-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 19
- ; RP-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; RP-NEXT: 0 20
- ; RP-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; RP-NEXT: 0 21
- ; RP-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
- ; RP-NEXT: 0 20
- ; RP-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
- ; RP-NEXT: 0 20
- ; RP-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
- ; RP-NEXT: 0 16
- ; RP-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 15
- ; RP-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 14
- ; RP-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 12
- ; RP-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 10
- ; RP-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 10
- ; RP-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
- ; RP-NEXT: 0 9
- ; RP-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
- ; RP-NEXT: 0 2
- ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: bb.1:
- ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 0 DBG_VALUE
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 0 DBG_VALUE
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 0 DBG_VALUE
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
- ; RP-NEXT: 0 2
- ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: bb.2:
- ; RP-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 2
- ; RP-NEXT: bb.3:
- ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
- ; RP-NEXT: 0 1
- ; RP-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
- ; RP-NEXT: 0 0
- ; RP-NEXT: 0 0 S_ENDPGM 0
- ; RP-NEXT: 0 0
- ; RP-NEXT: Live-out:
+ ; RPU-LABEL: name: only_dbg_value_sched_region
+ ; RPU: bb.0:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RPU-NEXT: 0 8
+ ; RPU-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 8
+ ; RPU-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 11
+ ; RPU-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 12
+ ; RPU-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 13
+ ; RPU-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 14
+ ; RPU-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 16
+ ; RPU-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 18
+ ; RPU-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 19
+ ; RPU-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPU-NEXT: 0 21
+ ; RPU-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RPU-NEXT: 0 16
+ ; RPU-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 15
+ ; RPU-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 14
+ ; RPU-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 12
+ ; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 10
+ ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 10
+ ; RPU-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: bb.1:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: bb.2:
+ ; RPU-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: bb.3:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 0 S_ENDPGM 0
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ;
+ ; RPD-LABEL: name: only_dbg_value_sched_region
+ ; RPD: bb.0:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RPD-NEXT: 0 8
+ ; RPD-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 10 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 8
+ ; RPD-NEXT: 0 9 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 11
+ ; RPD-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 12
+ ; RPD-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 13
+ ; RPD-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 14
+ ; RPD-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 16
+ ; RPD-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 18
+ ; RPD-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 19
+ ; RPD-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPD-NEXT: 0 21
+ ; RPD-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RPD-NEXT: 0 16
+ ; RPD-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 15
+ ; RPD-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 14
+ ; RPD-NEXT: 0 15 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 12
+ ; RPD-NEXT: 0 13 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 10
+ ; RPD-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 10
+ ; RPD-NEXT: 0 12 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: bb.1:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: bb.2:
+ ; RPD-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: bb.3:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 0 S_ENDPGM 0
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
bb.0:
liveins: $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index 3f33e795e1e6ad0..28e03ed803763c0 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
+# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
--- |
%struct.widget.0 = type { float, i32, i32 }
@@ -336,281 +337,555 @@ body: |
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: S_ENDPGM 0
;
- ; RP-LABEL: name: sched_dbg_value_crash
- ; RP: Live-in:
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 0
- ; RP-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
- ; RP-NEXT: 2 0
- ; RP-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
- ; RP-NEXT: 4 0
- ; RP-NEXT: 4 0 dead %2:vgpr_32 = COPY $vgpr2
- ; RP-NEXT: 4 0
- ; RP-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
- ; RP-NEXT: 4 1
- ; RP-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
- ; RP-NEXT: 4 2
- ; RP-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RP-NEXT: 6 2
- ; RP-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RP-NEXT: 8 2
- ; RP-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RP-NEXT: 10 2
- ; RP-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
- ; RP-NEXT: 12 2
- ; RP-NEXT: 12 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
- ; RP-NEXT: 10 2
- ; RP-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
- ; RP-NEXT: 10 2
- ; RP-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
- ; RP-NEXT: 10 2
- ; RP-NEXT: 10 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
- ; RP-NEXT: 9 2
- ; RP-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
- ; RP-NEXT: 9 3
- ; RP-NEXT: 9 3 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
- ; RP-NEXT: 8 2
- ; RP-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RP-NEXT: 8 2
- ; RP-NEXT: 8 2 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RP-NEXT: 8 1
- ; RP-NEXT: 8 1 dead %17:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 8 1
- ; RP-NEXT: 8 1 dead %18:sreg_64 = S_MOV_B64 0
- ; RP-NEXT: 8 1
- ; RP-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
- ; RP-NEXT: 9 1
- ; RP-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RP-NEXT: 8 1
- ; RP-NEXT: 8 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 6 3
- ; RP-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
- ; RP-NEXT: 6 2
- ; RP-NEXT: 6 2 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 4 2
- ; RP-NEXT: 4 2 dead %26:vreg_128 = IMPLICIT_DEF
- ; RP-NEXT: 4 2
- ; RP-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
- ; RP-NEXT: 5 2
- ; RP-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
- ; RP-NEXT: 6 2
- ; RP-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
- ; RP-NEXT: 7 2
- ; RP-NEXT: 7 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
- ; RP-NEXT: 6 2
- ; RP-NEXT: 6 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RP-NEXT: 5 2
- ; RP-NEXT: 5 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
- ; RP-NEXT: 3 2
- ; RP-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
- ; RP-NEXT: 4 2
- ; RP-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
- ; RP-NEXT: 4 2
- ; RP-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
- ; RP-NEXT: 4 2
- ; RP-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RP-NEXT: 4 2
- ; RP-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 4 3
- ; RP-NEXT: 4 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
- ; RP-NEXT: 4 4
- ; RP-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
- ; RP-NEXT: 4 6
- ; RP-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
- ; RP-NEXT: 4 7
- ; RP-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
- ; RP-NEXT: 4 7
- ; RP-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
- ; RP-NEXT: 4 7
- ; RP-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 6 7
- ; RP-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
- ; RP-NEXT: 6 8
- ; RP-NEXT: 6 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 4 7
- ; RP-NEXT: 4 7 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
- ; RP-NEXT: 4 5
- ; RP-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 4 6
- ; RP-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
- ; RP-NEXT: 4 6
- ; RP-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
- ; RP-NEXT: 4 6
- ; RP-NEXT: 6 6 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 5 5
- ; RP-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
- ; RP-NEXT: 5 6
- ; RP-NEXT: 5 6 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 3 4
- ; RP-NEXT: 3 4 dead %51:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 3 4
- ; RP-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
- ; RP-NEXT: 3 5
- ; RP-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 3 6
- ; RP-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
- ; RP-NEXT: 3 6
- ; RP-NEXT: 5 6 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 5 5
- ; RP-NEXT: 5 5 dead %56:vgpr_32 = COPY %33:sgpr_32
- ; RP-NEXT: 4 5
- ; RP-NEXT: 4 5 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 dead %58:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
- ; RP-NEXT: 2 4
- ; RP-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 4 4
- ; RP-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
- ; RP-NEXT: 3 4
- ; RP-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 5 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
- ; RP-NEXT: 2 3
- ; RP-NEXT: 2 3 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
- ; RP-NEXT: 2 3
- ; RP-NEXT: 2 3 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 4
- ; RP-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 4 4
- ; RP-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
- ; RP-NEXT: 4 5
- ; RP-NEXT: 4 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 2 undef %70.sub1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 2 dead %70.sub0:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 4
- ; RP-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 3 4
- ; RP-NEXT: 3 4 dead %74:vgpr_32 = COPY %61:sgpr_32
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 6
- ; RP-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %83:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %84:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %85:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %86:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %94:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %95:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %96:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %97:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %98:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %99:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %100:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %105:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 dead %110:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 6
- ; RP-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 7 $vcc = IMPLICIT_DEF
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %115:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %116:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %117:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %118:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %119:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %121:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 0 DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; RP-NEXT: 0 4
- ; RP-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 $sgpr4 = COPY $sgpr101
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
- ; RP-NEXT: 2 3
- ; RP-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
- ; RP-NEXT: 2 3
- ; RP-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
- ; RP-NEXT: 0 0
- ; RP-NEXT: 0 0 S_ENDPGM 0
- ; RP-NEXT: 0 0
- ; RP-NEXT: Live-out:
+ ; RPU-LABEL: name: sched_dbg_value_crash
+ ; RPU: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
+ ; RPU-NEXT: 4 0
+ ; RPU-NEXT: 4 0 dead %2:vgpr_32 = COPY $vgpr2
+ ; RPU-NEXT: 4 0
+ ; RPU-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
+ ; RPU-NEXT: 4 1
+ ; RPU-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPU-NEXT: 6 2
+ ; RPU-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPU-NEXT: 8 2
+ ; RPU-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPU-NEXT: 10 2
+ ; RPU-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
+ ; RPU-NEXT: 12 2
+ ; RPU-NEXT: 12 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
+ ; RPU-NEXT: 10 2
+ ; RPU-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
+ ; RPU-NEXT: 10 2
+ ; RPU-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
+ ; RPU-NEXT: 10 2
+ ; RPU-NEXT: 10 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
+ ; RPU-NEXT: 9 2
+ ; RPU-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
+ ; RPU-NEXT: 9 3
+ ; RPU-NEXT: 9 3 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
+ ; RPU-NEXT: 8 2
+ ; RPU-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPU-NEXT: 8 2
+ ; RPU-NEXT: 8 2 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPU-NEXT: 8 1
+ ; RPU-NEXT: 8 1 dead %17:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 8 1
+ ; RPU-NEXT: 8 1 dead %18:sreg_64 = S_MOV_B64 0
+ ; RPU-NEXT: 8 1
+ ; RPU-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; RPU-NEXT: 9 1
+ ; RPU-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPU-NEXT: 8 1
+ ; RPU-NEXT: 8 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 6 3
+ ; RPU-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
+ ; RPU-NEXT: 6 2
+ ; RPU-NEXT: 6 2 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 4 2 dead %26:vreg_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
+ ; RPU-NEXT: 5 2
+ ; RPU-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
+ ; RPU-NEXT: 6 2
+ ; RPU-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RPU-NEXT: 7 2
+ ; RPU-NEXT: 7 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
+ ; RPU-NEXT: 6 2
+ ; RPU-NEXT: 6 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPU-NEXT: 5 2
+ ; RPU-NEXT: 5 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
+ ; RPU-NEXT: 3 2
+ ; RPU-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 4 3
+ ; RPU-NEXT: 4 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RPU-NEXT: 4 4
+ ; RPU-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
+ ; RPU-NEXT: 4 6
+ ; RPU-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
+ ; RPU-NEXT: 4 7
+ ; RPU-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
+ ; RPU-NEXT: 4 7
+ ; RPU-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
+ ; RPU-NEXT: 4 7
+ ; RPU-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 6 7
+ ; RPU-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
+ ; RPU-NEXT: 6 8
+ ; RPU-NEXT: 6 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 4 7
+ ; RPU-NEXT: 4 7 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
+ ; RPU-NEXT: 4 5
+ ; RPU-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 4 6
+ ; RPU-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
+ ; RPU-NEXT: 4 6
+ ; RPU-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
+ ; RPU-NEXT: 4 6
+ ; RPU-NEXT: 6 6 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 5 5
+ ; RPU-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
+ ; RPU-NEXT: 5 6
+ ; RPU-NEXT: 5 6 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 3 4
+ ; RPU-NEXT: 3 4 dead %51:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 4
+ ; RPU-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
+ ; RPU-NEXT: 3 5
+ ; RPU-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 6
+ ; RPU-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
+ ; RPU-NEXT: 3 6
+ ; RPU-NEXT: 5 6 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 5 5
+ ; RPU-NEXT: 5 5 dead %56:vgpr_32 = COPY %33:sgpr_32
+ ; RPU-NEXT: 4 5
+ ; RPU-NEXT: 4 5 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 dead %58:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 4 4
+ ; RPU-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
+ ; RPU-NEXT: 3 4
+ ; RPU-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 5 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
+ ; RPU-NEXT: 2 3
+ ; RPU-NEXT: 2 3 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
+ ; RPU-NEXT: 2 3
+ ; RPU-NEXT: 2 3 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 4 4
+ ; RPU-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
+ ; RPU-NEXT: 4 5
+ ; RPU-NEXT: 4 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 2 undef %70.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 2 dead %70.sub0:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 3 4
+ ; RPU-NEXT: 3 4 dead %74:vgpr_32 = COPY %61:sgpr_32
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %83:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %84:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %85:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %86:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %94:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %95:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %96:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %97:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %98:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %99:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %100:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %105:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 dead %110:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 7 $vcc = IMPLICIT_DEF
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %115:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %116:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %117:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %118:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %119:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %121:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 $sgpr4 = COPY $sgpr101
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
+ ; RPU-NEXT: 2 3
+ ; RPU-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 3
+ ; RPU-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 0 S_ENDPGM 0
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ;
+ ; RPD-LABEL: name: sched_dbg_value_crash
+ ; RPD: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
+ ; RPD-NEXT: 4 0
+ ; RPD-NEXT: 4 1 dead %2:vgpr_32 = COPY $vgpr2
+ ; RPD-NEXT: 4 0
+ ; RPD-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
+ ; RPD-NEXT: 4 1
+ ; RPD-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPD-NEXT: 6 2
+ ; RPD-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPD-NEXT: 8 2
+ ; RPD-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPD-NEXT: 10 2
+ ; RPD-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
+ ; RPD-NEXT: 12 2
+ ; RPD-NEXT: 14 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
+ ; RPD-NEXT: 10 2
+ ; RPD-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
+ ; RPD-NEXT: 10 2
+ ; RPD-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
+ ; RPD-NEXT: 10 2
+ ; RPD-NEXT: 11 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
+ ; RPD-NEXT: 9 2
+ ; RPD-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
+ ; RPD-NEXT: 9 3
+ ; RPD-NEXT: 9 4 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
+ ; RPD-NEXT: 8 2
+ ; RPD-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPD-NEXT: 8 2
+ ; RPD-NEXT: 8 3 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPD-NEXT: 8 1
+ ; RPD-NEXT: 8 2 dead %17:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 8 1
+ ; RPD-NEXT: 10 1 dead %18:sreg_64 = S_MOV_B64 0
+ ; RPD-NEXT: 8 1
+ ; RPD-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; RPD-NEXT: 9 1
+ ; RPD-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPD-NEXT: 8 1
+ ; RPD-NEXT: 10 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 6 3
+ ; RPD-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
+ ; RPD-NEXT: 6 2
+ ; RPD-NEXT: 8 4 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 4 6 dead %26:vreg_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
+ ; RPD-NEXT: 5 2
+ ; RPD-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
+ ; RPD-NEXT: 6 2
+ ; RPD-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RPD-NEXT: 7 2
+ ; RPD-NEXT: 8 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
+ ; RPD-NEXT: 6 2
+ ; RPD-NEXT: 7 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPD-NEXT: 5 2
+ ; RPD-NEXT: 6 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
+ ; RPD-NEXT: 3 2
+ ; RPD-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 4 3
+ ; RPD-NEXT: 6 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RPD-NEXT: 4 4
+ ; RPD-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
+ ; RPD-NEXT: 4 6
+ ; RPD-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
+ ; RPD-NEXT: 4 7
+ ; RPD-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
+ ; RPD-NEXT: 4 7
+ ; RPD-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
+ ; RPD-NEXT: 4 7
+ ; RPD-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 6 7
+ ; RPD-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
+ ; RPD-NEXT: 6 8
+ ; RPD-NEXT: 8 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 4 7
+ ; RPD-NEXT: 4 9 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
+ ; RPD-NEXT: 4 5
+ ; RPD-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 4 6
+ ; RPD-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
+ ; RPD-NEXT: 4 6
+ ; RPD-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
+ ; RPD-NEXT: 4 6
+ ; RPD-NEXT: 6 7 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 5 5
+ ; RPD-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
+ ; RPD-NEXT: 5 6
+ ; RPD-NEXT: 7 7 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 3 4
+ ; RPD-NEXT: 3 6 dead %51:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 4
+ ; RPD-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
+ ; RPD-NEXT: 3 5
+ ; RPD-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 6
+ ; RPD-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
+ ; RPD-NEXT: 3 6
+ ; RPD-NEXT: 5 7 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 5 5
+ ; RPD-NEXT: 5 6 dead %56:vgpr_32 = COPY %33:sgpr_32
+ ; RPD-NEXT: 4 5
+ ; RPD-NEXT: 6 6 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 6 dead %58:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 3 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 4 4
+ ; RPD-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
+ ; RPD-NEXT: 3 4
+ ; RPD-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 6 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
+ ; RPD-NEXT: 2 3
+ ; RPD-NEXT: 2 4 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
+ ; RPD-NEXT: 2 3
+ ; RPD-NEXT: 2 4 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 4 4
+ ; RPD-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
+ ; RPD-NEXT: 4 5
+ ; RPD-NEXT: 6 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 8 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 3 undef %70.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 3 dead %70.sub0:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 3 4
+ ; RPD-NEXT: 3 5 dead %74:vgpr_32 = COPY %61:sgpr_32
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 4 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 8 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %83:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %84:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %85:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %86:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 2 4 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 2 4 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %94:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %95:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %96:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %97:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %98:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %99:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %100:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 2 5 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 dead %105:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 2 6 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 dead %110:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 7 $vcc = IMPLICIT_DEF
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %115:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %116:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %117:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %118:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %119:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %121:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 4 $sgpr4 = COPY $sgpr101
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
+ ; RPD-NEXT: 2 3
+ ; RPD-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 3
+ ; RPD-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 2 4 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 0 S_ENDPGM 0
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
%4:sgpr_64 = COPY $sgpr6_sgpr7
%3:sgpr_64 = COPY $sgpr4_sgpr5
%2:vgpr_32 = COPY $vgpr2
More information about the llvm-commits
mailing list