[flang-commits] [clang-tools-extra] [llvm] [mlir] [clang] [compiler-rt] [flang] [lldb] [openmp] [lld] [libcxx] [AMDGPU] GCNRegPressure printing pass for testing. (PR #70031)
Valery Pykhtin via flang-commits
flang-commits at lists.llvm.org
Wed Nov 1 12:39:09 PDT 2023
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/70031
>From 89aec40e7e5c63e6c828d3e25d351f1f58b52c44 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Thu, 19 Oct 2023 10:07:44 +0200
Subject: [PATCH 1/8] [AMDGPU] GCNRegPressure printing pass for testing.
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 3 +
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 62 +++
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 17 +
.../CodeGen/AMDGPU/sched-crash-dbg-value.mir | 416 +++++++++++++++++-
5 files changed, 497 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 97a413296c55e55..2c29710f8c8cb46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -191,6 +191,9 @@ extern char &AMDGPUImageIntrinsicOptimizerID;
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;
+void initializeGCNRegPressurePrinterPass(PassRegistry &);
+extern char &GCNRegPressurePrinterID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dc7321cd5de9fcd..375df27206f7b41 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -428,6 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGCNPreRAOptimizationsPass(*PR);
initializeGCNPreRALongBranchRegPass(*PR);
initializeGCNRewritePartialRegUsesPass(*PR);
+ initializeGCNRegPressurePrinterPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1ca0f3b6e06b823..cd939a2b9f373e6 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "GCNRegPressure.h"
+#include "AMDGPU.h"
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
@@ -487,3 +488,64 @@ LLVM_DUMP_METHOD
void GCNRegPressure::dump() const { dbgs() << print(*this); }
#endif
+
+char llvm::GCNRegPressurePrinter::ID = 0;
+char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
+// Pass name is amdgpu-print-rp (the test selects it via llc --run-pass).
+INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
+
+bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+ // Pressure is computed bottom-up per block from the LiveIntervals analysis.
+ const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ GCNUpwardRPTracker RPT(LIS);
+ // Everything is written to the dbgs() stream.
+ auto &OS = dbgs();
+ // Per-function header: document start marker, function name, body key.
+ OS << "---\nname: " << MF.getName() << "\nbody: |\n";
+ // Scratch buffers reused for every block; filled in reverse program order.
+ SmallVector<GCNRegPressure, 16> RPAtInstr;
+ SmallVector<GCNRegPressure, 16> RPAfterInstr;
+
+ for (auto &MBB : MF) {
+ if (MBB.empty())
+ continue;
+ // Drop the records of the previous block.
+ RPAtInstr.clear();
+ RPAfterInstr.clear();
+
+ RPAtInstr.reserve(MBB.size());
+ RPAfterInstr.reserve(MBB.size() + 1);
+ // Position the tracker at the last instruction; record 0 is taken there.
+ RPT.reset(MBB.instr_back());
+ RPAfterInstr.push_back(RPT.getPressure());
+ for (auto &MI : reverse(MBB)) {
+ RPT.recede(MI);
+ RPAtInstr.push_back(RPT.moveMaxPressure()); // max since last move, then reset
+ RPAfterInstr.push_back(RPT.getPressure()); // current pressure after recede
+ }
+ // Prints the SGPR and VGPR counts as two left-aligned, 5-wide columns.
+ auto printRP = [&](const GCNRegPressure &RP) {
+ // Leading spaces are important for YAML syntax here
+ OS << " " << format("%-5d", RP.getSGPRNum()) << ' '
+ << format("%-5d", RP.getVGPRNum(false));
+ };
+ // Emit the block in program order, walking the records back to front.
+ MBB.printName(OS);
+ OS << ":\n";
+ OS << " SGPR VGPR\n";
+ unsigned I = RPAfterInstr.size() - 1; // record taken after the final recede
+ printRP(RPAfterInstr[I]);
+ OS << '\n';
+ for (auto &MI : MBB) {
+ printRP(RPAtInstr[--I]); // record pushed when this MI was receded over
+ OS << " ";
+ MI.print(OS);
+ printRP(RPAfterInstr[I]); // record pushed just before this MI was receded
+ OS << '\n';
+ }
+ }
+ OS << "...\n";
+ return false; // always reports the function as unmodified
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 72e18acc1b8e494..f2256f68c2c7037 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,6 +128,8 @@ class GCNRPTracker {
void clearMaxPressure() { MaxPressure.clear(); }
+ GCNRegPressure getPressure() const { return CurPressure; } // returns a copy
+
// returns MaxPressure, resetting it
decltype(MaxPressure) moveMaxPressure() {
auto Res = MaxPressure;
@@ -277,6 +279,21 @@ Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedL,
const TargetRegisterInfo *TRI);
+struct GCNRegPressurePrinter : public MachineFunctionPass { // testing aid
+ static char ID;
+
+public:
+ GCNRegPressurePrinter() : MachineFunctionPass(ID) {}
+ // Prints per-instruction register pressure; defined in GCNRegPressure.cpp.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ // Requires LiveIntervals and marks all analyses as preserved.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index f8c7be8e414ca15..c00d0702b73b663 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,4 +1,6 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
--- |
%struct.widget.0 = type { float, i32, i32 }
@@ -171,8 +173,6 @@
...
---
-# CHECK: name: sched_dbg_value_crash
-# CHECK: DBG_VALUE %99, $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
name: sched_dbg_value_crash
alignment: 1
@@ -198,6 +198,418 @@ body: |
bb.0.bb:
liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
+ ; CHECK-LABEL: name: sched_dbg_value_crash
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 24, 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[DEF]], [[COPY2]], implicit-def dead $vcc, implicit $exec
+ ; CHECK-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], 12, [[S_LOAD_DWORDX2_IMM]], 0, implicit $exec
+ ; CHECK-NEXT: dead [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 32, 0
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY1]], 4, 0
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 4, 0, implicit $exec
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MAD_I64_I32_e64_2:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_3:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[GLOBAL_LOAD_DWORD]], [[DEF1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 0, 0
+ ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM1:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 4, 0
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_MAD_I64_I32_e64_2]], 32, 0, implicit $exec
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM5:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sreg_64_xexec = S_MOV_B32 0
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32_xm0 = S_LSHR_B32 [[S_LOAD_DWORDX2_IMM3]].sub0, 16, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_LOAD_DWORD_IMM]], 2, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_LOAD_DWORDX2_IMM5]].sub0, [[S_LSHL_B64_]].sub0, implicit-def $scc
+ ; CHECK-NEXT: dead undef [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_LOAD_DWORDX2_IMM5]].sub1, [[S_LSHL_B64_]].sub1, implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[S_LSHL_B64_1:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_LOAD_DWORD_IMM]], 2, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 0, [[S_LSHL_B64_1]].sub0, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sgpr_32 = S_ADDC_U32 [[S_LOAD_DWORDX2_IMM5]].sub1, [[S_LSHL_B64_1]].sub1, implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_]]
+ ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[S_LOAD_DWORDX2_IMM5]].sub0, [[DEF2]].sub0, implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sgpr_32 = S_ADDC_U32 [[S_LOAD_DWORDX2_IMM5]].sub1, [[DEF2]].sub1, implicit-def dead $scc, implicit $scc
+ ; CHECK-NEXT: undef [[V_ASHRREV_I32_e32_:%[0-9]+]].sub1:vreg_64 = V_ASHRREV_I32_e32 31, [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e32_:%[0-9]+]].sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 3, [[V_ASHRREV_I32_e32_]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 0, [[V_LSHLREV_B64_e64_]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[V_LSHLREV_B64_e64_]].sub1, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
+ ; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORD1:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[V_MAD_I64_I32_e64_2]], 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_MAD_I64_I32_e64_2]], 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_2:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_U32_2]], [[DEF4]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 0, [[DEF4]].sub1, [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_1]]
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_4:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_U32_2]], [[DEF3]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_4:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_2:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY6]], [[DEF3]].sub1, [[V_ADD_CO_U32_e64_5]], 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[V_ADD_CO_U32_e64_4]], 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX4_1:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[V_ADD_CO_U32_e64_2]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: dead [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[DEF7:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead undef [[S_LOAD_DWORD_IMM1:%[0-9]+]].sub1:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[DEF10:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead undef [[DEF10:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 0, [[DEF11]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MUL_LO_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 0, [[COPY2]], implicit $exec
+ ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[DEF20]], 0, [[DEF20]], 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, [[V_MUL_LO_I32_e64_]], implicit-def dead $vcc, implicit $exec
+ ; CHECK-NEXT: dead [[V_ADD_CO_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, [[V_ADD_CO_U32_e32_1]], implicit-def dead $vcc, implicit $exec
+ ; CHECK-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vcc = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_DIV_FMAS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, [[DEF22]], 0, [[V_FMA_F32_e64_1]], 0, [[DEF21]], 0, 0, implicit $vcc, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_3:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, [[DEF20]], 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_5:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[DEF20]], 0, [[DEF20]], 0, [[DEF30]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_RCP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_6:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_7:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[DEF32:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF34:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF35:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF36:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF37:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF38:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF39:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF40:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: DBG_VALUE [[DEF28]], $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
+ ; CHECK-NEXT: $vgpr1_vgpr2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 0, [[V_FMA_F32_e64_1]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead [[V_MUL_LO_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 [[COPY3]], [[S_LOAD_DWORDX2_IMM3]].sub1, implicit $exec
+ ; CHECK-NEXT: dead [[V_MAD_I64_I32_e64_4:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_5:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], 48, [[S_LOAD_DWORDX2_IMM1]], 0, implicit $exec
+ ; CHECK-NEXT: dead [[S_MUL_I32_:%[0-9]+]]:sreg_32_xm0 = S_MUL_I32 [[S_LSHR_B32_]], [[S_LOAD_DWORDX2_IMM3]].sub1
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_]]
+ ; CHECK-NEXT: dead [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_]]
+ ; CHECK-NEXT: dead [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_1]]
+ ; CHECK-NEXT: dead [[V_DIV_FIXUP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, [[V_DIV_FMAS_F32_e64_]], 0, [[DEF20]], 0, [[DEF30]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]].sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_1:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 3, [[DEF7]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_6:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_U32_1]], [[V_LSHLREV_B64_e64_1]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_6:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY7]], [[V_LSHLREV_B64_e64_1]].sub1, [[V_ADD_CO_U32_e64_7]], 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF15]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF14]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF13]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF12]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_2:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 3, [[GLOBAL_LOAD_DWORD1]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ASHRREV_I32_e32_1:%[0-9]+]].sub1:vreg_64 = V_ASHRREV_I32_e32 31, [[GLOBAL_LOAD_DWORDX2_2]].sub0, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ASHRREV_I32_e32_1:%[0-9]+]].sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_2]].sub0
+ ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_8:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_9:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 0, [[V_LSHLREV_B64_e64_2]].sub0, 0, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_8:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_4:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 0, [[V_LSHLREV_B64_e64_2]].sub1, [[V_ADD_CO_U32_e64_9]], 0, implicit $exec
+ ; CHECK-NEXT: $sgpr4 = COPY $sgpr101
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF38]]
+ ; CHECK-NEXT: $vgpr3 = COPY [[DEF40]]
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL [[SI_PC_ADD_REL_OFFSET]], @func, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
+ ; CHECK-NEXT: dead [[V_MAD_I64_I32_e64_6:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_7:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], [[DEF1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; RP-LABEL: name: sched_dbg_value_crash
+ ; RP: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
+ ; RP-NEXT: 4 0
+ ; RP-NEXT: 4 0 dead %2:vgpr_32 = COPY $vgpr2
+ ; RP-NEXT: 4 0
+ ; RP-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
+ ; RP-NEXT: 4 1
+ ; RP-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RP-NEXT: 6 2
+ ; RP-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RP-NEXT: 8 2
+ ; RP-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RP-NEXT: 10 2
+ ; RP-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
+ ; RP-NEXT: 12 2
+ ; RP-NEXT: 12 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
+ ; RP-NEXT: 10 2
+ ; RP-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
+ ; RP-NEXT: 10 2
+ ; RP-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
+ ; RP-NEXT: 10 2
+ ; RP-NEXT: 10 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
+ ; RP-NEXT: 9 2
+ ; RP-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
+ ; RP-NEXT: 9 3
+ ; RP-NEXT: 9 3 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
+ ; RP-NEXT: 8 2
+ ; RP-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RP-NEXT: 8 2
+ ; RP-NEXT: 8 2 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RP-NEXT: 8 1
+ ; RP-NEXT: 8 1 dead %17:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 8 1
+ ; RP-NEXT: 8 1 dead %18:sreg_64 = S_MOV_B64 0
+ ; RP-NEXT: 8 1
+ ; RP-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; RP-NEXT: 9 1
+ ; RP-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RP-NEXT: 8 1
+ ; RP-NEXT: 8 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 6 3
+ ; RP-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
+ ; RP-NEXT: 6 2
+ ; RP-NEXT: 6 2 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 4 2 dead %26:vreg_128 = IMPLICIT_DEF
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
+ ; RP-NEXT: 5 2
+ ; RP-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
+ ; RP-NEXT: 6 2
+ ; RP-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RP-NEXT: 7 2
+ ; RP-NEXT: 7 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
+ ; RP-NEXT: 6 2
+ ; RP-NEXT: 6 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RP-NEXT: 5 2
+ ; RP-NEXT: 5 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
+ ; RP-NEXT: 3 2
+ ; RP-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RP-NEXT: 4 2
+ ; RP-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 4 3
+ ; RP-NEXT: 4 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RP-NEXT: 4 4
+ ; RP-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
+ ; RP-NEXT: 4 6
+ ; RP-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
+ ; RP-NEXT: 4 7
+ ; RP-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
+ ; RP-NEXT: 4 7
+ ; RP-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
+ ; RP-NEXT: 4 7
+ ; RP-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 6 7
+ ; RP-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
+ ; RP-NEXT: 6 8
+ ; RP-NEXT: 6 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 4 7
+ ; RP-NEXT: 4 7 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
+ ; RP-NEXT: 4 5
+ ; RP-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 4 6
+ ; RP-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
+ ; RP-NEXT: 4 6
+ ; RP-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
+ ; RP-NEXT: 4 6
+ ; RP-NEXT: 6 6 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 5 5
+ ; RP-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
+ ; RP-NEXT: 5 6
+ ; RP-NEXT: 5 6 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 3 4
+ ; RP-NEXT: 3 4 dead %51:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 3 4
+ ; RP-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
+ ; RP-NEXT: 3 5
+ ; RP-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 3 6
+ ; RP-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
+ ; RP-NEXT: 3 6
+ ; RP-NEXT: 5 6 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 5 5
+ ; RP-NEXT: 5 5 dead %56:vgpr_32 = COPY %33:sgpr_32
+ ; RP-NEXT: 4 5
+ ; RP-NEXT: 4 5 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 dead %58:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 4 4
+ ; RP-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
+ ; RP-NEXT: 3 4
+ ; RP-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 5 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
+ ; RP-NEXT: 2 3
+ ; RP-NEXT: 2 3 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
+ ; RP-NEXT: 2 3
+ ; RP-NEXT: 2 3 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 4 4
+ ; RP-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
+ ; RP-NEXT: 4 5
+ ; RP-NEXT: 4 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 2 undef %70.sub1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 2 dead %70.sub0:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
+ ; RP-NEXT: 3 4
+ ; RP-NEXT: 3 4 dead %74:vgpr_32 = COPY %61:sgpr_32
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %83:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %84:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %85:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %86:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %94:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %95:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %96:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %97:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %98:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %99:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %100:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 dead %105:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 dead %110:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 7 $vcc = IMPLICIT_DEF
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %115:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %116:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %117:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %118:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %119:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 3 dead %121:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 0 DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RP-NEXT: 0 4
+ ; RP-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 $sgpr4 = COPY $sgpr101
+ ; RP-NEXT: 2 4
+ ; RP-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
+ ; RP-NEXT: 2 3
+ ; RP-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
+ ; RP-NEXT: 2 3
+ ; RP-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
+ ; RP-NEXT: 2 2
+ ; RP-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 0 S_ENDPGM 0
+ ; RP-NEXT: 0 0
%4:sgpr_64 = COPY $sgpr6_sgpr7
%3:sgpr_64 = COPY $sgpr4_sgpr5
%2:vgpr_32 = COPY $vgpr2
>From a9a33cd965744091b9b0af950e50e2f955ac2534 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Tue, 24 Oct 2023 11:25:29 +0200
Subject: [PATCH 2/8] Various improvements: * Added printing of live-in,
live-out and live-through sets. * Empty BBs aren't skipped now.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 67 +++++----
...ched-assert-onlydbg-value-empty-region.mir | 142 +++++++++++++++---
.../CodeGen/AMDGPU/sched-crash-dbg-value.mir | 5 +-
3 files changed, 168 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index cd939a2b9f373e6..1db4e9dd151d847 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -498,54 +498,67 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
GCNUpwardRPTracker RPT(LIS);
auto &OS = dbgs();
+// Leading spaces are important for YAML syntax.
+#define PFX " "
+
OS << "---\nname: " << MF.getName() << "\nbody: |\n";
- SmallVector<GCNRegPressure, 16> RPAtInstr;
- SmallVector<GCNRegPressure, 16> RPAfterInstr;
+ auto printRP = [](const GCNRegPressure &RP) {
+ return Printable([&RP](raw_ostream &OS) {
+ OS << format(PFX " %-5d", RP.getSGPRNum())
+ << format(" %-5d", RP.getVGPRNum(false));
+ });
+ };
+
+ // Register pressure before and at an instruction (in program order).
+ SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
for (auto &MBB : MF) {
- if (MBB.empty())
+ OS << PFX;
+ MBB.printName(OS);
+ OS << ":\n";
+
+ if (MBB.empty()) {
+ SlotIndex MBBSI = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
+ GCNRPTracker::LiveRegSet LRThrough = getLiveRegs(MBBSI, LIS, MRI);
+ GCNRegPressure RP = getRegPressure(MRI, LRThrough);
+ OS << PFX " Live-through:" << llvm::print(LRThrough, MRI);
+ OS << PFX " SGPR VGPR\n" << printRP(RP) << '\n';
continue;
+ }
- RPAtInstr.clear();
- RPAfterInstr.clear();
+ RPT.reset(MBB.instr_back());
+ RPT.moveMaxPressure(); // Clear max pressure.
- RPAtInstr.reserve(MBB.size());
- RPAfterInstr.reserve(MBB.size() + 1);
+ GCNRPTracker::LiveRegSet LRAtMBBEnd = RPT.getLiveRegs();
+ GCNRegPressure RPAtMBBEnd = RPT.getPressure();
- RPT.reset(MBB.instr_back());
- RPAfterInstr.push_back(RPT.getPressure());
+ RP.clear();
+ RP.reserve(MBB.size());
for (auto &MI : reverse(MBB)) {
RPT.recede(MI);
- RPAtInstr.push_back(RPT.moveMaxPressure());
- RPAfterInstr.push_back(RPT.getPressure());
+ RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
}
- auto printRP = [&](const GCNRegPressure &RP) {
- // Leading spaces are important for YAML syntax here
- OS << " " << format("%-5d", RP.getSGPRNum()) << ' '
- << format("%-5d", RP.getVGPRNum(false));
- };
-
- MBB.printName(OS);
- OS << ":\n";
- OS << " SGPR VGPR\n";
- unsigned I = RPAfterInstr.size() - 1;
- printRP(RPAfterInstr[I]);
- OS << '\n';
+ OS << PFX " Live-in:" << llvm::print(RPT.getLiveRegs(), MRI);
+ OS << PFX " SGPR VGPR\n";
+ auto I = RP.rbegin();
for (auto &MI : MBB) {
- printRP(RPAtInstr[--I]);
- OS << " ";
+ auto &[RPBeforeInstr, RPAtInstr] = *I++;
+ OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
MI.print(OS);
- printRP(RPAfterInstr[I]);
- OS << '\n';
}
+ OS << printRP(RPAtMBBEnd) << '\n';
+ OS << PFX " Live-out:" << llvm::print(LRAtMBBEnd, MRI);
}
OS << "...\n";
return false;
+
+#undef PFX
}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index e4f56cc328e4782..138c8e785dec280 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
# The sequence of DBG_VALUEs forms a scheduling region with 0 real
# instructions. The RegPressure tracker would end up skipping over any
@@ -27,33 +28,33 @@ body: |
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ; CHECK-NEXT: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_F32_e32_:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_F32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec
- ; CHECK-NEXT: undef %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
- ; CHECK-NEXT: undef %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
- ; CHECK-NEXT: %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
- ; CHECK-NEXT: undef %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_F32_e32_1:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[V_ADD_F32_e32_1]], [[V_MOV_B32_e32_]], 32, 0, implicit $exec
+ ; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
- ; CHECK-NEXT: %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
- ; CHECK-NEXT: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
- ; CHECK-NEXT: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD3:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[GLOBAL_LOAD_DWORD2]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD4:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]]
- ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_1]]
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_2]], 0, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -65,9 +66,114 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
; CHECK-NEXT: S_NOP 0, implicit [[DEF8]]
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; RP-LABEL: name: only_dbg_value_sched_region
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RP-NEXT: 0 8
+ ; RP-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RP-NEXT: 0 9
+ ; RP-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 8
+ ; RP-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 9
+ ; RP-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 11
+ ; RP-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 12
+ ; RP-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 13
+ ; RP-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 14
+ ; RP-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 16
+ ; RP-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 18
+ ; RP-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 19
+ ; RP-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RP-NEXT: 0 20
+ ; RP-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RP-NEXT: 0 21
+ ; RP-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 20
+ ; RP-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 20
+ ; RP-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RP-NEXT: 0 16
+ ; RP-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 15
+ ; RP-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 14
+ ; RP-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 12
+ ; RP-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 10
+ ; RP-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 10
+ ; RP-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RP-NEXT: 0 9
+ ; RP-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 0 DBG_VALUE
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 0 DBG_VALUE
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 0 DBG_VALUE
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: bb.3:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 0 S_ENDPGM 0
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
bb.0:
liveins: $vgpr0
@@ -111,6 +217,8 @@ body: |
DBG_VALUE
S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ bb.3:
+
bb.2:
S_NOP 0, implicit %0
S_NOP 0, implicit %16
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index c00d0702b73b663..3f33e795e1e6ad0 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -197,7 +197,6 @@ constants:
body: |
bb.0.bb:
liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
-
; CHECK-LABEL: name: sched_dbg_value_crash
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
; CHECK-NEXT: {{ $}}
@@ -338,7 +337,8 @@ body: |
; CHECK-NEXT: S_ENDPGM 0
;
; RP-LABEL: name: sched_dbg_value_crash
- ; RP: SGPR VGPR
+ ; RP: Live-in:
+ ; RP-NEXT: SGPR VGPR
; RP-NEXT: 0 0
; RP-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
; RP-NEXT: 2 0
@@ -610,6 +610,7 @@ body: |
; RP-NEXT: 0 0
; RP-NEXT: 0 0 S_ENDPGM 0
; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
%4:sgpr_64 = COPY $sgpr6_sgpr7
%3:sgpr_64 = COPY $sgpr4_sgpr5
%2:vgpr_32 = COPY $vgpr2
>From 814ccb9546ef681b8f231bb0569eb050b5b72995 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Wed, 25 Oct 2023 23:16:42 +0200
Subject: [PATCH 3/8] Added: * downward tracker * skip dbg values
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 64 +-
...ched-assert-onlydbg-value-empty-region.mir | 299 ++++---
.../CodeGen/AMDGPU/sched-crash-dbg-value.mir | 827 ++++++++++++------
3 files changed, 797 insertions(+), 393 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1db4e9dd151d847..d2f8f4cf5dcc581 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -489,6 +489,11 @@ void GCNRegPressure::dump() const { dbgs() << print(*this); }
#endif
+static cl::opt<bool> UseDownwardTracker(
+ "amdgpu-print-rp-downward",
+ cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"),
+ cl::init(false), cl::Hidden);
+
char llvm::GCNRegPressurePrinter::ID = 0;
char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
@@ -500,8 +505,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
- GCNUpwardRPTracker RPT(LIS);
-
+
auto &OS = dbgs();
// Leading spaces are important for YAML syntax.
@@ -520,6 +524,9 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
for (auto &MBB : MF) {
+ RP.clear();
+ RP.reserve(MBB.size());
+
OS << PFX;
MBB.printName(OS);
OS << ":\n";
@@ -533,25 +540,52 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- RPT.reset(MBB.instr_back());
- RPT.moveMaxPressure(); // Clear max pressure.
+ GCNRPTracker::LiveRegSet LRAtMBBBegin, LRAtMBBEnd;
+ GCNRegPressure RPAtMBBEnd;
+
+ if (UseDownwardTracker) {
+ GCNDownwardRPTracker RPT(LIS);
+ RPT.reset(MBB.instr_front());
- GCNRPTracker::LiveRegSet LRAtMBBEnd = RPT.getLiveRegs();
- GCNRegPressure RPAtMBBEnd = RPT.getPressure();
+ LRAtMBBBegin = RPT.getLiveRegs();
- RP.clear();
- RP.reserve(MBB.size());
- for (auto &MI : reverse(MBB)) {
- RPT.recede(MI);
- RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+ while (!RPT.advanceBeforeNext()) {
+ GCNRegPressure RPBeforeMI = RPT.getPressure();
+ RPT.advanceToNext();
+ RP.emplace_back(RPBeforeMI, RPT.getPressure());
+ }
+
+ LRAtMBBEnd = RPT.getLiveRegs();
+ RPAtMBBEnd = RPT.getPressure();
+
+ } else {
+ GCNUpwardRPTracker RPT(LIS);
+ RPT.reset(MBB.instr_back());
+ RPT.moveMaxPressure(); // Clear max pressure.
+
+ LRAtMBBEnd = RPT.getLiveRegs();
+ RPAtMBBEnd = RPT.getPressure();
+
+ for (auto &MI : reverse(MBB)) {
+ RPT.recede(MI);
+ if (!MI.isDebugInstr())
+ RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+ }
+
+ LRAtMBBBegin = RPT.getLiveRegs();
}
- OS << PFX " Live-in:" << llvm::print(RPT.getLiveRegs(), MRI);
+ OS << PFX " Live-in:" << llvm::print(LRAtMBBBegin, MRI);
OS << PFX " SGPR VGPR\n";
- auto I = RP.rbegin();
+ int I = 0;
for (auto &MI : MBB) {
- auto &[RPBeforeInstr, RPAtInstr] = *I++;
- OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
+ if (!MI.isDebugInstr()) {
+ auto &[RPBeforeInstr, RPAtInstr] =
+ RP[UseDownwardTracker ? I : (RP.size() - 1 - I)];
+ ++I;
+ OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
+ } else
+ OS << PFX " ";
MI.print(OS);
}
OS << printRP(RPAtMBBEnd) << '\n';
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index 138c8e785dec280..c780f091012e5b5 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
-
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
# The sequence of DBG_VALUEs forms a scheduling region with 0 real
# instructions. The RegPressure tracker would end up skipping over any
# debug instructions, so it would point to the instruction
@@ -74,106 +74,201 @@ body: |
; CHECK-NEXT: S_NOP 0, implicit [[DEF8]]
; CHECK-NEXT: S_ENDPGM 0
;
- ; RP-LABEL: name: only_dbg_value_sched_region
- ; RP: bb.0:
- ; RP-NEXT: Live-in:
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 0
- ; RP-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
- ; RP-NEXT: 0 1
- ; RP-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
- ; RP-NEXT: 0 6
- ; RP-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
- ; RP-NEXT: 0 8
- ; RP-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
- ; RP-NEXT: 0 9
- ; RP-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
- ; RP-NEXT: 0 8
- ; RP-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 6
- ; RP-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 9
- ; RP-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 11
- ; RP-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 12
- ; RP-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 13
- ; RP-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 14
- ; RP-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 16
- ; RP-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 0 18
- ; RP-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 19
- ; RP-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; RP-NEXT: 0 20
- ; RP-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; RP-NEXT: 0 21
- ; RP-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
- ; RP-NEXT: 0 20
- ; RP-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
- ; RP-NEXT: 0 20
- ; RP-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
- ; RP-NEXT: 0 16
- ; RP-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 15
- ; RP-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 14
- ; RP-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 12
- ; RP-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 10
- ; RP-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 10
- ; RP-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
- ; RP-NEXT: 0 9
- ; RP-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
- ; RP-NEXT: 0 2
- ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: bb.1:
- ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 0 DBG_VALUE
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 0 DBG_VALUE
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 0 DBG_VALUE
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
- ; RP-NEXT: 0 2
- ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: bb.2:
- ; RP-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 2
- ; RP-NEXT: bb.3:
- ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
- ; RP-NEXT: 0 1
- ; RP-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
- ; RP-NEXT: 0 0
- ; RP-NEXT: 0 0 S_ENDPGM 0
- ; RP-NEXT: 0 0
- ; RP-NEXT: Live-out:
+ ; RPU-LABEL: name: only_dbg_value_sched_region
+ ; RPU: bb.0:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RPU-NEXT: 0 8
+ ; RPU-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 8
+ ; RPU-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 11
+ ; RPU-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 12
+ ; RPU-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 13
+ ; RPU-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 14
+ ; RPU-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 16
+ ; RPU-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 18
+ ; RPU-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 19
+ ; RPU-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPU-NEXT: 0 21
+ ; RPU-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RPU-NEXT: 0 16
+ ; RPU-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 15
+ ; RPU-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 14
+ ; RPU-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 12
+ ; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 10
+ ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 10
+ ; RPU-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: bb.1:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: bb.2:
+ ; RPU-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: bb.3:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 0 S_ENDPGM 0
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ;
+ ; RPD-LABEL: name: only_dbg_value_sched_region
+ ; RPD: bb.0:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RPD-NEXT: 0 8
+ ; RPD-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 10 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 8
+ ; RPD-NEXT: 0 9 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 11
+ ; RPD-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 12
+ ; RPD-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 13
+ ; RPD-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 14
+ ; RPD-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 16
+ ; RPD-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 18
+ ; RPD-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 19
+ ; RPD-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPD-NEXT: 0 21
+ ; RPD-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RPD-NEXT: 0 16
+ ; RPD-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 15
+ ; RPD-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 14
+ ; RPD-NEXT: 0 15 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 12
+ ; RPD-NEXT: 0 13 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 10
+ ; RPD-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 10
+ ; RPD-NEXT: 0 12 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: bb.1:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: bb.2:
+ ; RPD-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: bb.3:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 0 S_ENDPGM 0
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
bb.0:
liveins: $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index 3f33e795e1e6ad0..28e03ed803763c0 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
+# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
--- |
%struct.widget.0 = type { float, i32, i32 }
@@ -336,281 +337,555 @@ body: |
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: S_ENDPGM 0
;
- ; RP-LABEL: name: sched_dbg_value_crash
- ; RP: Live-in:
- ; RP-NEXT: SGPR VGPR
- ; RP-NEXT: 0 0
- ; RP-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
- ; RP-NEXT: 2 0
- ; RP-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
- ; RP-NEXT: 4 0
- ; RP-NEXT: 4 0 dead %2:vgpr_32 = COPY $vgpr2
- ; RP-NEXT: 4 0
- ; RP-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
- ; RP-NEXT: 4 1
- ; RP-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
- ; RP-NEXT: 4 2
- ; RP-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RP-NEXT: 6 2
- ; RP-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RP-NEXT: 8 2
- ; RP-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RP-NEXT: 10 2
- ; RP-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
- ; RP-NEXT: 12 2
- ; RP-NEXT: 12 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
- ; RP-NEXT: 10 2
- ; RP-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
- ; RP-NEXT: 10 2
- ; RP-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
- ; RP-NEXT: 10 2
- ; RP-NEXT: 10 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
- ; RP-NEXT: 9 2
- ; RP-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
- ; RP-NEXT: 9 3
- ; RP-NEXT: 9 3 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
- ; RP-NEXT: 8 2
- ; RP-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RP-NEXT: 8 2
- ; RP-NEXT: 8 2 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RP-NEXT: 8 1
- ; RP-NEXT: 8 1 dead %17:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 8 1
- ; RP-NEXT: 8 1 dead %18:sreg_64 = S_MOV_B64 0
- ; RP-NEXT: 8 1
- ; RP-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
- ; RP-NEXT: 9 1
- ; RP-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RP-NEXT: 8 1
- ; RP-NEXT: 8 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 6 3
- ; RP-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
- ; RP-NEXT: 6 2
- ; RP-NEXT: 6 2 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 4 2
- ; RP-NEXT: 4 2 dead %26:vreg_128 = IMPLICIT_DEF
- ; RP-NEXT: 4 2
- ; RP-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
- ; RP-NEXT: 5 2
- ; RP-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
- ; RP-NEXT: 6 2
- ; RP-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
- ; RP-NEXT: 7 2
- ; RP-NEXT: 7 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
- ; RP-NEXT: 6 2
- ; RP-NEXT: 6 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RP-NEXT: 5 2
- ; RP-NEXT: 5 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
- ; RP-NEXT: 3 2
- ; RP-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
- ; RP-NEXT: 4 2
- ; RP-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
- ; RP-NEXT: 4 2
- ; RP-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
- ; RP-NEXT: 4 2
- ; RP-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RP-NEXT: 4 2
- ; RP-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 4 3
- ; RP-NEXT: 4 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
- ; RP-NEXT: 4 4
- ; RP-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
- ; RP-NEXT: 4 6
- ; RP-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
- ; RP-NEXT: 4 7
- ; RP-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
- ; RP-NEXT: 4 7
- ; RP-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
- ; RP-NEXT: 4 7
- ; RP-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 6 7
- ; RP-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
- ; RP-NEXT: 6 8
- ; RP-NEXT: 6 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 4 7
- ; RP-NEXT: 4 7 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
- ; RP-NEXT: 4 5
- ; RP-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 4 6
- ; RP-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
- ; RP-NEXT: 4 6
- ; RP-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
- ; RP-NEXT: 4 6
- ; RP-NEXT: 6 6 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 5 5
- ; RP-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
- ; RP-NEXT: 5 6
- ; RP-NEXT: 5 6 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 3 4
- ; RP-NEXT: 3 4 dead %51:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 3 4
- ; RP-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
- ; RP-NEXT: 3 5
- ; RP-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 3 6
- ; RP-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
- ; RP-NEXT: 3 6
- ; RP-NEXT: 5 6 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 5 5
- ; RP-NEXT: 5 5 dead %56:vgpr_32 = COPY %33:sgpr_32
- ; RP-NEXT: 4 5
- ; RP-NEXT: 4 5 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 dead %58:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
- ; RP-NEXT: 2 4
- ; RP-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 4 4
- ; RP-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
- ; RP-NEXT: 3 4
- ; RP-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 5 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
- ; RP-NEXT: 2 3
- ; RP-NEXT: 2 3 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
- ; RP-NEXT: 2 3
- ; RP-NEXT: 2 3 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 4
- ; RP-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 4 4
- ; RP-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
- ; RP-NEXT: 4 5
- ; RP-NEXT: 4 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 2 undef %70.sub1:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 2 dead %70.sub0:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
- ; RP-NEXT: 2 4
- ; RP-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
- ; RP-NEXT: 3 4
- ; RP-NEXT: 3 4 dead %74:vgpr_32 = COPY %61:sgpr_32
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 6
- ; RP-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %83:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %84:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %85:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %86:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %94:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %95:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %96:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %97:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %98:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %99:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %100:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 dead %105:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 dead %110:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 6
- ; RP-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 7 $vcc = IMPLICIT_DEF
- ; RP-NEXT: 0 7
- ; RP-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
- ; RP-NEXT: 0 5
- ; RP-NEXT: 0 5 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %115:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %116:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %117:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %118:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %119:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 3 dead %121:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 3
- ; RP-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 0 DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
- ; RP-NEXT: 0 4
- ; RP-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; RP-NEXT: 0 4
- ; RP-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 $sgpr4 = COPY $sgpr101
- ; RP-NEXT: 2 4
- ; RP-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
- ; RP-NEXT: 2 3
- ; RP-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
- ; RP-NEXT: 2 3
- ; RP-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
- ; RP-NEXT: 2 2
- ; RP-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; RP-NEXT: 0 2
- ; RP-NEXT: 0 2 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
- ; RP-NEXT: 0 0
- ; RP-NEXT: 0 0 S_ENDPGM 0
- ; RP-NEXT: 0 0
- ; RP-NEXT: Live-out:
+ ; RPU-LABEL: name: sched_dbg_value_crash
+ ; RPU: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
+ ; RPU-NEXT: 4 0
+ ; RPU-NEXT: 4 0 dead %2:vgpr_32 = COPY $vgpr2
+ ; RPU-NEXT: 4 0
+ ; RPU-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
+ ; RPU-NEXT: 4 1
+ ; RPU-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPU-NEXT: 6 2
+ ; RPU-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPU-NEXT: 8 2
+ ; RPU-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPU-NEXT: 10 2
+ ; RPU-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
+ ; RPU-NEXT: 12 2
+ ; RPU-NEXT: 12 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
+ ; RPU-NEXT: 10 2
+ ; RPU-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
+ ; RPU-NEXT: 10 2
+ ; RPU-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
+ ; RPU-NEXT: 10 2
+ ; RPU-NEXT: 10 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
+ ; RPU-NEXT: 9 2
+ ; RPU-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
+ ; RPU-NEXT: 9 3
+ ; RPU-NEXT: 9 3 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
+ ; RPU-NEXT: 8 2
+ ; RPU-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPU-NEXT: 8 2
+ ; RPU-NEXT: 8 2 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPU-NEXT: 8 1
+ ; RPU-NEXT: 8 1 dead %17:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 8 1
+ ; RPU-NEXT: 8 1 dead %18:sreg_64 = S_MOV_B64 0
+ ; RPU-NEXT: 8 1
+ ; RPU-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; RPU-NEXT: 9 1
+ ; RPU-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPU-NEXT: 8 1
+ ; RPU-NEXT: 8 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 6 3
+ ; RPU-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
+ ; RPU-NEXT: 6 2
+ ; RPU-NEXT: 6 2 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 4 2 dead %26:vreg_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
+ ; RPU-NEXT: 5 2
+ ; RPU-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
+ ; RPU-NEXT: 6 2
+ ; RPU-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RPU-NEXT: 7 2
+ ; RPU-NEXT: 7 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
+ ; RPU-NEXT: 6 2
+ ; RPU-NEXT: 6 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPU-NEXT: 5 2
+ ; RPU-NEXT: 5 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
+ ; RPU-NEXT: 3 2
+ ; RPU-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPU-NEXT: 4 2
+ ; RPU-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 4 3
+ ; RPU-NEXT: 4 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RPU-NEXT: 4 4
+ ; RPU-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
+ ; RPU-NEXT: 4 6
+ ; RPU-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
+ ; RPU-NEXT: 4 7
+ ; RPU-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
+ ; RPU-NEXT: 4 7
+ ; RPU-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
+ ; RPU-NEXT: 4 7
+ ; RPU-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 6 7
+ ; RPU-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
+ ; RPU-NEXT: 6 8
+ ; RPU-NEXT: 6 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 4 7
+ ; RPU-NEXT: 4 7 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
+ ; RPU-NEXT: 4 5
+ ; RPU-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 4 6
+ ; RPU-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
+ ; RPU-NEXT: 4 6
+ ; RPU-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
+ ; RPU-NEXT: 4 6
+ ; RPU-NEXT: 6 6 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 5 5
+ ; RPU-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
+ ; RPU-NEXT: 5 6
+ ; RPU-NEXT: 5 6 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 3 4
+ ; RPU-NEXT: 3 4 dead %51:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 4
+ ; RPU-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
+ ; RPU-NEXT: 3 5
+ ; RPU-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 6
+ ; RPU-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
+ ; RPU-NEXT: 3 6
+ ; RPU-NEXT: 5 6 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 5 5
+ ; RPU-NEXT: 5 5 dead %56:vgpr_32 = COPY %33:sgpr_32
+ ; RPU-NEXT: 4 5
+ ; RPU-NEXT: 4 5 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 dead %58:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 4 4
+ ; RPU-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
+ ; RPU-NEXT: 3 4
+ ; RPU-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 5 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
+ ; RPU-NEXT: 2 3
+ ; RPU-NEXT: 2 3 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
+ ; RPU-NEXT: 2 3
+ ; RPU-NEXT: 2 3 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 4 4
+ ; RPU-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
+ ; RPU-NEXT: 4 5
+ ; RPU-NEXT: 4 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 2 undef %70.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 2 dead %70.sub0:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
+ ; RPU-NEXT: 3 4
+ ; RPU-NEXT: 3 4 dead %74:vgpr_32 = COPY %61:sgpr_32
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %83:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %84:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %85:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %86:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %94:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %95:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %96:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %97:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %98:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %99:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %100:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 dead %105:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 dead %110:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 7 $vcc = IMPLICIT_DEF
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %115:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %116:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %117:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %118:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %119:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 3 dead %121:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 $sgpr4 = COPY $sgpr101
+ ; RPU-NEXT: 2 4
+ ; RPU-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
+ ; RPU-NEXT: 2 3
+ ; RPU-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
+ ; RPU-NEXT: 2 3
+ ; RPU-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
+ ; RPU-NEXT: 2 2
+ ; RPU-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 0 S_ENDPGM 0
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ;
+ ; RPD-LABEL: name: sched_dbg_value_crash
+ ; RPD: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
+ ; RPD-NEXT: 4 0
+ ; RPD-NEXT: 4 1 dead %2:vgpr_32 = COPY $vgpr2
+ ; RPD-NEXT: 4 0
+ ; RPD-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
+ ; RPD-NEXT: 4 1
+ ; RPD-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPD-NEXT: 6 2
+ ; RPD-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPD-NEXT: 8 2
+ ; RPD-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; RPD-NEXT: 10 2
+ ; RPD-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
+ ; RPD-NEXT: 12 2
+ ; RPD-NEXT: 14 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
+ ; RPD-NEXT: 10 2
+ ; RPD-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
+ ; RPD-NEXT: 10 2
+ ; RPD-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
+ ; RPD-NEXT: 10 2
+ ; RPD-NEXT: 11 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
+ ; RPD-NEXT: 9 2
+ ; RPD-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
+ ; RPD-NEXT: 9 3
+ ; RPD-NEXT: 9 4 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
+ ; RPD-NEXT: 8 2
+ ; RPD-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPD-NEXT: 8 2
+ ; RPD-NEXT: 8 3 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPD-NEXT: 8 1
+ ; RPD-NEXT: 8 2 dead %17:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 8 1
+ ; RPD-NEXT: 10 1 dead %18:sreg_64 = S_MOV_B64 0
+ ; RPD-NEXT: 8 1
+ ; RPD-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; RPD-NEXT: 9 1
+ ; RPD-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
+ ; RPD-NEXT: 8 1
+ ; RPD-NEXT: 10 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 6 3
+ ; RPD-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
+ ; RPD-NEXT: 6 2
+ ; RPD-NEXT: 8 4 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 4 6 dead %26:vreg_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
+ ; RPD-NEXT: 5 2
+ ; RPD-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
+ ; RPD-NEXT: 6 2
+ ; RPD-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RPD-NEXT: 7 2
+ ; RPD-NEXT: 8 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
+ ; RPD-NEXT: 6 2
+ ; RPD-NEXT: 7 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPD-NEXT: 5 2
+ ; RPD-NEXT: 6 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
+ ; RPD-NEXT: 3 2
+ ; RPD-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPD-NEXT: 4 2
+ ; RPD-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 4 3
+ ; RPD-NEXT: 6 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RPD-NEXT: 4 4
+ ; RPD-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
+ ; RPD-NEXT: 4 6
+ ; RPD-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
+ ; RPD-NEXT: 4 7
+ ; RPD-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
+ ; RPD-NEXT: 4 7
+ ; RPD-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
+ ; RPD-NEXT: 4 7
+ ; RPD-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 6 7
+ ; RPD-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
+ ; RPD-NEXT: 6 8
+ ; RPD-NEXT: 8 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 4 7
+ ; RPD-NEXT: 4 9 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
+ ; RPD-NEXT: 4 5
+ ; RPD-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 4 6
+ ; RPD-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
+ ; RPD-NEXT: 4 6
+ ; RPD-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
+ ; RPD-NEXT: 4 6
+ ; RPD-NEXT: 6 7 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 5 5
+ ; RPD-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
+ ; RPD-NEXT: 5 6
+ ; RPD-NEXT: 7 7 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 3 4
+ ; RPD-NEXT: 3 6 dead %51:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 4
+ ; RPD-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
+ ; RPD-NEXT: 3 5
+ ; RPD-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 6
+ ; RPD-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
+ ; RPD-NEXT: 3 6
+ ; RPD-NEXT: 5 7 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 5 5
+ ; RPD-NEXT: 5 6 dead %56:vgpr_32 = COPY %33:sgpr_32
+ ; RPD-NEXT: 4 5
+ ; RPD-NEXT: 6 6 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 6 dead %58:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 3 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 4 4
+ ; RPD-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
+ ; RPD-NEXT: 3 4
+ ; RPD-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 6 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
+ ; RPD-NEXT: 2 3
+ ; RPD-NEXT: 2 4 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
+ ; RPD-NEXT: 2 3
+ ; RPD-NEXT: 2 4 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 4 4
+ ; RPD-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
+ ; RPD-NEXT: 4 5
+ ; RPD-NEXT: 6 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 8 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 3 undef %70.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 3 dead %70.sub0:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
+ ; RPD-NEXT: 3 4
+ ; RPD-NEXT: 3 5 dead %74:vgpr_32 = COPY %61:sgpr_32
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 4 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 8 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %83:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %84:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %85:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %86:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 2 4 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 2 4 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %94:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %95:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %96:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %97:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %98:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %99:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %100:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 2 5 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 dead %105:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 2 6 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 dead %110:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 7 $vcc = IMPLICIT_DEF
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %115:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %116:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %117:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %118:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 dead %119:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 dead %121:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RPD-NEXT: 0 4
+ ; RPD-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 4 $sgpr4 = COPY $sgpr101
+ ; RPD-NEXT: 2 4
+ ; RPD-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
+ ; RPD-NEXT: 2 3
+ ; RPD-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
+ ; RPD-NEXT: 2 3
+ ; RPD-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
+ ; RPD-NEXT: 2 2
+ ; RPD-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 2 4 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 0 S_ENDPGM 0
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
%4:sgpr_64 = COPY $sgpr6_sgpr7
%3:sgpr_64 = COPY $sgpr4_sgpr5
%2:vgpr_32 = COPY $vgpr2
>From abb8cb249f584ed5e2807ee1ec32bc9f1c5fa2c2 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Thu, 26 Oct 2023 16:12:30 +0200
Subject: [PATCH 4/8] restore tests
---
...ched-assert-onlydbg-value-empty-region.mir | 239 +-----
.../CodeGen/AMDGPU/sched-crash-dbg-value.mir | 694 +-----------------
2 files changed, 21 insertions(+), 912 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index c780f091012e5b5..e4f56cc328e4782 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -1,7 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
+
# The sequence of DBG_VALUEs forms a scheduling region with 0 real
# instructions. The RegPressure tracker would end up skipping over any
# debug instructions, so it would point to the instruction
@@ -28,33 +27,33 @@ body: |
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ; CHECK-NEXT: undef [[V_ADD_F32_e32_:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead undef [[V_ADD_F32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec
- ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; CHECK-NEXT: undef %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
- ; CHECK-NEXT: undef [[V_ADD_F32_e32_1:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[V_ADD_F32_e32_1]], [[V_MOV_B32_e32_]], 32, 0, implicit $exec
- ; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
+ ; CHECK-NEXT: undef %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
+ ; CHECK-NEXT: undef %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
+ ; CHECK-NEXT: %11.sub1:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD3:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[GLOBAL_LOAD_DWORD2]], 0, 0, implicit $exec
- ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD4:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
- ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
+ ; CHECK-NEXT: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
- ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_1]]
- ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_2]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]]
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -66,209 +65,9 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
; CHECK-NEXT: S_NOP 0, implicit [[DEF8]]
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; RPU-LABEL: name: only_dbg_value_sched_region
- ; RPU: bb.0:
- ; RPU-NEXT: Live-in:
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
- ; RPU-NEXT: 0 1
- ; RPU-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
- ; RPU-NEXT: 0 5
- ; RPU-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
- ; RPU-NEXT: 0 6
- ; RPU-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
- ; RPU-NEXT: 0 7
- ; RPU-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
- ; RPU-NEXT: 0 8
- ; RPU-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
- ; RPU-NEXT: 0 9
- ; RPU-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 8
- ; RPU-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 7
- ; RPU-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
- ; RPU-NEXT: 0 6
- ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 0 7
- ; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 0 9
- ; RPU-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 0 11
- ; RPU-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 0 12
- ; RPU-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 13
- ; RPU-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 14
- ; RPU-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 0 16
- ; RPU-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 0 18
- ; RPU-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 19
- ; RPU-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; RPU-NEXT: 0 20
- ; RPU-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; RPU-NEXT: 0 21
- ; RPU-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 20
- ; RPU-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 20
- ; RPU-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
- ; RPU-NEXT: 0 16
- ; RPU-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
- ; RPU-NEXT: 0 15
- ; RPU-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
- ; RPU-NEXT: 0 14
- ; RPU-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
- ; RPU-NEXT: 0 12
- ; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
- ; RPU-NEXT: 0 10
- ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
- ; RPU-NEXT: 0 10
- ; RPU-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
- ; RPU-NEXT: 0 9
- ; RPU-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
- ; RPU-NEXT: 0 5
- ; RPU-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPU-NEXT: bb.1:
- ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
- ; RPU-NEXT: DBG_VALUE
- ; RPU-NEXT: DBG_VALUE
- ; RPU-NEXT: DBG_VALUE
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPU-NEXT: bb.2:
- ; RPU-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: bb.3:
- ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
- ; RPU-NEXT: 0 1
- ; RPU-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 0 0 S_ENDPGM 0
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: Live-out:
- ;
- ; RPD-LABEL: name: only_dbg_value_sched_region
- ; RPD: bb.0:
- ; RPD-NEXT: Live-in:
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
- ; RPD-NEXT: 0 1
- ; RPD-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
- ; RPD-NEXT: 0 5
- ; RPD-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
- ; RPD-NEXT: 0 6
- ; RPD-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
- ; RPD-NEXT: 0 7
- ; RPD-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
- ; RPD-NEXT: 0 8
- ; RPD-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
- ; RPD-NEXT: 0 9
- ; RPD-NEXT: 0 10 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 8
- ; RPD-NEXT: 0 9 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 7
- ; RPD-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
- ; RPD-NEXT: 0 6
- ; RPD-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 0 7
- ; RPD-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 0 9
- ; RPD-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 0 11
- ; RPD-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 0 12
- ; RPD-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 13
- ; RPD-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 14
- ; RPD-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 0 16
- ; RPD-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 0 18
- ; RPD-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 19
- ; RPD-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; RPD-NEXT: 0 20
- ; RPD-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; RPD-NEXT: 0 21
- ; RPD-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 20
- ; RPD-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 20
- ; RPD-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
- ; RPD-NEXT: 0 16
- ; RPD-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
- ; RPD-NEXT: 0 15
- ; RPD-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
- ; RPD-NEXT: 0 14
- ; RPD-NEXT: 0 15 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
- ; RPD-NEXT: 0 12
- ; RPD-NEXT: 0 13 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
- ; RPD-NEXT: 0 10
- ; RPD-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
- ; RPD-NEXT: 0 10
- ; RPD-NEXT: 0 12 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
- ; RPD-NEXT: 0 9
- ; RPD-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
- ; RPD-NEXT: 0 5
- ; RPD-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPD-NEXT: bb.1:
- ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
- ; RPD-NEXT: DBG_VALUE
- ; RPD-NEXT: DBG_VALUE
- ; RPD-NEXT: DBG_VALUE
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPD-NEXT: bb.2:
- ; RPD-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: bb.3:
- ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
- ; RPD-NEXT: 0 1
- ; RPD-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 0 0 S_ENDPGM 0
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: Live-out:
bb.0:
liveins: $vgpr0
@@ -312,8 +111,6 @@ body: |
DBG_VALUE
S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
- bb.3:
-
bb.2:
S_NOP 0, implicit %0
S_NOP 0, implicit %16
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index 28e03ed803763c0..f8c7be8e414ca15 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,7 +1,4 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
-# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
--- |
%struct.widget.0 = type { float, i32, i32 }
@@ -174,6 +171,8 @@
...
---
+# CHECK: name: sched_dbg_value_crash
+# CHECK: DBG_VALUE %99, $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
name: sched_dbg_value_crash
alignment: 1
@@ -198,694 +197,7 @@ constants:
body: |
bb.0.bb:
liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
- ; CHECK-LABEL: name: sched_dbg_value_crash
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 24, 0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[DEF]], [[COPY2]], implicit-def dead $vcc, implicit $exec
- ; CHECK-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], 12, [[S_LOAD_DWORDX2_IMM]], 0, implicit $exec
- ; CHECK-NEXT: dead [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 32, 0
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY1]], 4, 0
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 4, 0, implicit $exec
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MAD_I64_I32_e64_2:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_3:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[GLOBAL_LOAD_DWORD]], [[DEF1]], 0, 0, implicit $exec
- ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 0, 0
- ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM1:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 4, 0
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_MAD_I64_I32_e64_2]], 32, 0, implicit $exec
- ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM5:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sreg_64_xexec = S_MOV_B32 0
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32_xm0 = S_LSHR_B32 [[S_LOAD_DWORDX2_IMM3]].sub0, 16, implicit-def dead $scc
- ; CHECK-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_LOAD_DWORD_IMM]], 2, implicit-def dead $scc
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_LOAD_DWORDX2_IMM5]].sub0, [[S_LSHL_B64_]].sub0, implicit-def $scc
- ; CHECK-NEXT: dead undef [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_LOAD_DWORDX2_IMM5]].sub1, [[S_LSHL_B64_]].sub1, implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[S_LSHL_B64_1:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[S_LOAD_DWORD_IMM]], 2, implicit-def dead $scc
- ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 0, [[S_LSHL_B64_1]].sub0, implicit-def $scc
- ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sgpr_32 = S_ADDC_U32 [[S_LOAD_DWORDX2_IMM5]].sub1, [[S_LSHL_B64_1]].sub1, implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_]]
- ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[S_LOAD_DWORDX2_IMM5]].sub0, [[DEF2]].sub0, implicit-def $scc
- ; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sgpr_32 = S_ADDC_U32 [[S_LOAD_DWORDX2_IMM5]].sub1, [[DEF2]].sub1, implicit-def dead $scc, implicit $scc
- ; CHECK-NEXT: undef [[V_ASHRREV_I32_e32_:%[0-9]+]].sub1:vreg_64 = V_ASHRREV_I32_e32 31, [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec
- ; CHECK-NEXT: [[V_ASHRREV_I32_e32_:%[0-9]+]].sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
- ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 3, [[V_ASHRREV_I32_e32_]], implicit $exec
- ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 0, [[V_LSHLREV_B64_e64_]].sub0, 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[V_LSHLREV_B64_e64_]].sub1, [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
- ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
- ; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORD1:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[V_MAD_I64_I32_e64_2]], 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_MAD_I64_I32_e64_2]], 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
- ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_2:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_U32_2]], [[DEF4]].sub0, 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 0, [[DEF4]].sub1, [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_1]]
- ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_4:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_U32_2]], [[DEF3]].sub0, 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_4:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_2:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY6]], [[DEF3]].sub1, [[V_ADD_CO_U32_e64_5]], 0, implicit $exec
- ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[V_ADD_CO_U32_e64_4]], 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
- ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORDX4_1:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[V_ADD_CO_U32_e64_2]], 0, 0, implicit $exec
- ; CHECK-NEXT: dead [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; CHECK-NEXT: dead [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[DEF7:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: dead undef [[S_LOAD_DWORD_IMM1:%[0-9]+]].sub1:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[DEF10:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: dead undef [[DEF10:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 0, [[DEF11]], implicit $mode, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MUL_LO_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 0, [[COPY2]], implicit $exec
- ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[DEF20]], 0, [[DEF20]], 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, [[V_MUL_LO_I32_e64_]], implicit-def dead $vcc, implicit $exec
- ; CHECK-NEXT: dead [[V_ADD_CO_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, [[V_ADD_CO_U32_e32_1]], implicit-def dead $vcc, implicit $exec
- ; CHECK-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: $vcc = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_DIV_FMAS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, [[DEF22]], 0, [[V_FMA_F32_e64_1]], 0, [[DEF21]], 0, 0, implicit $vcc, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_3:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, [[DEF20]], 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_5:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[DEF20]], 0, [[DEF20]], 0, [[DEF30]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_RCP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[V_DIV_SCALE_F32_e64_6:%[0-9]+]]:vgpr_32, dead [[V_DIV_SCALE_F32_e64_7:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[DEF32:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF34:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF35:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF36:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF37:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF38:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF39:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF40:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: DBG_VALUE [[DEF28]], $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
- ; CHECK-NEXT: $vgpr1_vgpr2 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 0, [[V_FMA_F32_e64_1]], implicit $mode, implicit $exec
- ; CHECK-NEXT: dead [[V_MUL_LO_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 [[COPY3]], [[S_LOAD_DWORDX2_IMM3]].sub1, implicit $exec
- ; CHECK-NEXT: dead [[V_MAD_I64_I32_e64_4:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_5:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], 48, [[S_LOAD_DWORDX2_IMM1]], 0, implicit $exec
- ; CHECK-NEXT: dead [[S_MUL_I32_:%[0-9]+]]:sreg_32_xm0 = S_MUL_I32 [[S_LSHR_B32_]], [[S_LOAD_DWORDX2_IMM3]].sub1
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_]]
- ; CHECK-NEXT: dead [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_]]
- ; CHECK-NEXT: dead [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_ADDC_U32_1]]
- ; CHECK-NEXT: dead [[V_DIV_FIXUP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, [[V_DIV_FMAS_F32_e64_]], 0, [[DEF20]], 0, [[DEF30]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[DEF7:%[0-9]+]].sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
- ; CHECK-NEXT: [[V_LSHLREV_B64_e64_1:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 3, [[DEF7]], implicit $exec
- ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_6:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_U32_1]], [[V_LSHLREV_B64_e64_1]].sub0, 0, implicit $exec
- ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_6:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY7]], [[V_LSHLREV_B64_e64_1]].sub1, [[V_ADD_CO_U32_e64_7]], 0, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF15]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF14]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF13]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN [[DEF12]], %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
- ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
- ; CHECK-NEXT: [[V_LSHLREV_B64_e64_2:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 3, [[GLOBAL_LOAD_DWORD1]], implicit $exec
- ; CHECK-NEXT: undef [[V_ASHRREV_I32_e32_1:%[0-9]+]].sub1:vreg_64 = V_ASHRREV_I32_e32 31, [[GLOBAL_LOAD_DWORDX2_2]].sub0, implicit $exec
- ; CHECK-NEXT: dead undef [[V_ASHRREV_I32_e32_1:%[0-9]+]].sub0:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_2]].sub0
- ; CHECK-NEXT: undef [[V_ADD_CO_U32_e64_8:%[0-9]+]].sub0:vreg_64, [[V_ADD_CO_U32_e64_9:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 0, [[V_LSHLREV_B64_e64_2]].sub0, 0, implicit $exec
- ; CHECK-NEXT: dead undef [[V_ADD_CO_U32_e64_8:%[0-9]+]].sub1:vreg_64, dead [[V_ADDC_U32_e64_4:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 0, [[V_LSHLREV_B64_e64_2]].sub1, [[V_ADD_CO_U32_e64_9]], 0, implicit $exec
- ; CHECK-NEXT: $sgpr4 = COPY $sgpr101
- ; CHECK-NEXT: $vgpr0 = COPY [[DEF38]]
- ; CHECK-NEXT: $vgpr3 = COPY [[DEF40]]
- ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL [[SI_PC_ADD_REL_OFFSET]], @func, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
- ; CHECK-NEXT: dead [[V_MAD_I64_I32_e64_6:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_7:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], [[DEF1]], 0, 0, implicit $exec
- ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: S_ENDPGM 0
- ;
- ; RPU-LABEL: name: sched_dbg_value_crash
- ; RPU: Live-in:
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
- ; RPU-NEXT: 4 0
- ; RPU-NEXT: 4 0 dead %2:vgpr_32 = COPY $vgpr2
- ; RPU-NEXT: 4 0
- ; RPU-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
- ; RPU-NEXT: 4 1
- ; RPU-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
- ; RPU-NEXT: 4 2
- ; RPU-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RPU-NEXT: 6 2
- ; RPU-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RPU-NEXT: 8 2
- ; RPU-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RPU-NEXT: 10 2
- ; RPU-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
- ; RPU-NEXT: 12 2
- ; RPU-NEXT: 12 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
- ; RPU-NEXT: 10 2
- ; RPU-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
- ; RPU-NEXT: 10 2
- ; RPU-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
- ; RPU-NEXT: 10 2
- ; RPU-NEXT: 10 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
- ; RPU-NEXT: 9 2
- ; RPU-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
- ; RPU-NEXT: 9 3
- ; RPU-NEXT: 9 3 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
- ; RPU-NEXT: 8 2
- ; RPU-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RPU-NEXT: 8 2
- ; RPU-NEXT: 8 2 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RPU-NEXT: 8 1
- ; RPU-NEXT: 8 1 dead %17:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 8 1
- ; RPU-NEXT: 8 1 dead %18:sreg_64 = S_MOV_B64 0
- ; RPU-NEXT: 8 1
- ; RPU-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
- ; RPU-NEXT: 9 1
- ; RPU-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RPU-NEXT: 8 1
- ; RPU-NEXT: 8 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
- ; RPU-NEXT: 6 3
- ; RPU-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
- ; RPU-NEXT: 6 2
- ; RPU-NEXT: 6 2 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
- ; RPU-NEXT: 4 2
- ; RPU-NEXT: 4 2 dead %26:vreg_128 = IMPLICIT_DEF
- ; RPU-NEXT: 4 2
- ; RPU-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
- ; RPU-NEXT: 5 2
- ; RPU-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
- ; RPU-NEXT: 6 2
- ; RPU-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
- ; RPU-NEXT: 7 2
- ; RPU-NEXT: 7 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
- ; RPU-NEXT: 6 2
- ; RPU-NEXT: 6 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RPU-NEXT: 5 2
- ; RPU-NEXT: 5 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
- ; RPU-NEXT: 3 2
- ; RPU-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
- ; RPU-NEXT: 4 2
- ; RPU-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
- ; RPU-NEXT: 4 2
- ; RPU-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
- ; RPU-NEXT: 4 2
- ; RPU-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RPU-NEXT: 4 2
- ; RPU-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 4 3
- ; RPU-NEXT: 4 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
- ; RPU-NEXT: 4 4
- ; RPU-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
- ; RPU-NEXT: 4 6
- ; RPU-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
- ; RPU-NEXT: 4 7
- ; RPU-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
- ; RPU-NEXT: 4 7
- ; RPU-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
- ; RPU-NEXT: 4 7
- ; RPU-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
- ; RPU-NEXT: 6 7
- ; RPU-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
- ; RPU-NEXT: 6 8
- ; RPU-NEXT: 6 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
- ; RPU-NEXT: 4 7
- ; RPU-NEXT: 4 7 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
- ; RPU-NEXT: 4 5
- ; RPU-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 4 6
- ; RPU-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
- ; RPU-NEXT: 4 6
- ; RPU-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
- ; RPU-NEXT: 4 6
- ; RPU-NEXT: 6 6 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
- ; RPU-NEXT: 5 5
- ; RPU-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
- ; RPU-NEXT: 5 6
- ; RPU-NEXT: 5 6 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
- ; RPU-NEXT: 3 4
- ; RPU-NEXT: 3 4 dead %51:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 3 4
- ; RPU-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
- ; RPU-NEXT: 3 5
- ; RPU-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 3 6
- ; RPU-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
- ; RPU-NEXT: 3 6
- ; RPU-NEXT: 5 6 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
- ; RPU-NEXT: 5 5
- ; RPU-NEXT: 5 5 dead %56:vgpr_32 = COPY %33:sgpr_32
- ; RPU-NEXT: 4 5
- ; RPU-NEXT: 4 5 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 2 4 dead %58:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 2 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 4 4
- ; RPU-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
- ; RPU-NEXT: 3 4
- ; RPU-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 2 5 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
- ; RPU-NEXT: 2 3
- ; RPU-NEXT: 2 3 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
- ; RPU-NEXT: 2 3
- ; RPU-NEXT: 2 3 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
- ; RPU-NEXT: 2 2
- ; RPU-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
- ; RPU-NEXT: 4 4
- ; RPU-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
- ; RPU-NEXT: 4 5
- ; RPU-NEXT: 4 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 2 4 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
- ; RPU-NEXT: 2 2
- ; RPU-NEXT: 2 2 undef %70.sub1:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 2 2
- ; RPU-NEXT: 2 2 dead %70.sub0:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 2 2
- ; RPU-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
- ; RPU-NEXT: 3 4
- ; RPU-NEXT: 3 4 dead %74:vgpr_32 = COPY %61:sgpr_32
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 2 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 4 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 4 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 5
- ; RPU-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 6
- ; RPU-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
- ; RPU-NEXT: 0 5
- ; RPU-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %83:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %84:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %85:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %86:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %94:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %95:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %96:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %97:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %98:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %99:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %100:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 4 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 4 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 4 dead %105:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 5
- ; RPU-NEXT: 0 5 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 5
- ; RPU-NEXT: 0 5 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 5
- ; RPU-NEXT: 0 5 dead %110:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 5
- ; RPU-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 6
- ; RPU-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 7
- ; RPU-NEXT: 0 7 $vcc = IMPLICIT_DEF
- ; RPU-NEXT: 0 7
- ; RPU-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 5
- ; RPU-NEXT: 0 5 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %115:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %116:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %117:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %118:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %119:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 3 dead %121:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: 0 3
- ; RPU-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
- ; RPU-NEXT: DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 2 4 $sgpr4 = COPY $sgpr101
- ; RPU-NEXT: 2 4
- ; RPU-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
- ; RPU-NEXT: 2 3
- ; RPU-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
- ; RPU-NEXT: 2 3
- ; RPU-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
- ; RPU-NEXT: 2 2
- ; RPU-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: 0 2 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 0 0 S_ENDPGM 0
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: Live-out:
- ;
- ; RPD-LABEL: name: sched_dbg_value_crash
- ; RPD: Live-in:
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 2 0 %4:sgpr_64 = COPY $sgpr6_sgpr7
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 4 0 %3:sgpr_64 = COPY $sgpr4_sgpr5
- ; RPD-NEXT: 4 0
- ; RPD-NEXT: 4 1 dead %2:vgpr_32 = COPY $vgpr2
- ; RPD-NEXT: 4 0
- ; RPD-NEXT: 4 1 %1:vgpr_32 = COPY $vgpr1
- ; RPD-NEXT: 4 1
- ; RPD-NEXT: 4 2 %0:vgpr_32 = COPY $vgpr0
- ; RPD-NEXT: 4 2
- ; RPD-NEXT: 6 2 %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RPD-NEXT: 6 2
- ; RPD-NEXT: 8 2 %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RPD-NEXT: 8 2
- ; RPD-NEXT: 10 2 %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
- ; RPD-NEXT: 10 2
- ; RPD-NEXT: 12 2 %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 24, 0
- ; RPD-NEXT: 12 2
- ; RPD-NEXT: 14 2 dead %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4:sgpr_64, 32, 0
- ; RPD-NEXT: 10 2
- ; RPD-NEXT: 12 2 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3:sgpr_64, 4, 0
- ; RPD-NEXT: 10 2
- ; RPD-NEXT: 11 2 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0:sreg_64_xexec, 16, implicit-def dead $scc
- ; RPD-NEXT: 10 2
- ; RPD-NEXT: 11 2 dead %12:sreg_32_xm0 = S_MUL_I32 %11:sreg_32_xm0, %10.sub1:sreg_64_xexec
- ; RPD-NEXT: 9 2
- ; RPD-NEXT: 9 3 %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0:vgpr_32, implicit $exec
- ; RPD-NEXT: 9 3
- ; RPD-NEXT: 9 4 dead %14:vgpr_32 = V_MUL_LO_I32_e64 %1:vgpr_32, %10.sub1:sreg_64_xexec, implicit $exec
- ; RPD-NEXT: 8 2
- ; RPD-NEXT: 8 3 %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RPD-NEXT: 8 2
- ; RPD-NEXT: 8 3 dead %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RPD-NEXT: 8 1
- ; RPD-NEXT: 8 2 dead %17:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 8 1
- ; RPD-NEXT: 10 1 dead %18:sreg_64 = S_MOV_B64 0
- ; RPD-NEXT: 8 1
- ; RPD-NEXT: 9 1 %19:sreg_32_xm0_xexec = IMPLICIT_DEF
- ; RPD-NEXT: 9 1
- ; RPD-NEXT: 9 2 %20:vgpr_32 = V_ADD_CO_U32_e32 %19:sreg_32_xm0_xexec, %0:vgpr_32, implicit-def dead $vcc, implicit $exec
- ; RPD-NEXT: 8 1
- ; RPD-NEXT: 10 3 %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 12, %7:sreg_64_xexec, 0, implicit $exec
- ; RPD-NEXT: 6 3
- ; RPD-NEXT: 6 4 %23:vgpr_32 = GLOBAL_LOAD_DWORD %21:vreg_64, 4, 0, implicit $exec
- ; RPD-NEXT: 6 2
- ; RPD-NEXT: 8 4 dead %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, 48, %8:sreg_64_xexec, 0, implicit $exec
- ; RPD-NEXT: 4 2
- ; RPD-NEXT: 4 6 dead %26:vreg_128 = IMPLICIT_DEF
- ; RPD-NEXT: 4 2
- ; RPD-NEXT: 5 2 undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 0, 0
- ; RPD-NEXT: 5 2
- ; RPD-NEXT: 6 2 %27.sub1:sreg_64_xexec = S_MOV_B32 0
- ; RPD-NEXT: 6 2
- ; RPD-NEXT: 8 2 %28:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
- ; RPD-NEXT: 7 2
- ; RPD-NEXT: 8 2 undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0:sreg_64_xexec, %28.sub0:sreg_64, implicit-def $scc
- ; RPD-NEXT: 6 2
- ; RPD-NEXT: 7 2 dead %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %28.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RPD-NEXT: 5 2
- ; RPD-NEXT: 6 2 undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6:sreg_64_xexec, 4, 0
- ; RPD-NEXT: 3 2
- ; RPD-NEXT: 4 2 %27.sub0:sreg_64_xexec = IMPLICIT_DEF
- ; RPD-NEXT: 4 2
- ; RPD-NEXT: 6 2 %31:sreg_64 = S_LSHL_B64 %27:sreg_64_xexec, 2, implicit-def dead $scc
- ; RPD-NEXT: 4 2
- ; RPD-NEXT: 5 2 %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0:sreg_64, implicit-def $scc
- ; RPD-NEXT: 4 2
- ; RPD-NEXT: 5 2 %33:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %31.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RPD-NEXT: 4 2
- ; RPD-NEXT: 4 3 %34:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 4 3
- ; RPD-NEXT: 6 5 %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
- ; RPD-NEXT: 4 4
- ; RPD-NEXT: 4 6 %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 32, 0, implicit $exec
- ; RPD-NEXT: 4 6
- ; RPD-NEXT: 4 7 undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0:vreg_64, implicit $exec
- ; RPD-NEXT: 4 7
- ; RPD-NEXT: 4 8 %38.sub0:vreg_64 = COPY %37.sub0:vreg_64
- ; RPD-NEXT: 4 7
- ; RPD-NEXT: 4 9 %39:vreg_64 = V_LSHLREV_B64_e64 3, %38:vreg_64, implicit $exec
- ; RPD-NEXT: 4 7
- ; RPD-NEXT: 6 8 undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0:vreg_64, 0, implicit $exec
- ; RPD-NEXT: 6 7
- ; RPD-NEXT: 6 8 %42:vgpr_32 = COPY %33:sgpr_32
- ; RPD-NEXT: 6 8
- ; RPD-NEXT: 8 9 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42:vgpr_32, %39.sub1:vreg_64, %41:sreg_64_xexec, 0, implicit $exec
- ; RPD-NEXT: 4 7
- ; RPD-NEXT: 4 9 dead %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34, addrspace 1)
- ; RPD-NEXT: 4 5
- ; RPD-NEXT: 4 6 undef %45.sub1:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 4 6
- ; RPD-NEXT: 4 7 %45.sub0:vreg_64 = COPY %37.sub1:vreg_64
- ; RPD-NEXT: 4 6
- ; RPD-NEXT: 4 8 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45:vreg_64, implicit $exec
- ; RPD-NEXT: 4 6
- ; RPD-NEXT: 6 7 undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32:sreg_32_xm0, %46.sub0:vreg_64, 0, implicit $exec
- ; RPD-NEXT: 5 5
- ; RPD-NEXT: 5 6 %49:vgpr_32 = COPY %33:sgpr_32
- ; RPD-NEXT: 5 6
- ; RPD-NEXT: 7 7 dead %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49:vgpr_32, %46.sub1:vreg_64, %48:sreg_64_xexec, 0, implicit $exec
- ; RPD-NEXT: 3 4
- ; RPD-NEXT: 3 6 dead %51:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 3 4
- ; RPD-NEXT: 3 5 undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35:vreg_64, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8, addrspace 1)
- ; RPD-NEXT: 3 5
- ; RPD-NEXT: 3 6 %52.sub1:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 3 6
- ; RPD-NEXT: 3 8 %53:vreg_64 = V_LSHLREV_B64_e64 3, %52:vreg_64, implicit $exec
- ; RPD-NEXT: 3 6
- ; RPD-NEXT: 5 7 undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0:vreg_64, 0, implicit $exec
- ; RPD-NEXT: 5 5
- ; RPD-NEXT: 5 6 dead %56:vgpr_32 = COPY %33:sgpr_32
- ; RPD-NEXT: 4 5
- ; RPD-NEXT: 6 6 dead %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1:vreg_64, %55:sreg_64_xexec, 0, implicit $exec
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 2 6 dead %58:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 3 4 dead %30.sub1:sreg_64_xexec = IMPLICIT_DEF
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 4 4 %59:sreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 4 4
- ; RPD-NEXT: 5 4 %60:sreg_32_xm0 = S_ADD_U32 %5.sub0:sreg_64_xexec, %59.sub0:sreg_64, implicit-def $scc
- ; RPD-NEXT: 3 4
- ; RPD-NEXT: 4 4 %61:sgpr_32 = S_ADDC_U32 %5.sub1:sreg_64_xexec, %59.sub1:sreg_64, implicit-def dead $scc, implicit killed $scc
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 2 6 %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4, addrspace 1)
- ; RPD-NEXT: 2 3
- ; RPD-NEXT: 2 4 undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0:vreg_64, implicit $exec
- ; RPD-NEXT: 2 3
- ; RPD-NEXT: 2 4 dead %63.sub0:vreg_64 = COPY %62.sub0:vreg_64
- ; RPD-NEXT: 2 2
- ; RPD-NEXT: 2 4 %64:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 4 5 undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %64.sub0:vreg_64, 0, implicit $exec
- ; RPD-NEXT: 4 4
- ; RPD-NEXT: 4 5 %67:vgpr_32 = COPY %61:sgpr_32
- ; RPD-NEXT: 4 5
- ; RPD-NEXT: 6 6 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67:vgpr_32, %64.sub1:vreg_64, %66:sreg_64_xexec, 0, implicit $exec
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 2 8 dead %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58, addrspace 1)
- ; RPD-NEXT: 2 2
- ; RPD-NEXT: 2 3 undef %70.sub1:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 2 2
- ; RPD-NEXT: 2 3 dead %70.sub0:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 2 2
- ; RPD-NEXT: 2 4 %71:vreg_64 = IMPLICIT_DEF
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 4 5 undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60:sreg_32_xm0, %71.sub0:vreg_64, 0, implicit $exec
- ; RPD-NEXT: 3 4
- ; RPD-NEXT: 3 5 dead %74:vgpr_32 = COPY %61:sgpr_32
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 4 5 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1:vreg_64, %73:sreg_64_xexec, 0, implicit $exec
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 0 8 dead %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72:vreg_64, 0, 0, implicit $exec
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 %77:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 %78:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 0 5 dead %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77:vgpr_32, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 %80:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 0 5 %81:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 5
- ; RPD-NEXT: 0 6 %82:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 6
- ; RPD-NEXT: 0 6 BUFFER_STORE_DWORD_OFFEN %82:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, implicit $exec
- ; RPD-NEXT: 0 5
- ; RPD-NEXT: 0 5 BUFFER_STORE_DWORD_OFFEN %81:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, implicit $exec
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 0 4 BUFFER_STORE_DWORD_OFFEN %80:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, implicit $exec
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 3 BUFFER_STORE_DWORD_OFFEN %78:vgpr_32, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, implicit $exec
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 dead %83:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 dead %84:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 dead %85:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 dead %86:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 %87:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 2 4 dead %88:vgpr_32, dead %89:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 dead %90:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %91:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 2 4 dead %92:vgpr_32, dead %93:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %87:vgpr_32, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 dead %94:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 dead %95:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 dead %96:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 dead %97:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 dead %98:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 dead %99:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 dead %100:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 %101:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 2 5 dead %102:vgpr_32, dead %103:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %87:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 0 5 dead %104:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 0 5 dead %105:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 0 5 %106:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 5
- ; RPD-NEXT: 2 6 dead %107:vgpr_32, dead %108:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 5
- ; RPD-NEXT: 0 6 dead %109:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %106:vgpr_32, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 5
- ; RPD-NEXT: 0 6 dead %110:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 5
- ; RPD-NEXT: 0 6 %111:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 6
- ; RPD-NEXT: 0 7 %112:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 7
- ; RPD-NEXT: 0 7 $vcc = IMPLICIT_DEF
- ; RPD-NEXT: 0 7
- ; RPD-NEXT: 0 8 %113:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %112:vgpr_32, 0, %106:vgpr_32, 0, %111:vgpr_32, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 5
- ; RPD-NEXT: 0 6 dead %114:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %113:vgpr_32, 0, %87:vgpr_32, 0, %101:vgpr_32, 0, 0, implicit $mode, implicit $exec
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 dead %115:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 dead %116:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 dead %117:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 dead %118:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 dead %119:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 3 %120:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 dead %121:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: 0 3
- ; RPD-NEXT: 0 4 %122:vgpr_32 = IMPLICIT_DEF
- ; RPD-NEXT: DBG_VALUE %99:vgpr_32, $noreg, !"bar", !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8; foo.cl:102:8 line no:102
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 0 4 ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; RPD-NEXT: 0 4
- ; RPD-NEXT: 2 4 %123:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 2 4 $sgpr4 = COPY $sgpr101
- ; RPD-NEXT: 2 4
- ; RPD-NEXT: 2 4 $vgpr0 = COPY %120:vgpr_32
- ; RPD-NEXT: 2 3
- ; RPD-NEXT: 2 3 $vgpr1_vgpr2 = IMPLICIT_DEF
- ; RPD-NEXT: 2 3
- ; RPD-NEXT: 2 3 $vgpr3 = COPY %122:vgpr_32
- ; RPD-NEXT: 2 2
- ; RPD-NEXT: 2 2 dead $sgpr30_sgpr31 = SI_CALL %123:sreg_64, @func, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 2 ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 2 4 dead %124:vreg_64, dead %125:sreg_64 = V_MAD_I64_I32_e64 %20:vgpr_32, %34:vgpr_32, 0, 0, implicit $exec
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 0 0 S_ENDPGM 0
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: Live-out:
+
%4:sgpr_64 = COPY $sgpr6_sgpr7
%3:sgpr_64 = COPY $sgpr4_sgpr5
%2:vgpr_32 = COPY $vgpr2
>From cd242e621048ada5fb5f63a175969274ca7ec0fc Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Thu, 26 Oct 2023 16:38:49 +0200
Subject: [PATCH 5/8] Improved version + comments.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 99 +++--
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 22 +-
.../CodeGen/AMDGPU/regpressure_printer.mir | 388 ++++++++++++++++++
3 files changed, 471 insertions(+), 38 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index d2f8f4cf5dcc581..26b6cb393269cb4 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -32,6 +32,17 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
return true;
}
+GCNRPTracker::LiveRegSet
+llvm::getIntersection(const GCNRPTracker::LiveRegSet &LR1,
+ const GCNRPTracker::LiveRegSet &LR2) {
+ GCNRPTracker::LiveRegSet Intersection;
+ for (auto [Reg, Mask] : LR1) {
+ LaneBitmask MaskIntersection = Mask & LR2.lookup(Reg);
+ if (MaskIntersection.any())
+ Intersection[Reg] = MaskIntersection;
+ }
+ return Intersection;
+}
///////////////////////////////////////////////////////////////////////////////
// GCNRegPressure
@@ -270,6 +281,14 @@ void GCNUpwardRPTracker::reset(const MachineInstr &MI,
GCNRPTracker::reset(MI, LiveRegsCopy, true);
}
+void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
+ const LiveRegSet &LiveRegs_) {
+ MRI = &MRI_;
+ LiveRegs = LiveRegs_;
+ MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
+}
+
+
void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
assert(MRI && "call reset first");
@@ -423,15 +442,16 @@ bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin,
LLVM_DUMP_METHOD
Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedLR,
- const TargetRegisterInfo *TRI) {
- return Printable([&LISLR, &TrackedLR, TRI](raw_ostream &OS) {
+ const TargetRegisterInfo *TRI,
+ StringRef Pfx) {
+ return Printable([&LISLR, &TrackedLR, TRI, Pfx](raw_ostream &OS) {
for (auto const &P : TrackedLR) {
auto I = LISLR.find(P.first);
if (I == LISLR.end()) {
- OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
+ OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
<< " isn't found in LIS reported set\n";
} else if (I->second != P.second) {
- OS << " " << printReg(P.first, TRI)
+ OS << Pfx << printReg(P.first, TRI)
<< " masks doesn't match: LIS reported " << PrintLaneMask(I->second)
<< ", tracked " << PrintLaneMask(P.second) << '\n';
}
@@ -439,7 +459,7 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
for (auto const &P : LISLR) {
auto I = TrackedLR.find(P.first);
if (I == TrackedLR.end()) {
- OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
+ OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
<< " isn't found in tracked set\n";
}
}
@@ -500,12 +520,10 @@ char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
-
const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
-
+
auto &OS = dbgs();
// Leading spaces are important for YAML syntax.
@@ -520,6 +538,14 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
});
};
+ auto ReportLISMismatchIfAny = [&](const GCNRPTracker::LiveRegSet &TrackedLR,
+ const GCNRPTracker::LiveRegSet &LISLR) {
+ if (LISLR != TrackedLR) {
+ OS << PFX " mis LIS: " << llvm::print(LISLR, MRI)
+ << reportMismatch(LISLR, TrackedLR, TRI, PFX " ");
+ }
+ };
+
// Register pressure before and at an instruction (in program order).
SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
@@ -531,41 +557,39 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
MBB.printName(OS);
OS << ":\n";
- if (MBB.empty()) {
- SlotIndex MBBSI = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
- GCNRPTracker::LiveRegSet LRThrough = getLiveRegs(MBBSI, LIS, MRI);
- GCNRegPressure RP = getRegPressure(MRI, LRThrough);
- OS << PFX " Live-through:" << llvm::print(LRThrough, MRI);
- OS << PFX " SGPR VGPR\n" << printRP(RP) << '\n';
- continue;
- }
+ SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
+ SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB);
GCNRPTracker::LiveRegSet LRAtMBBBegin, LRAtMBBEnd;
GCNRegPressure RPAtMBBEnd;
-
- if (UseDownwardTracker) {
- GCNDownwardRPTracker RPT(LIS);
- RPT.reset(MBB.instr_front());
- LRAtMBBBegin = RPT.getLiveRegs();
+ if (UseDownwardTracker) {
+ if (MBB.empty()) {
+ LRAtMBBBegin = LRAtMBBEnd = getLiveRegs(MBBStartSlot, LIS, MRI);
+ RPAtMBBEnd = getRegPressure(MRI, LRAtMBBBegin);
+ } else {
+ GCNDownwardRPTracker RPT(LIS);
+ RPT.reset(MBB.front());
+
+ LRAtMBBBegin = RPT.getLiveRegs();
+
+ while (!RPT.advanceBeforeNext()) {
+ GCNRegPressure RPBeforeMI = RPT.getPressure();
+ RPT.advanceToNext();
+ RP.emplace_back(RPBeforeMI, RPT.getPressure());
+ }
- while (!RPT.advanceBeforeNext()) {
- GCNRegPressure RPBeforeMI = RPT.getPressure();
- RPT.advanceToNext();
- RP.emplace_back(RPBeforeMI, RPT.getPressure());
+ LRAtMBBEnd = RPT.getLiveRegs();
+ RPAtMBBEnd = RPT.getPressure();
}
-
- LRAtMBBEnd = RPT.getLiveRegs();
- RPAtMBBEnd = RPT.getPressure();
-
} else {
GCNUpwardRPTracker RPT(LIS);
- RPT.reset(MBB.instr_back());
+ RPT.reset(MRI, MBBEndSlot);
RPT.moveMaxPressure(); // Clear max pressure.
LRAtMBBEnd = RPT.getLiveRegs();
RPAtMBBEnd = RPT.getPressure();
-
+
for (auto &MI : reverse(MBB)) {
RPT.recede(MI);
if (!MI.isDebugInstr())
@@ -575,7 +599,10 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
LRAtMBBBegin = RPT.getLiveRegs();
}
- OS << PFX " Live-in:" << llvm::print(LRAtMBBBegin, MRI);
+ OS << PFX " Live-in: " << llvm::print(LRAtMBBBegin, MRI);
+ if (!UseDownwardTracker)
+ ReportLISMismatchIfAny(LRAtMBBBegin, getLiveRegs(MBBStartSlot, LIS, MRI));
+
OS << PFX " SGPR VGPR\n";
int I = 0;
for (auto &MI : MBB) {
@@ -589,7 +616,13 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
MI.print(OS);
}
OS << printRP(RPAtMBBEnd) << '\n';
+
OS << PFX " Live-out:" << llvm::print(LRAtMBBEnd, MRI);
+ if (UseDownwardTracker)
+ ReportLISMismatchIfAny(LRAtMBBEnd, getLiveRegs(MBBEndSlot, LIS, MRI));
+
+ GCNRPTracker::LiveRegSet LRThr = getIntersection(LRAtMBBBegin, LRAtMBBEnd);
+ OS << PFX " Live-thr:" << llvm::print(LRThr, MRI);
}
OS << "...\n";
return false;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index f2256f68c2c7037..e46a8bc56827041 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -142,6 +142,10 @@ class GCNRPTracker {
}
};
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
class GCNUpwardRPTracker : public GCNRPTracker {
public:
GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
@@ -150,6 +154,14 @@ class GCNUpwardRPTracker : public GCNRPTracker {
// filling live regs upon this point using LIS
void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
+ // reset tracker and set the live register set to the specified value.
+ void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+
+ // reset tracker at the specified slot index.
+ void reset(const MachineRegisterInfo &MRI_, SlotIndex SI) {
+ reset(MRI_, llvm::getLiveRegs(SI, LIS, MRI_));
+ }
+
// move to the state just above the MI
void recede(const MachineInstr &MI);
@@ -198,10 +210,6 @@ LaneBitmask getLiveLaneMask(unsigned Reg,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
- const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI);
-
/// creates a map MachineInstr -> LiveRegSet
/// R - range of iterators on instructions
/// After - upon entry or exit of every instruction
@@ -270,6 +278,9 @@ GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
bool isEqual(const GCNRPTracker::LiveRegSet &S1,
const GCNRPTracker::LiveRegSet &S2);
+GCNRPTracker::LiveRegSet getIntersection(const GCNRPTracker::LiveRegSet &LR1,
+ const GCNRPTracker::LiveRegSet &LR2);
+
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr);
Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
@@ -277,7 +288,8 @@ Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedL,
- const TargetRegisterInfo *TRI);
+ const TargetRegisterInfo *TRI,
+ StringRef Pfx = " ");
struct GCNRegPressurePrinter : public MachineFunctionPass {
static char ID;
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
new file mode 100644
index 000000000000000..e2d38273e958be8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -0,0 +1,388 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
+
+
+---
+name: trivial
+tracksRegLiveness: true
+body: |
+ ; RP-LABEL: name: trivial
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 1 %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 2 1 %1:sgpr_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 1
+ ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 2 1
+ ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F
+ ; RP-NEXT: Live-thr: %0:0000000000000003 %1:000000000000000F
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 2 1
+ ; RP-NEXT: 2 1 S_NOP 0, implicit %0:vgpr_32, implicit %1:sgpr_64
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ ; RP-NEXT: Live-thr:
+ bb.0:
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %1:sgpr_64 = IMPLICIT_DEF
+ bb.1:
+
+ bb.2:
+ S_NOP 0, implicit %0, implicit %1
+...
+
+# This testcase shows a problem with LiveIntervals: it doesn't create
+# subranges for undefined-but-used subregisters. The upward tracker is able to
+# see the use of an undefined subregister and tracks it correctly.
+---
+name: upward_problem_lis_subregs_mismatch
+tracksRegLiveness: true
+body: |
+ ; RPU-LABEL: name: upward_problem_lis_subregs_mismatch
+ ; RPU: bb.0:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: bb.1:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RPU-NEXT: Live-thr: %0:0000000000000003 %1:000000000000000C
+ ; RPU-NEXT: bb.2:
+ ; RPU-NEXT: Live-in: %0:000000000000000F %1:000000000000000F
+ ; RPU-NEXT: mis LIS: %0:0000000000000003 %1:000000000000000C
+ ; RPU-NEXT: %0 masks doesn't match: LIS reported 0000000000000003, tracked 000000000000000F
+ ; RPU-NEXT: %1 masks doesn't match: LIS reported 000000000000000C, tracked 000000000000000F
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ;
+ ; RPD-LABEL: name: upward_problem_lis_subregs_mismatch
+ ; RPD: bb.0:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: bb.1:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RPD-NEXT: Live-thr: %0:0000000000000003 %1:000000000000000C
+ ; RPD-NEXT: bb.2:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: Live-thr:
+ bb.0:
+ undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+ undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+
+ bb.1:
+
+ bb.2:
+ S_NOP 0, implicit %0, implicit %1
+...
+---
+name: only_dbg_value_sched_region
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ waveLimiter: true
+body: |
+ ; RPU-LABEL: name: only_dbg_value_sched_region
+ ; RPU: bb.0:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RPU-NEXT: 0 8
+ ; RPU-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 8
+ ; RPU-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 11
+ ; RPU-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 12
+ ; RPU-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 13
+ ; RPU-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 14
+ ; RPU-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 16
+ ; RPU-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 18
+ ; RPU-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 19
+ ; RPU-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPU-NEXT: 0 21
+ ; RPU-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RPU-NEXT: 0 16
+ ; RPU-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 15
+ ; RPU-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 14
+ ; RPU-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 12
+ ; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 10
+ ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 10
+ ; RPU-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: bb.1:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: Live-thr: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: bb.2:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: Live-thr: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: bb.3:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 0 S_ENDPGM 0
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ;
+ ; RPD-LABEL: name: only_dbg_value_sched_region
+ ; RPD: bb.0:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RPD-NEXT: 0 8
+ ; RPD-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 10 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 8
+ ; RPD-NEXT: 0 9 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 11
+ ; RPD-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 12
+ ; RPD-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 13
+ ; RPD-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 14
+ ; RPD-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 16
+ ; RPD-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 18
+ ; RPD-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 19
+ ; RPD-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPD-NEXT: 0 21
+ ; RPD-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RPD-NEXT: 0 16
+ ; RPD-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 15
+ ; RPD-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 14
+ ; RPD-NEXT: 0 15 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 12
+ ; RPD-NEXT: 0 13 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 10
+ ; RPD-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 10
+ ; RPD-NEXT: 0 12 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: bb.1:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: Live-thr: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: bb.2:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: Live-thr: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: bb.3:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 0 S_ENDPGM 0
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: Live-thr:
+ bb.0:
+ liveins: $vgpr0
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vreg_64 = IMPLICIT_DEF
+ %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, implicit $exec
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, implicit $exec
+ undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec
+ %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ %5:vreg_64 = COPY %2
+ undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $mode, implicit $exec
+ %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $mode, implicit $exec
+ %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, implicit $exec
+ %8:vreg_64 = IMPLICIT_DEF
+ %9:vreg_64 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vreg_64 = IMPLICIT_DEF
+ %15:vreg_64 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $mode, implicit $exec
+ %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $mode, implicit $exec
+ DBG_VALUE
+ GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
+ %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, implicit $exec
+ %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, implicit $exec
+ %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
+ DBG_VALUE
+ DBG_VALUE
+ %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, implicit $exec
+ %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, implicit $exec
+ %23:vreg_64 = V_LSHLREV_B64_e64 2, %8, implicit $exec
+ S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17
+ GLOBAL_STORE_DWORD %15, %18, 0, 0, implicit $exec
+
+ bb.1:
+ DBG_VALUE
+ S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ DBG_VALUE
+ DBG_VALUE
+ S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ DBG_VALUE
+
+ bb.3:
+
+ bb.2:
+ S_NOP 0, implicit %0
+ S_NOP 0, implicit %16
+ S_ENDPGM 0
+...
+
>From 8f8857a0d8ef010c8df0784a01230dff13567263 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Fri, 27 Oct 2023 11:30:26 +0200
Subject: [PATCH 6/8] * Fixed release build by removing NDEBUG guards around
printing routines. * Fixed formatting.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 17 +++--------------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 6 ++----
2 files changed, 5 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 26b6cb393269cb4..27a73de82c07626 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -147,8 +147,6 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
O.getVGPRNum(ST.hasGFX90AInsts()));
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
return Printable([&RP, ST](raw_ostream &OS) {
OS << "VGPRs: " << RP.Value[GCNRegPressure::VGPR32] << ' '
@@ -167,7 +165,6 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
OS << '\n';
});
}
-#endif
static LaneBitmask getDefRegMask(const MachineOperand &MO,
const MachineRegisterInfo &MRI) {
@@ -288,7 +285,6 @@ void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
}
-
void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
assert(MRI && "call reset first");
@@ -438,12 +434,9 @@ bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin,
return advance(End);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedLR,
- const TargetRegisterInfo *TRI,
- StringRef Pfx) {
+ const TargetRegisterInfo *TRI, StringRef Pfx) {
return Printable([&LISLR, &TrackedLR, TRI, Pfx](raw_ostream &OS) {
for (auto const &P : TrackedLR) {
auto I = LISLR.find(P.first);
@@ -488,7 +481,6 @@ bool GCNUpwardRPTracker::isValid() const {
return true;
}
-LLVM_DUMP_METHOD
Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
const MachineRegisterInfo &MRI) {
return Printable([&LiveRegs, &MRI](raw_ostream &OS) {
@@ -504,11 +496,8 @@ Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
});
}
-LLVM_DUMP_METHOD
void GCNRegPressure::dump() const { dbgs() << print(*this); }
-#endif
-
static cl::opt<bool> UseDownwardTracker(
"amdgpu-print-rp-downward",
cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"),
@@ -542,9 +531,9 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
const GCNRPTracker::LiveRegSet &LISLR) {
if (LISLR != TrackedLR) {
OS << PFX " mis LIS: " << llvm::print(LISLR, MRI)
- << reportMismatch(LISLR, TrackedLR, TRI, PFX " ");
+ << reportMismatch(LISLR, TrackedLR, TRI, PFX " ");
}
- };
+ };
// Register pressure before and at an instruction (in program order).
SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index e46a8bc56827041..41f58fb53440856 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -142,8 +142,7 @@ class GCNRPTracker {
}
};
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
- const LiveIntervals &LIS,
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
class GCNUpwardRPTracker : public GCNRPTracker {
@@ -288,8 +287,7 @@ Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedL,
- const TargetRegisterInfo *TRI,
- StringRef Pfx = " ");
+ const TargetRegisterInfo *TRI, StringRef Pfx = " ");
struct GCNRegPressurePrinter : public MachineFunctionPass {
static char ID;
>From 839e32579c4b54d377b596a04c4ad2da84e43501 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Mon, 30 Oct 2023 08:47:09 +0100
Subject: [PATCH 7/8] Fixed live-through register set calculation, added a
test.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 77 ++++++++++-----
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 3 -
.../CodeGen/AMDGPU/regpressure_printer.mir | 97 +++++++++++++++++++
3 files changed, 148 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 27a73de82c07626..6bb2b8fccfb938b 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -32,18 +32,6 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
return true;
}
-GCNRPTracker::LiveRegSet
-llvm::getIntersection(const GCNRPTracker::LiveRegSet &LR1,
- const GCNRPTracker::LiveRegSet &LR2) {
- GCNRPTracker::LiveRegSet Intersection;
- for (auto [Reg, Mask] : LR1) {
- LaneBitmask MaskIntersection = Mask & LR2.lookup(Reg);
- if (MaskIntersection.any())
- Intersection[Reg] = MaskIntersection;
- }
- return Intersection;
-}
-
///////////////////////////////////////////////////////////////////////////////
// GCNRegPressure
@@ -508,6 +496,34 @@ char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
+// Return lanemask of Reg's subregs that are live-through at [Begin, End] and
+// are fully covered by Mask.
+static LaneBitmask
+getRegLiveThroughMask(const MachineRegisterInfo &MRI, const LiveIntervals &LIS,
+ Register Reg, SlotIndex Begin, SlotIndex End,
+ LaneBitmask Mask = LaneBitmask::getAll()) {
+
+ auto IsInOneSegment = [Begin, End](const LiveRange &LR) -> bool {
+ auto *Segment = LR.getSegmentContaining(Begin);
+ return Segment && Segment->contains(End);
+ };
+
+ LaneBitmask LiveThroughMask;
+ const LiveInterval &LI = LIS.getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ for (auto &SR : LI.subranges()) {
+ if ((SR.LaneMask & Mask) == SR.LaneMask && IsInOneSegment(SR))
+ LiveThroughMask |= SR.LaneMask;
+ }
+ } else {
+ LaneBitmask RegMask = MRI.getMaxLaneMaskForVReg(Reg);
+ if ((RegMask & Mask) == RegMask && IsInOneSegment(LI))
+ LiveThroughMask = RegMask;
+ }
+
+ return LiveThroughMask;
+}
+
bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
@@ -549,18 +565,18 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB);
- GCNRPTracker::LiveRegSet LRAtMBBBegin, LRAtMBBEnd;
+ GCNRPTracker::LiveRegSet LiveIn, LiveOut;
GCNRegPressure RPAtMBBEnd;
if (UseDownwardTracker) {
if (MBB.empty()) {
- LRAtMBBBegin = LRAtMBBEnd = getLiveRegs(MBBStartSlot, LIS, MRI);
- RPAtMBBEnd = getRegPressure(MRI, LRAtMBBBegin);
+ LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI);
+ RPAtMBBEnd = getRegPressure(MRI, LiveIn);
} else {
GCNDownwardRPTracker RPT(LIS);
RPT.reset(MBB.front());
- LRAtMBBBegin = RPT.getLiveRegs();
+ LiveIn = RPT.getLiveRegs();
while (!RPT.advanceBeforeNext()) {
GCNRegPressure RPBeforeMI = RPT.getPressure();
@@ -568,7 +584,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RP.emplace_back(RPBeforeMI, RPT.getPressure());
}
- LRAtMBBEnd = RPT.getLiveRegs();
+ LiveOut = RPT.getLiveRegs();
RPAtMBBEnd = RPT.getPressure();
}
} else {
@@ -576,7 +592,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RPT.reset(MRI, MBBEndSlot);
RPT.moveMaxPressure(); // Clear max pressure.
- LRAtMBBEnd = RPT.getLiveRegs();
+ LiveOut = RPT.getLiveRegs();
RPAtMBBEnd = RPT.getPressure();
for (auto &MI : reverse(MBB)) {
@@ -585,12 +601,12 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
}
- LRAtMBBBegin = RPT.getLiveRegs();
+ LiveIn = RPT.getLiveRegs();
}
- OS << PFX " Live-in: " << llvm::print(LRAtMBBBegin, MRI);
+ OS << PFX " Live-in: " << llvm::print(LiveIn, MRI);
if (!UseDownwardTracker)
- ReportLISMismatchIfAny(LRAtMBBBegin, getLiveRegs(MBBStartSlot, LIS, MRI));
+ ReportLISMismatchIfAny(LiveIn, getLiveRegs(MBBStartSlot, LIS, MRI));
OS << PFX " SGPR VGPR\n";
int I = 0;
@@ -606,12 +622,21 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
}
OS << printRP(RPAtMBBEnd) << '\n';
- OS << PFX " Live-out:" << llvm::print(LRAtMBBEnd, MRI);
+ OS << PFX " Live-out:" << llvm::print(LiveOut, MRI);
if (UseDownwardTracker)
- ReportLISMismatchIfAny(LRAtMBBEnd, getLiveRegs(MBBEndSlot, LIS, MRI));
-
- GCNRPTracker::LiveRegSet LRThr = getIntersection(LRAtMBBBegin, LRAtMBBEnd);
- OS << PFX " Live-thr:" << llvm::print(LRThr, MRI);
+ ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBEndSlot, LIS, MRI));
+
+ GCNRPTracker::LiveRegSet LiveThrough;
+ for (auto [Reg, Mask] : LiveIn) {
+ LaneBitmask MaskIntersection = Mask & LiveOut.lookup(Reg);
+ if (MaskIntersection.any()) {
+ LaneBitmask LTMask = getRegLiveThroughMask(
+ MRI, LIS, Reg, MBBStartSlot, MBBEndSlot, MaskIntersection);
+ if (LTMask.any())
+ LiveThrough[Reg] = LTMask;
+ }
+ }
+ OS << PFX " Live-thr:" << llvm::print(LiveThrough, MRI);
}
OS << "...\n";
return false;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 41f58fb53440856..c750fe74749e2b3 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -277,9 +277,6 @@ GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
bool isEqual(const GCNRPTracker::LiveRegSet &S1,
const GCNRPTracker::LiveRegSet &S2);
-GCNRPTracker::LiveRegSet getIntersection(const GCNRPTracker::LiveRegSet &LR1,
- const GCNRPTracker::LiveRegSet &LR2);
-
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr);
Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index e2d38273e958be8..d4e2e75f383d32b 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -40,6 +40,103 @@ body: |
bb.2:
S_NOP 0, implicit %0, implicit %1
...
+---
+name: live_through_test
+tracksRegLiveness: true
+body: |
+ ; RPU-LABEL: name: live_through_test
+ ; RPU: bb.0:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: Live-out: %0:00000000000000F3
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: bb.1:
+ ; RPU-NEXT: Live-in: %0:00000000000000F3
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: Live-out: %0:00000000000000C3
+ ; RPU-NEXT: Live-thr: %0:00000000000000C0
+ ; RPU-NEXT: bb.2:
+ ; RPU-NEXT: Live-in: %0:00000000000000C3
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ;
+ ; RPD-LABEL: name: live_through_test
+ ; RPD: bb.0:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: Live-out: %0:00000000000000F3
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: bb.1:
+ ; RPD-NEXT: Live-in: %0:00000000000000F3
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: Live-out: %0:00000000000000C3
+ ; RPD-NEXT: Live-thr: %0:00000000000000C0
+ ; RPD-NEXT: bb.2:
+ ; RPD-NEXT: Live-in: %0:00000000000000C3
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: Live-thr:
+ bb.0:
+ %0:sgpr_128 = IMPLICIT_DEF
+ bb.1:
+
+ S_NOP 0, implicit %0.sub0 ; kill sub0
+ %0.sub0 = IMPLICIT_DEF ; redef sub0
+
+ %0.sub1:sgpr_128 = IMPLICIT_DEF ; redef sub1
+
+ S_NOP 0, implicit %0.sub2 ; kill sub2
+ %0.sub2:sgpr_128 = IMPLICIT_DEF ; redef sub2
+ S_NOP 0, implicit %0.sub2 ; kill sub2
+
+ S_NOP 0, implicit %0.sub3 ; use sub3, live-through
+
+ bb.2:
+ S_NOP 0, implicit %0.sub3, implicit %0.sub0
+...
# This testcase shows the problem with LiveIntervals: it doesn't create
# subranges for undefined but used subregisters. Upward tracker is able to see
>From c3d37bd7f1c8fa02cc709ef036700b1f0e356a96 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Wed, 1 Nov 2023 19:50:02 +0100
Subject: [PATCH 8/8] Move live-through register set printing to another patch.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 40 -------------------
.../CodeGen/AMDGPU/regpressure_printer.mir | 23 -----------
2 files changed, 63 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 6bb2b8fccfb938b..a04c470b7b9762f 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -496,34 +496,6 @@ char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
-// Return lanemask of Reg's subregs that are live-through at [Begin, End] and
-// are fully covered by Mask.
-static LaneBitmask
-getRegLiveThroughMask(const MachineRegisterInfo &MRI, const LiveIntervals &LIS,
- Register Reg, SlotIndex Begin, SlotIndex End,
- LaneBitmask Mask = LaneBitmask::getAll()) {
-
- auto IsInOneSegment = [Begin, End](const LiveRange &LR) -> bool {
- auto *Segment = LR.getSegmentContaining(Begin);
- return Segment && Segment->contains(End);
- };
-
- LaneBitmask LiveThroughMask;
- const LiveInterval &LI = LIS.getInterval(Reg);
- if (LI.hasSubRanges()) {
- for (auto &SR : LI.subranges()) {
- if ((SR.LaneMask & Mask) == SR.LaneMask && IsInOneSegment(SR))
- LiveThroughMask |= SR.LaneMask;
- }
- } else {
- LaneBitmask RegMask = MRI.getMaxLaneMaskForVReg(Reg);
- if ((RegMask & Mask) == RegMask && IsInOneSegment(LI))
- LiveThroughMask = RegMask;
- }
-
- return LiveThroughMask;
-}
-
bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
@@ -625,18 +597,6 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
OS << PFX " Live-out:" << llvm::print(LiveOut, MRI);
if (UseDownwardTracker)
ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBEndSlot, LIS, MRI));
-
- GCNRPTracker::LiveRegSet LiveThrough;
- for (auto [Reg, Mask] : LiveIn) {
- LaneBitmask MaskIntersection = Mask & LiveOut.lookup(Reg);
- if (MaskIntersection.any()) {
- LaneBitmask LTMask = getRegLiveThroughMask(
- MRI, LIS, Reg, MBBStartSlot, MBBEndSlot, MaskIntersection);
- if (LTMask.any())
- LiveThrough[Reg] = LTMask;
- }
- }
- OS << PFX " Live-thr:" << llvm::print(LiveThrough, MRI);
}
OS << "...\n";
return false;
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index d4e2e75f383d32b..d53050167e98bef 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -17,13 +17,11 @@ body: |
; RP-NEXT: 2 1 %1:sgpr_64 = IMPLICIT_DEF
; RP-NEXT: 2 1
; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F
- ; RP-NEXT: Live-thr:
; RP-NEXT: bb.1:
; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F
; RP-NEXT: SGPR VGPR
; RP-NEXT: 2 1
; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F
- ; RP-NEXT: Live-thr: %0:0000000000000003 %1:000000000000000F
; RP-NEXT: bb.2:
; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F
; RP-NEXT: SGPR VGPR
@@ -31,7 +29,6 @@ body: |
; RP-NEXT: 2 1 S_NOP 0, implicit %0:vgpr_32, implicit %1:sgpr_64
; RP-NEXT: 0 0
; RP-NEXT: Live-out:
- ; RP-NEXT: Live-thr:
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
%1:sgpr_64 = IMPLICIT_DEF
@@ -52,7 +49,6 @@ body: |
; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
; RPU-NEXT: 3 0
; RPU-NEXT: Live-out: %0:00000000000000F3
- ; RPU-NEXT: Live-thr:
; RPU-NEXT: bb.1:
; RPU-NEXT: Live-in: %0:00000000000000F3
; RPU-NEXT: SGPR VGPR
@@ -72,7 +68,6 @@ body: |
; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
; RPU-NEXT: 2 0
; RPU-NEXT: Live-out: %0:00000000000000C3
- ; RPU-NEXT: Live-thr: %0:00000000000000C0
; RPU-NEXT: bb.2:
; RPU-NEXT: Live-in: %0:00000000000000C3
; RPU-NEXT: SGPR VGPR
@@ -80,7 +75,6 @@ body: |
; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
; RPU-NEXT: 0 0
; RPU-NEXT: Live-out:
- ; RPU-NEXT: Live-thr:
;
; RPD-LABEL: name: live_through_test
; RPD: bb.0:
@@ -90,7 +84,6 @@ body: |
; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
; RPD-NEXT: 3 0
; RPD-NEXT: Live-out: %0:00000000000000F3
- ; RPD-NEXT: Live-thr:
; RPD-NEXT: bb.1:
; RPD-NEXT: Live-in: %0:00000000000000F3
; RPD-NEXT: SGPR VGPR
@@ -110,7 +103,6 @@ body: |
; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
; RPD-NEXT: 2 0
; RPD-NEXT: Live-out: %0:00000000000000C3
- ; RPD-NEXT: Live-thr: %0:00000000000000C0
; RPD-NEXT: bb.2:
; RPD-NEXT: Live-in: %0:00000000000000C3
; RPD-NEXT: SGPR VGPR
@@ -118,7 +110,6 @@ body: |
; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
; RPD-NEXT: 0 0
; RPD-NEXT: Live-out:
- ; RPD-NEXT: Live-thr:
bb.0:
%0:sgpr_128 = IMPLICIT_DEF
bb.1:
@@ -155,13 +146,11 @@ body: |
; RPU-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
; RPU-NEXT: 0 2
; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
- ; RPU-NEXT: Live-thr:
; RPU-NEXT: bb.1:
; RPU-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
; RPU-NEXT: SGPR VGPR
; RPU-NEXT: 0 2
; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
- ; RPU-NEXT: Live-thr: %0:0000000000000003 %1:000000000000000C
; RPU-NEXT: bb.2:
; RPU-NEXT: Live-in: %0:000000000000000F %1:000000000000000F
; RPU-NEXT: mis LIS: %0:0000000000000003 %1:000000000000000C
@@ -172,7 +161,6 @@ body: |
; RPU-NEXT: 0 4 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
; RPU-NEXT: 0 0
; RPU-NEXT: Live-out:
- ; RPU-NEXT: Live-thr:
;
; RPD-LABEL: name: upward_problem_lis_subregs_mismatch
; RPD: bb.0:
@@ -184,13 +172,11 @@ body: |
; RPD-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
; RPD-NEXT: 0 2
; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
- ; RPD-NEXT: Live-thr:
; RPD-NEXT: bb.1:
; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
; RPD-NEXT: SGPR VGPR
; RPD-NEXT: 0 2
; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
- ; RPD-NEXT: Live-thr: %0:0000000000000003 %1:000000000000000C
; RPD-NEXT: bb.2:
; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
; RPD-NEXT: SGPR VGPR
@@ -198,7 +184,6 @@ body: |
; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
; RPD-NEXT: 0 0
; RPD-NEXT: Live-out:
- ; RPD-NEXT: Live-thr:
bb.0:
undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
@@ -288,7 +273,6 @@ body: |
; RPU-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
; RPU-NEXT: 0 2
; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPU-NEXT: Live-thr:
; RPU-NEXT: bb.1:
; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
; RPU-NEXT: SGPR VGPR
@@ -302,13 +286,11 @@ body: |
; RPU-NEXT: DBG_VALUE
; RPU-NEXT: 0 2
; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPU-NEXT: Live-thr: %0:0000000000000003 %16:0000000000000003
; RPU-NEXT: bb.2:
; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
; RPU-NEXT: SGPR VGPR
; RPU-NEXT: 0 2
; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPU-NEXT: Live-thr: %0:0000000000000003 %16:0000000000000003
; RPU-NEXT: bb.3:
; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
; RPU-NEXT: SGPR VGPR
@@ -320,7 +302,6 @@ body: |
; RPU-NEXT: 0 0 S_ENDPGM 0
; RPU-NEXT: 0 0
; RPU-NEXT: Live-out:
- ; RPU-NEXT: Live-thr:
;
; RPD-LABEL: name: only_dbg_value_sched_region
; RPD: bb.0:
@@ -395,7 +376,6 @@ body: |
; RPD-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
; RPD-NEXT: 0 2
; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPD-NEXT: Live-thr:
; RPD-NEXT: bb.1:
; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
; RPD-NEXT: SGPR VGPR
@@ -409,13 +389,11 @@ body: |
; RPD-NEXT: DBG_VALUE
; RPD-NEXT: 0 2
; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPD-NEXT: Live-thr: %0:0000000000000003 %16:0000000000000003
; RPD-NEXT: bb.2:
; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
; RPD-NEXT: SGPR VGPR
; RPD-NEXT: 0 2
; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
- ; RPD-NEXT: Live-thr: %0:0000000000000003 %16:0000000000000003
; RPD-NEXT: bb.3:
; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
; RPD-NEXT: SGPR VGPR
@@ -427,7 +405,6 @@ body: |
; RPD-NEXT: 0 0 S_ENDPGM 0
; RPD-NEXT: 0 0
; RPD-NEXT: Live-out:
- ; RPD-NEXT: Live-thr:
bb.0:
liveins: $vgpr0
More information about the flang-commits
mailing list