[llvm] [AMDGPU] GCNRegPressure printing pass for testing. (PR #70031)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 04:47:33 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Valery Pykhtin (vpykhtin)
<details>
<summary>Changes</summary>
Take a look at the tests to see the output.
This probably should be also enabled for _llc_ tests, I haven't tried it yet.
---
Patch is 48.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70031.diff
6 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+1)
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.cpp (+75)
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.h (+17)
- (modified) llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir (+125-17)
- (modified) llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir (+416-3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 97a413296c55e55..2c29710f8c8cb46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -191,6 +191,9 @@ extern char &AMDGPUImageIntrinsicOptimizerID;
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;
+void initializeGCNRegPressurePrinterPass(PassRegistry &);
+extern char &GCNRegPressurePrinterID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dc7321cd5de9fcd..375df27206f7b41 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -428,6 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGCNPreRAOptimizationsPass(*PR);
initializeGCNPreRALongBranchRegPass(*PR);
initializeGCNRewritePartialRegUsesPass(*PR);
+ initializeGCNRegPressurePrinterPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1ca0f3b6e06b823..1db4e9dd151d847 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "GCNRegPressure.h"
+#include "AMDGPU.h"
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
@@ -487,3 +488,77 @@ LLVM_DUMP_METHOD
void GCNRegPressure::dump() const { dbgs() << print(*this); }
#endif
+
+char llvm::GCNRegPressurePrinter::ID = 0;
+char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
+
+INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
+
+bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ GCNUpwardRPTracker RPT(LIS);
+
+ auto &OS = dbgs();
+
+// Leading spaces are important for YAML syntax.
+#define PFX " "
+
+ OS << "---\nname: " << MF.getName() << "\nbody: |\n";
+
+ auto printRP = [](const GCNRegPressure &RP) {
+ return Printable([&RP](raw_ostream &OS) {
+ OS << format(PFX " %-5d", RP.getSGPRNum())
+ << format(" %-5d", RP.getVGPRNum(false));
+ });
+ };
+
+ // Register pressure before and at an instruction (in program order).
+ SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
+
+ for (auto &MBB : MF) {
+ OS << PFX;
+ MBB.printName(OS);
+ OS << ":\n";
+
+ if (MBB.empty()) {
+ SlotIndex MBBSI = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
+ GCNRPTracker::LiveRegSet LRThrough = getLiveRegs(MBBSI, LIS, MRI);
+ GCNRegPressure RP = getRegPressure(MRI, LRThrough);
+ OS << PFX " Live-through:" << llvm::print(LRThrough, MRI);
+ OS << PFX " SGPR VGPR\n" << printRP(RP) << '\n';
+ continue;
+ }
+
+ RPT.reset(MBB.instr_back());
+ RPT.moveMaxPressure(); // Clear max pressure.
+
+ GCNRPTracker::LiveRegSet LRAtMBBEnd = RPT.getLiveRegs();
+ GCNRegPressure RPAtMBBEnd = RPT.getPressure();
+
+ RP.clear();
+ RP.reserve(MBB.size());
+ for (auto &MI : reverse(MBB)) {
+ RPT.recede(MI);
+ RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+ }
+
+ OS << PFX " Live-in:" << llvm::print(RPT.getLiveRegs(), MRI);
+ OS << PFX " SGPR VGPR\n";
+ auto I = RP.rbegin();
+ for (auto &MI : MBB) {
+ auto &[RPBeforeInstr, RPAtInstr] = *I++;
+ OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
+ MI.print(OS);
+ }
+ OS << printRP(RPAtMBBEnd) << '\n';
+ OS << PFX " Live-out:" << llvm::print(LRAtMBBEnd, MRI);
+ }
+ OS << "...\n";
+ return false;
+
+#undef PFX
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 72e18acc1b8e494..f2256f68c2c7037 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,6 +128,8 @@ class GCNRPTracker {
void clearMaxPressure() { MaxPressure.clear(); }
+ GCNRegPressure getPressure() const { return CurPressure; }
+
// returns MaxPressure, resetting it
decltype(MaxPressure) moveMaxPressure() {
auto Res = MaxPressure;
@@ -277,6 +279,21 @@ Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedL,
const TargetRegisterInfo *TRI);
+struct GCNRegPressurePrinter : public MachineFunctionPass {
+ static char ID;
+
+public:
+ GCNRegPressurePrinter() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index e4f56cc328e4782..138c8e785dec280 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
# The sequence of DBG_VALUEs forms a scheduling region with 0 real
# instructions. The RegPressure tracker would end up skipping over any
@@ -27,33 +28,33 @@ body: |
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]]
- ; CHECK-NEXT: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
- ; CHECK-NEXT: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_F32_e32_:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_F32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec
- ; CHECK-NEXT: undef %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
- ; CHECK-NEXT: undef %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
- ; CHECK-NEXT: %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
- ; CHECK-NEXT: undef %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_F32_e32_1:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[V_ADD_F32_e32_1]], [[V_MOV_B32_e32_]], 32, 0, implicit $exec
+ ; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
- ; CHECK-NEXT: %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
- ; CHECK-NEXT: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
- ; CHECK-NEXT: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD3:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[GLOBAL_LOAD_DWORD2]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD4:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
+ ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]]
- ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_1]]
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_2]], 0, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
@@ -65,9 +66,114 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
; CHECK-NEXT: S_NOP 0, implicit [[DEF8]]
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; RP-LABEL: name: only_dbg_value_sched_region
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 3
+ ; RP-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RP-NEXT: 0 8
+ ; RP-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RP-NEXT: 0 9
+ ; RP-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 8
+ ; RP-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 6
+ ; RP-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 7
+ ; RP-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 9
+ ; RP-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 11
+ ; RP-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 12
+ ; RP-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 13
+ ; RP-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 14
+ ; RP-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 16
+ ; RP-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RP-NEXT: 0 18
+ ; RP-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RP-NEXT: 0 19
+ ; RP-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RP-NEXT: 0 20
+ ; RP-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RP-NEXT: 0 21
+ ; RP-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 20
+ ; RP-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RP-NEXT: 0 20
+ ; RP-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RP-NEXT: 0 16
+ ; RP-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 15
+ ; RP-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 14
+ ; RP-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 12
+ ; RP-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 10
+ ; RP-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RP-NEXT: 0 10
+ ; RP-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RP-NEXT: 0 9
+ ; RP-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RP-NEXT: 0 5
+ ; RP-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 0 DBG_VALUE
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 0 DBG_VALUE
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 0 DBG_VALUE
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-through: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: bb.3:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 0 S_ENDPGM 0
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
bb.0:
liveins: $vgpr0
@@ -111,6 +217,8 @@ body: |
DBG_VALUE
S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ bb.3:
+
bb.2:
S_NOP 0, implicit %0
S_NOP 0, implicit %16
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index f8c7be8e414ca15..3f33e795e1e6ad0 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,4 +1,6 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
--- |
%struct.widget.0 = type { float, i32, i32 }
@@ -171,8 +173,6 @@
...
---
-# CHECK: name: sched_dbg_value_crash
-# CHECK: DBG_VALUE %99, $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
name: sched_dbg_value_crash
alignment: 1
@@ -197,7 +197,420 @@ constants:
body: |
bb.0.bb:
liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
-
+ ; CHECK-LABEL: name: sched_dbg_value_crash
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 24, 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[DEF]], [[COPY2]], implicit-def dead $vcc, implicit $exec
+ ; CHECK-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], 12, [[S_LOAD_DWORDX2_IMM]], 0, implicit $exec
+ ; CHECK-NEXT: dead [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 32, 0
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY1]], 4, 0
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 4, 0, implicit $exec
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MAD_I64_I32_e64_2:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_3:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[GLOBAL_LOAD_DWORD]], [[DEF1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 0, 0
+ ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM1:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 4, 0
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_MAD_I64_I32_e64_2]], 32, 0, implicit $exec
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM5:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sreg_64_xexec = S_MOV_B32 0
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32_xm0 = S_LSHR_B32 [[S_LOAD_DWORDX2_IMM3]].sub0, 16, implicit-def d...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/70031
More information about the llvm-commits
mailing list