[llvm] [AMDGPU] GCNRegPressure printing pass for testing. (PR #70031)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 24 04:47:33 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Valery Pykhtin (vpykhtin)

<details>
<summary>Changes</summary>

Take a look at the tests to see the output.

This probably should be also enabled for _llc_ tests, I haven't tried it yet.

---

Patch is 48.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70031.diff


6 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+3) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+1) 
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.cpp (+75) 
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.h (+17) 
- (modified) llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir (+125-17) 
- (modified) llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir (+416-3) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 97a413296c55e55..2c29710f8c8cb46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -191,6 +191,9 @@ extern char &AMDGPUImageIntrinsicOptimizerID;
 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
 extern char &AMDGPUPerfHintAnalysisID;
 
+void initializeGCNRegPressurePrinterPass(PassRegistry &);
+extern char &GCNRegPressurePrinterID;
+
 // Passes common to R600 and SI
 FunctionPass *createAMDGPUPromoteAlloca();
 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dc7321cd5de9fcd..375df27206f7b41 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -428,6 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeGCNPreRAOptimizationsPass(*PR);
   initializeGCNPreRALongBranchRegPass(*PR);
   initializeGCNRewritePartialRegUsesPass(*PR);
+  initializeGCNRegPressurePrinterPass(*PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1ca0f3b6e06b823..1db4e9dd151d847 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "GCNRegPressure.h"
+#include "AMDGPU.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 
 using namespace llvm;
@@ -487,3 +488,77 @@ LLVM_DUMP_METHOD
 void GCNRegPressure::dump() const { dbgs() << print(*this); }
 
 #endif
+
+char llvm::GCNRegPressurePrinter::ID = 0;
+char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
+
+INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
+
+bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  GCNUpwardRPTracker RPT(LIS);
+
+  auto &OS = dbgs();
+
+// Leading spaces are important for YAML syntax.
+#define PFX "  "
+
+  OS << "---\nname: " << MF.getName() << "\nbody:             |\n";
+
+  auto printRP = [](const GCNRegPressure &RP) {
+    return Printable([&RP](raw_ostream &OS) {
+      OS << format(PFX "  %-5d", RP.getSGPRNum())
+         << format(" %-5d", RP.getVGPRNum(false));
+    });
+  };
+
+  // Register pressure before and at an instruction (in program order).
+  SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
+
+  for (auto &MBB : MF) {
+    OS << PFX;
+    MBB.printName(OS);
+    OS << ":\n";
+
+    if (MBB.empty()) {
+      SlotIndex MBBSI = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
+      GCNRPTracker::LiveRegSet LRThrough = getLiveRegs(MBBSI, LIS, MRI);
+      GCNRegPressure RP = getRegPressure(MRI, LRThrough);
+      OS << PFX "  Live-through:" << llvm::print(LRThrough, MRI);
+      OS << PFX "  SGPR  VGPR\n" << printRP(RP) << '\n';
+      continue;
+    }
+
+    RPT.reset(MBB.instr_back());
+    RPT.moveMaxPressure(); // Clear max pressure.
+
+    GCNRPTracker::LiveRegSet LRAtMBBEnd = RPT.getLiveRegs();
+    GCNRegPressure RPAtMBBEnd = RPT.getPressure();
+
+    RP.clear();
+    RP.reserve(MBB.size());
+    for (auto &MI : reverse(MBB)) {
+      RPT.recede(MI);
+      RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+    }
+
+    OS << PFX "  Live-in:" << llvm::print(RPT.getLiveRegs(), MRI);
+    OS << PFX "  SGPR  VGPR\n";
+    auto I = RP.rbegin();
+    for (auto &MI : MBB) {
+      auto &[RPBeforeInstr, RPAtInstr] = *I++;
+      OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << "  ";
+      MI.print(OS);
+    }
+    OS << printRP(RPAtMBBEnd) << '\n';
+    OS << PFX "  Live-out:" << llvm::print(LRAtMBBEnd, MRI);
+  }
+  OS << "...\n";
+  return false;
+
+#undef PFX
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 72e18acc1b8e494..f2256f68c2c7037 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,6 +128,8 @@ class GCNRPTracker {
 
   void clearMaxPressure() { MaxPressure.clear(); }
 
+  GCNRegPressure getPressure() const { return CurPressure; }
+
   // returns MaxPressure, resetting it
   decltype(MaxPressure) moveMaxPressure() {
     auto Res = MaxPressure;
@@ -277,6 +279,21 @@ Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
                          const GCNRPTracker::LiveRegSet &TrackedL,
                          const TargetRegisterInfo *TRI);
 
+struct GCNRegPressurePrinter : public MachineFunctionPass {
+  static char ID;
+
+public:
+  GCNRegPressurePrinter() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LiveIntervals>();
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index e4f56cc328e4782..138c8e785dec280 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
 
 # The sequence of DBG_VALUEs forms a scheduling region with 0 real
 # instructions. The RegPressure tracker would end up skipping over any
@@ -27,33 +28,33 @@ body:             |
   ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec
   ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, implicit $exec
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]]
-  ; CHECK-NEXT:   undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   undef [[V_ADD_F32_e32_:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   dead undef [[V_ADD_F32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
   ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec
-  ; CHECK-NEXT:   undef %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
-  ; CHECK-NEXT:   %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
-  ; CHECK-NEXT:   undef %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
-  ; CHECK-NEXT:   %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
-  ; CHECK-NEXT:   undef %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
+  ; CHECK-NEXT:   undef [[V_ADD_F32_e32_1:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[V_ADD_F32_e32_1:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   GLOBAL_STORE_DWORDX2 [[V_ADD_F32_e32_1]], [[V_MOV_B32_e32_]], 32, 0, implicit $exec
+  ; CHECK-NEXT:   undef [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
   ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
-  ; CHECK-NEXT:   %11.sub1:vreg_64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
-  ; CHECK-NEXT:   dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
-  ; CHECK-NEXT:   dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
-  ; CHECK-NEXT:   dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   dead [[GLOBAL_LOAD_DWORD3:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[GLOBAL_LOAD_DWORD2]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   dead [[GLOBAL_LOAD_DWORD4:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   dead [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
   ; CHECK-NEXT:   [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; CHECK-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
   ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; CHECK-NEXT:   S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]]
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_1]]
+  ; CHECK-NEXT:   GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_2]], 0, 0, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
@@ -65,9 +66,114 @@ body:             |
   ; CHECK-NEXT:   S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   S_NOP 0, implicit [[COPY]]
   ; CHECK-NEXT:   S_NOP 0, implicit [[DEF8]]
   ; CHECK-NEXT:   S_ENDPGM 0
+  ;
+  ; RP-LABEL: name: only_dbg_value_sched_region
+  ; RP: bb.0:
+  ; RP-NEXT:   Live-in:
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   0     1      %0:vgpr_32 = COPY $vgpr0
+  ; RP-NEXT:   0     1
+  ; RP-NEXT:   0     3      %1:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     3
+  ; RP-NEXT:   0     5      %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     5
+  ; RP-NEXT:   0     6      %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+  ; RP-NEXT:   0     6
+  ; RP-NEXT:   0     7      undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+  ; RP-NEXT:   0     7
+  ; RP-NEXT:   0     8      %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+  ; RP-NEXT:   0     8
+  ; RP-NEXT:   0     10     %5:vreg_64 = COPY %2:vreg_64
+  ; RP-NEXT:   0     9
+  ; RP-NEXT:   0     9      undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RP-NEXT:   0     8
+  ; RP-NEXT:   0     8      dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RP-NEXT:   0     7
+  ; RP-NEXT:   0     8      %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     6
+  ; RP-NEXT:   0     7      %8:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     7
+  ; RP-NEXT:   0     9      %9:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     9
+  ; RP-NEXT:   0     11     %10:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     11
+  ; RP-NEXT:   0     12     undef %11.sub1:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     12
+  ; RP-NEXT:   0     13     %12:vgpr_32 = IMPLICIT_DEF
+  ; RP-NEXT:   0     13
+  ; RP-NEXT:   0     14     %13:vgpr_32 = IMPLICIT_DEF
+  ; RP-NEXT:   0     14
+  ; RP-NEXT:   0     16     %14:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     16
+  ; RP-NEXT:   0     18     %15:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     18
+  ; RP-NEXT:   0     19     %16:vgpr_32 = IMPLICIT_DEF
+  ; RP-NEXT:   0     19
+  ; RP-NEXT:   0     20     %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; RP-NEXT:   0     20
+  ; RP-NEXT:   0     21     %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; RP-NEXT:   0     21
+  ; RP-NEXT:   0     22     undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RP-NEXT:   0     20
+  ; RP-NEXT:   0     21     %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+  ; RP-NEXT:   0     20
+  ; RP-NEXT:   0     20     GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+  ; RP-NEXT:   0     16
+  ; RP-NEXT:   0     17     %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     15
+  ; RP-NEXT:   0     16     %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     14
+  ; RP-NEXT:   0     14     dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     12
+  ; RP-NEXT:   0     12     dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     10
+  ; RP-NEXT:   0     10     dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     10
+  ; RP-NEXT:   0     11     %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+  ; RP-NEXT:   0     9
+  ; RP-NEXT:   0     9      S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+  ; RP-NEXT:   0     5
+  ; RP-NEXT:   0     5      GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT: bb.1:
+  ; RP-NEXT:   Live-in: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     2      S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     0      DBG_VALUE
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     0      DBG_VALUE
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     0      DBG_VALUE
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     2      S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT: bb.2:
+  ; RP-NEXT:   Live-through: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     2
+  ; RP-NEXT: bb.3:
+  ; RP-NEXT:   Live-in: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     2      S_NOP 0, implicit %0:vgpr_32
+  ; RP-NEXT:   0     1
+  ; RP-NEXT:   0     1      S_NOP 0, implicit %16:vgpr_32
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   0     0      S_ENDPGM 0
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   Live-out:
   bb.0:
     liveins: $vgpr0
 
@@ -111,6 +217,8 @@ body:             |
     DBG_VALUE
     S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
 
+  bb.3:
+
   bb.2:
     S_NOP 0, implicit %0
     S_NOP 0, implicit %16
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index f8c7be8e414ca15..3f33e795e1e6ad0 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,4 +1,6 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
 
 --- |
   %struct.widget.0 = type { float, i32, i32 }
@@ -171,8 +173,6 @@
 ...
 ---
 
-# CHECK: name: sched_dbg_value_crash
-# CHECK: DBG_VALUE %99, $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
 
 name:            sched_dbg_value_crash
 alignment:       1
@@ -197,7 +197,420 @@ constants:
 body:             |
   bb.0.bb:
     liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
-
+    ; CHECK-LABEL: name: sched_dbg_value_crash
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 24, 0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[DEF]], [[COPY2]], implicit-def dead $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], 12, [[S_LOAD_DWORDX2_IMM]], 0, implicit $exec
+    ; CHECK-NEXT: dead [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 32, 0
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY1]], 4, 0
+    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 4, 0, implicit $exec
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[V_MAD_I64_I32_e64_2:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_3:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[GLOBAL_LOAD_DWORD]], [[DEF1]], 0, 0, implicit $exec
+    ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 0, 0
+    ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM1:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 4, 0
+    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_MAD_I64_I32_e64_2]], 32, 0, implicit $exec
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM5:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sreg_64_xexec = S_MOV_B32 0
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32_xm0 = S_LSHR_B32 [[S_LOAD_DWORDX2_IMM3]].sub0, 16, implicit-def d...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/70031


More information about the llvm-commits mailing list