[llvm] e808f8a - [AMDGPU] GCNRegPressurePrinter pass to print GCNRegPressure values for testing. (#70031)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 1 15:01:44 PDT 2023


Author: Valery Pykhtin
Date: 2023-11-01T23:01:39+01:00
New Revision: e808f8a61683ccd9da88f722135ff59ddcdb5aef

URL: https://github.com/llvm/llvm-project/commit/e808f8a61683ccd9da88f722135ff59ddcdb5aef
DIFF: https://github.com/llvm/llvm-project/commit/e808f8a61683ccd9da88f722135ff59ddcdb5aef.diff

LOG: [AMDGPU] GCNRegPressurePrinter pass to print GCNRegPressure values for testing. (#70031)

Using GCNDownwardRPTracker or GCNUpwardRPTracker, the pass collects register pressure values for a function and prints these values next to the instructions. The output can be used to generate FileCheck rules in MIR tests.

Added: 
    llvm/test/CodeGen/AMDGPU/regpressure_printer.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPU.h
    llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
    llvm/lib/Target/AMDGPU/GCNRegPressure.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 97a413296c55e55..2c29710f8c8cb46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -191,6 +191,9 @@ extern char &AMDGPUImageIntrinsicOptimizerID;
 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
 extern char &AMDGPUPerfHintAnalysisID;
 
+void initializeGCNRegPressurePrinterPass(PassRegistry &);
+extern char &GCNRegPressurePrinterID;
+
 // Passes common to R600 and SI
 FunctionPass *createAMDGPUPromoteAlloca();
 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dc7321cd5de9fcd..375df27206f7b41 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -428,6 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeGCNPreRAOptimizationsPass(*PR);
   initializeGCNPreRALongBranchRegPass(*PR);
   initializeGCNRewritePartialRegUsesPass(*PR);
+  initializeGCNRegPressurePrinterPass(*PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {

diff  --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1ca0f3b6e06b823..a04c470b7b9762f 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "GCNRegPressure.h"
+#include "AMDGPU.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 
 using namespace llvm;
@@ -31,7 +32,6 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
   return true;
 }
 
-
 ///////////////////////////////////////////////////////////////////////////////
 // GCNRegPressure
 
@@ -135,8 +135,6 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
                           O.getVGPRNum(ST.hasGFX90AInsts()));
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
 Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
   return Printable([&RP, ST](raw_ostream &OS) {
     OS << "VGPRs: " << RP.Value[GCNRegPressure::VGPR32] << ' '
@@ -155,7 +153,6 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
     OS << '\n';
   });
 }
-#endif
 
 static LaneBitmask getDefRegMask(const MachineOperand &MO,
                                  const MachineRegisterInfo &MRI) {
@@ -269,6 +266,13 @@ void GCNUpwardRPTracker::reset(const MachineInstr &MI,
   GCNRPTracker::reset(MI, LiveRegsCopy, true);
 }
 
+void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
+                               const LiveRegSet &LiveRegs_) {
+  MRI = &MRI_;
+  LiveRegs = LiveRegs_;
+  MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
+}
+
 void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
   assert(MRI && "call reset first");
 
@@ -418,19 +422,17 @@ bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin,
   return advance(End);
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
 Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
                                const GCNRPTracker::LiveRegSet &TrackedLR,
-                               const TargetRegisterInfo *TRI) {
-  return Printable([&LISLR, &TrackedLR, TRI](raw_ostream &OS) {
+                               const TargetRegisterInfo *TRI, StringRef Pfx) {
+  return Printable([&LISLR, &TrackedLR, TRI, Pfx](raw_ostream &OS) {
     for (auto const &P : TrackedLR) {
       auto I = LISLR.find(P.first);
       if (I == LISLR.end()) {
-        OS << "  " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
+        OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
            << " isn't found in LIS reported set\n";
       } else if (I->second != P.second) {
-        OS << "  " << printReg(P.first, TRI)
+        OS << Pfx << printReg(P.first, TRI)
            << " masks doesn't match: LIS reported " << PrintLaneMask(I->second)
            << ", tracked " << PrintLaneMask(P.second) << '\n';
       }
@@ -438,7 +440,7 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
     for (auto const &P : LISLR) {
       auto I = TrackedLR.find(P.first);
       if (I == TrackedLR.end()) {
-        OS << "  " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
+        OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
            << " isn't found in tracked set\n";
       }
     }
@@ -467,7 +469,6 @@ bool GCNUpwardRPTracker::isValid() const {
   return true;
 }
 
-LLVM_DUMP_METHOD
 Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
                       const MachineRegisterInfo &MRI) {
   return Printable([&LiveRegs, &MRI](raw_ostream &OS) {
@@ -483,7 +484,122 @@ Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
   });
 }
 
-LLVM_DUMP_METHOD
 void GCNRegPressure::dump() const { dbgs() << print(*this); }
 
-#endif
+static cl::opt<bool> UseDownwardTracker(
+    "amdgpu-print-rp-downward",
+    cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"),
+    cl::init(false), cl::Hidden);
+
+char llvm::GCNRegPressurePrinter::ID = 0;
+char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
+
+INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
+
+bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+  const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+  auto &OS = dbgs();
+
+// Leading spaces are important for YAML syntax.
+#define PFX "  "
+
+  OS << "---\nname: " << MF.getName() << "\nbody:             |\n";
+
+  auto printRP = [](const GCNRegPressure &RP) {
+    return Printable([&RP](raw_ostream &OS) {
+      OS << format(PFX "  %-5d", RP.getSGPRNum())
+         << format(" %-5d", RP.getVGPRNum(false));
+    });
+  };
+
+  auto ReportLISMismatchIfAny = [&](const GCNRPTracker::LiveRegSet &TrackedLR,
+                                    const GCNRPTracker::LiveRegSet &LISLR) {
+    if (LISLR != TrackedLR) {
+      OS << PFX "  mis LIS: " << llvm::print(LISLR, MRI)
+         << reportMismatch(LISLR, TrackedLR, TRI, PFX "    ");
+    }
+  };
+
+  // Register pressure before and at an instruction (in program order).
+  SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
+
+  for (auto &MBB : MF) {
+    RP.clear();
+    RP.reserve(MBB.size());
+
+    OS << PFX;
+    MBB.printName(OS);
+    OS << ":\n";
+
+    SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
+    SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB);
+
+    GCNRPTracker::LiveRegSet LiveIn, LiveOut;
+    GCNRegPressure RPAtMBBEnd;
+
+    if (UseDownwardTracker) {
+      if (MBB.empty()) {
+        LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI);
+        RPAtMBBEnd = getRegPressure(MRI, LiveIn);
+      } else {
+        GCNDownwardRPTracker RPT(LIS);
+        RPT.reset(MBB.front());
+
+        LiveIn = RPT.getLiveRegs();
+
+        while (!RPT.advanceBeforeNext()) {
+          GCNRegPressure RPBeforeMI = RPT.getPressure();
+          RPT.advanceToNext();
+          RP.emplace_back(RPBeforeMI, RPT.getPressure());
+        }
+
+        LiveOut = RPT.getLiveRegs();
+        RPAtMBBEnd = RPT.getPressure();
+      }
+    } else {
+      GCNUpwardRPTracker RPT(LIS);
+      RPT.reset(MRI, MBBEndSlot);
+      RPT.moveMaxPressure(); // Clear max pressure.
+
+      LiveOut = RPT.getLiveRegs();
+      RPAtMBBEnd = RPT.getPressure();
+
+      for (auto &MI : reverse(MBB)) {
+        RPT.recede(MI);
+        if (!MI.isDebugInstr())
+          RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+      }
+
+      LiveIn = RPT.getLiveRegs();
+    }
+
+    OS << PFX "  Live-in: " << llvm::print(LiveIn, MRI);
+    if (!UseDownwardTracker)
+      ReportLISMismatchIfAny(LiveIn, getLiveRegs(MBBStartSlot, LIS, MRI));
+
+    OS << PFX "  SGPR  VGPR\n";
+    int I = 0;
+    for (auto &MI : MBB) {
+      if (!MI.isDebugInstr()) {
+        auto &[RPBeforeInstr, RPAtInstr] =
+            RP[UseDownwardTracker ? I : (RP.size() - 1 - I)];
+        ++I;
+        OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << "  ";
+      } else
+        OS << PFX "               ";
+      MI.print(OS);
+    }
+    OS << printRP(RPAtMBBEnd) << '\n';
+
+    OS << PFX "  Live-out:" << llvm::print(LiveOut, MRI);
+    if (UseDownwardTracker)
+      ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBEndSlot, LIS, MRI));
+  }
+  OS << "...\n";
+  return false;
+
+#undef PFX
+}
\ No newline at end of file

diff  --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 72e18acc1b8e494..c750fe74749e2b3 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,6 +128,8 @@ class GCNRPTracker {
 
   void clearMaxPressure() { MaxPressure.clear(); }
 
+  GCNRegPressure getPressure() const { return CurPressure; }
+
   // returns MaxPressure, resetting it
   decltype(MaxPressure) moveMaxPressure() {
     auto Res = MaxPressure;
@@ -140,6 +142,9 @@ class GCNRPTracker {
   }
 };
 
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
+                                     const MachineRegisterInfo &MRI);
+
 class GCNUpwardRPTracker : public GCNRPTracker {
 public:
   GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
@@ -148,6 +153,14 @@ class GCNUpwardRPTracker : public GCNRPTracker {
   // filling live regs upon this point using LIS
   void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
 
+  // reset tracker and set live register set to the specified value.
+  void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+
+  // reset tracker at the specified slot index.
+  void reset(const MachineRegisterInfo &MRI_, SlotIndex SI) {
+    reset(MRI_, llvm::getLiveRegs(SI, LIS, MRI_));
+  }
+
   // move to the state just above the MI
   void recede(const MachineInstr &MI);
 
@@ -196,10 +209,6 @@ LaneBitmask getLiveLaneMask(unsigned Reg,
                             const LiveIntervals &LIS,
                             const MachineRegisterInfo &MRI);
 
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
-                                     const LiveIntervals &LIS,
-                                     const MachineRegisterInfo &MRI);
-
 /// creates a map MachineInstr -> LiveRegSet
 /// R - range of iterators on instructions
 /// After - upon entry or exit of every instruction
@@ -275,7 +284,22 @@ Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
 
 Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
                          const GCNRPTracker::LiveRegSet &TrackedL,
-                         const TargetRegisterInfo *TRI);
+                         const TargetRegisterInfo *TRI, StringRef Pfx = "  ");
+
+struct GCNRegPressurePrinter : public MachineFunctionPass {
+  static char ID;
+
+public:
+  GCNRegPressurePrinter() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LiveIntervals>();
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
 
 } // end namespace llvm
 

diff  --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
new file mode 100644
index 000000000000000..d53050167e98bef
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -0,0 +1,462 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
+
+
+---
+name:  trivial
+tracksRegLiveness: true
+body:             |
+  ; RP-LABEL: name: trivial
+  ; RP: bb.0:
+  ; RP-NEXT:   Live-in:
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   0     1      %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+  ; RP-NEXT:   0     1
+  ; RP-NEXT:   2     1      %1:sgpr_64 = IMPLICIT_DEF
+  ; RP-NEXT:   2     1
+  ; RP-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000F
+  ; RP-NEXT: bb.1:
+  ; RP-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000F
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   2     1
+  ; RP-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000F
+  ; RP-NEXT: bb.2:
+  ; RP-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000F
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   2     1
+  ; RP-NEXT:   2     1      S_NOP 0, implicit %0:vgpr_32, implicit %1:sgpr_64
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   Live-out:
+  bb.0:
+    %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+    %1:sgpr_64 = IMPLICIT_DEF
+  bb.1:
+
+  bb.2:
+    S_NOP 0, implicit %0, implicit %1
+...
+---
+name:  live_through_test
+tracksRegLiveness: true
+body:             |
+  ; RPU-LABEL: name: live_through_test
+  ; RPU: bb.0:
+  ; RPU-NEXT:   Live-in:
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:   0     0
+  ; RPU-NEXT:   3     0      %0:sgpr_128 = IMPLICIT_DEF
+  ; RPU-NEXT:   3     0
+  ; RPU-NEXT:   Live-out: %0:00000000000000F3
+  ; RPU-NEXT: bb.1:
+  ; RPU-NEXT:   Live-in:  %0:00000000000000F3
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:   3     0
+  ; RPU-NEXT:   3     0      S_NOP 0, implicit %0.sub0:sgpr_128
+  ; RPU-NEXT:   2     0
+  ; RPU-NEXT:   3     0      %0.sub0:sgpr_128 = IMPLICIT_DEF
+  ; RPU-NEXT:   3     0
+  ; RPU-NEXT:   3     0      %0.sub1:sgpr_128 = IMPLICIT_DEF
+  ; RPU-NEXT:   3     0
+  ; RPU-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
+  ; RPU-NEXT:   2     0
+  ; RPU-NEXT:   3     0      %0.sub2:sgpr_128 = IMPLICIT_DEF
+  ; RPU-NEXT:   3     0
+  ; RPU-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
+  ; RPU-NEXT:   2     0
+  ; RPU-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128
+  ; RPU-NEXT:   2     0
+  ; RPU-NEXT:   Live-out: %0:00000000000000C3
+  ; RPU-NEXT: bb.2:
+  ; RPU-NEXT:   Live-in:  %0:00000000000000C3
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:   2     0
+  ; RPU-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+  ; RPU-NEXT:   0     0
+  ; RPU-NEXT:   Live-out:
+  ;
+  ; RPD-LABEL: name: live_through_test
+  ; RPD: bb.0:
+  ; RPD-NEXT:   Live-in:
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:   0     0
+  ; RPD-NEXT:   4     0      %0:sgpr_128 = IMPLICIT_DEF
+  ; RPD-NEXT:   3     0
+  ; RPD-NEXT:   Live-out: %0:00000000000000F3
+  ; RPD-NEXT: bb.1:
+  ; RPD-NEXT:   Live-in:  %0:00000000000000F3
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:   3     0
+  ; RPD-NEXT:   3     0      S_NOP 0, implicit %0.sub0:sgpr_128
+  ; RPD-NEXT:   2     0
+  ; RPD-NEXT:   3     0      %0.sub0:sgpr_128 = IMPLICIT_DEF
+  ; RPD-NEXT:   3     0
+  ; RPD-NEXT:   4     0      %0.sub1:sgpr_128 = IMPLICIT_DEF
+  ; RPD-NEXT:   3     0
+  ; RPD-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
+  ; RPD-NEXT:   2     0
+  ; RPD-NEXT:   3     0      %0.sub2:sgpr_128 = IMPLICIT_DEF
+  ; RPD-NEXT:   3     0
+  ; RPD-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
+  ; RPD-NEXT:   2     0
+  ; RPD-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128
+  ; RPD-NEXT:   2     0
+  ; RPD-NEXT:   Live-out: %0:00000000000000C3
+  ; RPD-NEXT: bb.2:
+  ; RPD-NEXT:   Live-in:  %0:00000000000000C3
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:   2     0
+  ; RPD-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+  ; RPD-NEXT:   0     0
+  ; RPD-NEXT:   Live-out:
+  bb.0:
+    %0:sgpr_128 = IMPLICIT_DEF
+  bb.1:
+
+    S_NOP 0, implicit %0.sub0 ; kill sub0
+    %0.sub0 = IMPLICIT_DEF ; redef sub0
+
+    %0.sub1:sgpr_128 = IMPLICIT_DEF ; redef sub1
+
+    S_NOP 0, implicit %0.sub2 ; kill sub2
+    %0.sub2:sgpr_128 = IMPLICIT_DEF ; redef sub2
+    S_NOP 0, implicit %0.sub2 ; kill sub2
+
+    S_NOP 0, implicit %0.sub3 ; use sub3, live-through
+
+  bb.2:
+    S_NOP 0, implicit %0.sub3, implicit %0.sub0
+...
+
+# This testcase shows a problem with LiveIntervals: it doesn't create
+# subranges for undefined but used subregisters. The upward tracker is able to
+# see the use of an undefined subregister and tracks it correctly.
+---
+name:  upward_problem_lis_subregs_mismatch
+tracksRegLiveness: true
+body:             |
+  ; RPU-LABEL: name: upward_problem_lis_subregs_mismatch
+  ; RPU: bb.0:
+  ; RPU-NEXT:   Live-in:
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:   0     0
+  ; RPU-NEXT:   0     1      undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+  ; RPU-NEXT:   0     1
+  ; RPU-NEXT:   0     2      undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+  ; RPU-NEXT:   0     2
+  ; RPU-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
+  ; RPU-NEXT: bb.1:
+  ; RPU-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000C
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:   0     2
+  ; RPU-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
+  ; RPU-NEXT: bb.2:
+  ; RPU-NEXT:   Live-in:  %0:000000000000000F %1:000000000000000F
+  ; RPU-NEXT:   mis LIS:  %0:0000000000000003 %1:000000000000000C
+  ; RPU-NEXT:     %0 masks doesn't match: LIS reported 0000000000000003, tracked 000000000000000F
+  ; RPU-NEXT:     %1 masks doesn't match: LIS reported 000000000000000C, tracked 000000000000000F
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:   0     4
+  ; RPU-NEXT:   0     4      S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
+  ; RPU-NEXT:   0     0
+  ; RPU-NEXT:   Live-out:
+  ;
+  ; RPD-LABEL: name: upward_problem_lis_subregs_mismatch
+  ; RPD: bb.0:
+  ; RPD-NEXT:   Live-in:
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:   0     0
+  ; RPD-NEXT:   0     1      undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+  ; RPD-NEXT:   0     1
+  ; RPD-NEXT:   0     2      undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+  ; RPD-NEXT:   0     2
+  ; RPD-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
+  ; RPD-NEXT: bb.1:
+  ; RPD-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000C
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:   0     2
+  ; RPD-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
+  ; RPD-NEXT: bb.2:
+  ; RPD-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000C
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:   0     2
+  ; RPD-NEXT:   0     2      S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
+  ; RPD-NEXT:   0     0
+  ; RPD-NEXT:   Live-out:
+  bb.0:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+    undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+
+  bb.1:
+
+  bb.2:
+    S_NOP 0, implicit %0, implicit %1
+...
+---
+name:            only_dbg_value_sched_region
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  waveLimiter:     true
+body:             |
+  ; RPU-LABEL: name: only_dbg_value_sched_region
+  ; RPU: bb.0:
+  ; RPU-NEXT:   Live-in:
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:   0     0
+  ; RPU-NEXT:   0     1      %0:vgpr_32 = COPY $vgpr0
+  ; RPU-NEXT:   0     1
+  ; RPU-NEXT:   0     3      %1:vreg_64 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     3
+  ; RPU-NEXT:   0     5      %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     5
+  ; RPU-NEXT:   0     6      %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+  ; RPU-NEXT:   0     6
+  ; RPU-NEXT:   0     7      undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+  ; RPU-NEXT:   0     7
+  ; RPU-NEXT:   0     8      %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+  ; RPU-NEXT:   0     8
+  ; RPU-NEXT:   0     10     %5:vreg_64 = COPY %2:vreg_64
+  ; RPU-NEXT:   0     9
+  ; RPU-NEXT:   0     9      undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RPU-NEXT:   0     8
+  ; RPU-NEXT:   0     8      dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RPU-NEXT:   0     7
+  ; RPU-NEXT:   0     8      %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     6
+  ; RPU-NEXT:   0     7      %8:vreg_64 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     7
+  ; RPU-NEXT:   0     9      %9:vreg_64 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     9
+  ; RPU-NEXT:   0     11     %10:vreg_64 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     11
+  ; RPU-NEXT:   0     12     undef %11.sub1:vreg_64 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     12
+  ; RPU-NEXT:   0     13     %12:vgpr_32 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     13
+  ; RPU-NEXT:   0     14     %13:vgpr_32 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     14
+  ; RPU-NEXT:   0     16     %14:vreg_64 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     16
+  ; RPU-NEXT:   0     18     %15:vreg_64 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     18
+  ; RPU-NEXT:   0     19     %16:vgpr_32 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     19
+  ; RPU-NEXT:   0     20     %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; RPU-NEXT:   0     20
+  ; RPU-NEXT:   0     21     %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; RPU-NEXT:   0     21
+  ; RPU-NEXT:   0     22     undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RPU-NEXT:   0     20
+  ; RPU-NEXT:   0     21     %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+  ; RPU-NEXT:                DBG_VALUE
+  ; RPU-NEXT:   0     20
+  ; RPU-NEXT:   0     20     GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+  ; RPU-NEXT:   0     16
+  ; RPU-NEXT:   0     17     %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     15
+  ; RPU-NEXT:   0     16     %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     14
+  ; RPU-NEXT:   0     14     dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:                DBG_VALUE
+  ; RPU-NEXT:                DBG_VALUE
+  ; RPU-NEXT:   0     12
+  ; RPU-NEXT:   0     12     dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     10
+  ; RPU-NEXT:   0     10     dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     10
+  ; RPU-NEXT:   0     11     %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+  ; RPU-NEXT:   0     9
+  ; RPU-NEXT:   0     9      S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+  ; RPU-NEXT:   0     5
+  ; RPU-NEXT:   0     5      GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     2
+  ; RPU-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RPU-NEXT: bb.1:
+  ; RPU-NEXT:   Live-in:  %0:0000000000000003 %16:0000000000000003
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:                DBG_VALUE
+  ; RPU-NEXT:   0     2
+  ; RPU-NEXT:   0     2      S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+  ; RPU-NEXT:                DBG_VALUE
+  ; RPU-NEXT:                DBG_VALUE
+  ; RPU-NEXT:   0     2
+  ; RPU-NEXT:   0     2      S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+  ; RPU-NEXT:                DBG_VALUE
+  ; RPU-NEXT:   0     2
+  ; RPU-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RPU-NEXT: bb.2:
+  ; RPU-NEXT:   Live-in:  %0:0000000000000003 %16:0000000000000003
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:   0     2
+  ; RPU-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RPU-NEXT: bb.3:
+  ; RPU-NEXT:   Live-in:  %0:0000000000000003 %16:0000000000000003
+  ; RPU-NEXT:   SGPR  VGPR
+  ; RPU-NEXT:   0     2
+  ; RPU-NEXT:   0     2      S_NOP 0, implicit %0:vgpr_32
+  ; RPU-NEXT:   0     1
+  ; RPU-NEXT:   0     1      S_NOP 0, implicit %16:vgpr_32
+  ; RPU-NEXT:   0     0
+  ; RPU-NEXT:   0     0      S_ENDPGM 0
+  ; RPU-NEXT:   0     0
+  ; RPU-NEXT:   Live-out:
+  ;
+  ; RPD-LABEL: name: only_dbg_value_sched_region
+  ; RPD: bb.0:
+  ; RPD-NEXT:   Live-in:
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:   0     0
+  ; RPD-NEXT:   0     1      %0:vgpr_32 = COPY $vgpr0
+  ; RPD-NEXT:   0     1
+  ; RPD-NEXT:   0     3      %1:vreg_64 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     3
+  ; RPD-NEXT:   0     5      %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+  ; RPD-NEXT:   0     5
+  ; RPD-NEXT:   0     6      %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+  ; RPD-NEXT:   0     6
+  ; RPD-NEXT:   0     7      undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+  ; RPD-NEXT:   0     7
+  ; RPD-NEXT:   0     8      %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+  ; RPD-NEXT:   0     8
+  ; RPD-NEXT:   0     10     %5:vreg_64 = COPY %2:vreg_64
+  ; RPD-NEXT:   0     9
+  ; RPD-NEXT:   0     10     undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RPD-NEXT:   0     8
+  ; RPD-NEXT:   0     9      dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RPD-NEXT:   0     7
+  ; RPD-NEXT:   0     8      %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+  ; RPD-NEXT:   0     6
+  ; RPD-NEXT:   0     8      %8:vreg_64 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     7
+  ; RPD-NEXT:   0     9      %9:vreg_64 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     9
+  ; RPD-NEXT:   0     11     %10:vreg_64 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     11
+  ; RPD-NEXT:   0     12     undef %11.sub1:vreg_64 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     12
+  ; RPD-NEXT:   0     13     %12:vgpr_32 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     13
+  ; RPD-NEXT:   0     14     %13:vgpr_32 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     14
+  ; RPD-NEXT:   0     16     %14:vreg_64 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     16
+  ; RPD-NEXT:   0     18     %15:vreg_64 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     18
+  ; RPD-NEXT:   0     19     %16:vgpr_32 = IMPLICIT_DEF
+  ; RPD-NEXT:   0     19
+  ; RPD-NEXT:   0     20     %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; RPD-NEXT:   0     20
+  ; RPD-NEXT:   0     21     %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; RPD-NEXT:   0     21
+  ; RPD-NEXT:   0     22     undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RPD-NEXT:   0     20
+  ; RPD-NEXT:   0     21     %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+  ; RPD-NEXT:                DBG_VALUE
+  ; RPD-NEXT:   0     20
+  ; RPD-NEXT:   0     20     GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+  ; RPD-NEXT:   0     16
+  ; RPD-NEXT:   0     17     %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+  ; RPD-NEXT:   0     15
+  ; RPD-NEXT:   0     16     %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+  ; RPD-NEXT:   0     14
+  ; RPD-NEXT:   0     15     dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+  ; RPD-NEXT:                DBG_VALUE
+  ; RPD-NEXT:                DBG_VALUE
+  ; RPD-NEXT:   0     12
+  ; RPD-NEXT:   0     13     dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+  ; RPD-NEXT:   0     10
+  ; RPD-NEXT:   0     11     dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+  ; RPD-NEXT:   0     10
+  ; RPD-NEXT:   0     12     %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+  ; RPD-NEXT:   0     9
+  ; RPD-NEXT:   0     9      S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+  ; RPD-NEXT:   0     5
+  ; RPD-NEXT:   0     5      GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+  ; RPD-NEXT:   0     2
+  ; RPD-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RPD-NEXT: bb.1:
+  ; RPD-NEXT:   Live-in:  %0:0000000000000003 %16:0000000000000003
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:                DBG_VALUE
+  ; RPD-NEXT:   0     2
+  ; RPD-NEXT:   0     2      S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+  ; RPD-NEXT:                DBG_VALUE
+  ; RPD-NEXT:                DBG_VALUE
+  ; RPD-NEXT:   0     2
+  ; RPD-NEXT:   0     2      S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+  ; RPD-NEXT:                DBG_VALUE
+  ; RPD-NEXT:   0     2
+  ; RPD-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RPD-NEXT: bb.2:
+  ; RPD-NEXT:   Live-in:  %0:0000000000000003 %16:0000000000000003
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:   0     2
+  ; RPD-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RPD-NEXT: bb.3:
+  ; RPD-NEXT:   Live-in:  %0:0000000000000003 %16:0000000000000003
+  ; RPD-NEXT:   SGPR  VGPR
+  ; RPD-NEXT:   0     2
+  ; RPD-NEXT:   0     2      S_NOP 0, implicit %0:vgpr_32
+  ; RPD-NEXT:   0     1
+  ; RPD-NEXT:   0     1      S_NOP 0, implicit %16:vgpr_32
+  ; RPD-NEXT:   0     0
+  ; RPD-NEXT:   0     0      S_ENDPGM 0
+  ; RPD-NEXT:   0     0
+  ; RPD-NEXT:   Live-out:
+  bb.0:
+    liveins: $vgpr0
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vreg_64 = IMPLICIT_DEF
+    %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, implicit $exec
+    %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, implicit $exec
+    undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec
+    %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+    %5:vreg_64 = COPY %2
+    undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $mode, implicit $exec
+    %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $mode, implicit $exec
+    %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, implicit $exec
+    %8:vreg_64 = IMPLICIT_DEF
+    %9:vreg_64 = IMPLICIT_DEF
+    %10:vreg_64 = IMPLICIT_DEF
+    undef %11.sub1:vreg_64 = IMPLICIT_DEF
+    %12:vgpr_32 = IMPLICIT_DEF
+    %13:vgpr_32 = IMPLICIT_DEF
+    %14:vreg_64 = IMPLICIT_DEF
+    %15:vreg_64 = IMPLICIT_DEF
+    %16:vgpr_32 = IMPLICIT_DEF
+    %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $mode, implicit $exec
+    %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $mode, implicit $exec
+    DBG_VALUE
+    GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
+    %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, implicit $exec
+    %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, implicit $exec
+    %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
+    DBG_VALUE
+    DBG_VALUE
+    %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, implicit $exec
+    %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, implicit $exec
+    %23:vreg_64 = V_LSHLREV_B64_e64 2, %8, implicit $exec
+    S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17
+    GLOBAL_STORE_DWORD %15, %18, 0, 0, implicit $exec
+
+  bb.1:
+    DBG_VALUE
+    S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+    DBG_VALUE
+    DBG_VALUE
+    S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+    DBG_VALUE
+
+  bb.3:
+
+  bb.2:
+    S_NOP 0, implicit %0
+    S_NOP 0, implicit %16
+    S_ENDPGM 0
+...
+


        


More information about the llvm-commits mailing list