[llvm] e808f8a - [AMDGPU] GCNRegPressurePrinter pass to print GCNRegPressure values for testing. (#70031)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 15:01:44 PDT 2023
Author: Valery Pykhtin
Date: 2023-11-01T23:01:39+01:00
New Revision: e808f8a61683ccd9da88f722135ff59ddcdb5aef
URL: https://github.com/llvm/llvm-project/commit/e808f8a61683ccd9da88f722135ff59ddcdb5aef
DIFF: https://github.com/llvm/llvm-project/commit/e808f8a61683ccd9da88f722135ff59ddcdb5aef.diff
LOG: [AMDGPU] GCNRegPressurePrinter pass to print GCNRegPressure values for testing. (#70031)
Using GCNDownwardRPTracker or GCNUpwardRPTracker, the pass collects register pressure values for a function and prints these values next to the instructions. The output can be used to generate FileCheck rules in MIR tests.
Added:
llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.h
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
llvm/lib/Target/AMDGPU/GCNRegPressure.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 97a413296c55e55..2c29710f8c8cb46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -191,6 +191,9 @@ extern char &AMDGPUImageIntrinsicOptimizerID;
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;
+void initializeGCNRegPressurePrinterPass(PassRegistry &);
+extern char &GCNRegPressurePrinterID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dc7321cd5de9fcd..375df27206f7b41 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -428,6 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGCNPreRAOptimizationsPass(*PR);
initializeGCNPreRALongBranchRegPass(*PR);
initializeGCNRewritePartialRegUsesPass(*PR);
+ initializeGCNRegPressurePrinterPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1ca0f3b6e06b823..a04c470b7b9762f 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "GCNRegPressure.h"
+#include "AMDGPU.h"
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
@@ -31,7 +32,6 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
return true;
}
-
///////////////////////////////////////////////////////////////////////////////
// GCNRegPressure
@@ -135,8 +135,6 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
O.getVGPRNum(ST.hasGFX90AInsts()));
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
return Printable([&RP, ST](raw_ostream &OS) {
OS << "VGPRs: " << RP.Value[GCNRegPressure::VGPR32] << ' '
@@ -155,7 +153,6 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) {
OS << '\n';
});
}
-#endif
static LaneBitmask getDefRegMask(const MachineOperand &MO,
const MachineRegisterInfo &MRI) {
@@ -269,6 +266,13 @@ void GCNUpwardRPTracker::reset(const MachineInstr &MI,
GCNRPTracker::reset(MI, LiveRegsCopy, true);
}
+void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
+ const LiveRegSet &LiveRegs_) {
+ MRI = &MRI_;
+ LiveRegs = LiveRegs_;
+ MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
+}
+
void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
assert(MRI && "call reset first");
@@ -418,19 +422,17 @@ bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin,
return advance(End);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedLR,
- const TargetRegisterInfo *TRI) {
- return Printable([&LISLR, &TrackedLR, TRI](raw_ostream &OS) {
+ const TargetRegisterInfo *TRI, StringRef Pfx) {
+ return Printable([&LISLR, &TrackedLR, TRI, Pfx](raw_ostream &OS) {
for (auto const &P : TrackedLR) {
auto I = LISLR.find(P.first);
if (I == LISLR.end()) {
- OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
+ OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
<< " isn't found in LIS reported set\n";
} else if (I->second != P.second) {
- OS << " " << printReg(P.first, TRI)
+ OS << Pfx << printReg(P.first, TRI)
<< " masks doesn't match: LIS reported " << PrintLaneMask(I->second)
<< ", tracked " << PrintLaneMask(P.second) << '\n';
}
@@ -438,7 +440,7 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
for (auto const &P : LISLR) {
auto I = TrackedLR.find(P.first);
if (I == TrackedLR.end()) {
- OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
+ OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
<< " isn't found in tracked set\n";
}
}
@@ -467,7 +469,6 @@ bool GCNUpwardRPTracker::isValid() const {
return true;
}
-LLVM_DUMP_METHOD
Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
const MachineRegisterInfo &MRI) {
return Printable([&LiveRegs, &MRI](raw_ostream &OS) {
@@ -483,7 +484,122 @@ Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
});
}
-LLVM_DUMP_METHOD
void GCNRegPressure::dump() const { dbgs() << print(*this); }
-#endif
+static cl::opt<bool> UseDownwardTracker(
+ "amdgpu-print-rp-downward",
+ cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"),
+ cl::init(false), cl::Hidden);
+
+char llvm::GCNRegPressurePrinter::ID = 0;
+char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
+
+INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
+
+bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+ auto &OS = dbgs();
+
+// Leading spaces are important for YAML syntax.
+#define PFX " "
+
+ OS << "---\nname: " << MF.getName() << "\nbody: |\n";
+
+ auto printRP = [](const GCNRegPressure &RP) {
+ return Printable([&RP](raw_ostream &OS) {
+ OS << format(PFX " %-5d", RP.getSGPRNum())
+ << format(" %-5d", RP.getVGPRNum(false));
+ });
+ };
+
+ auto ReportLISMismatchIfAny = [&](const GCNRPTracker::LiveRegSet &TrackedLR,
+ const GCNRPTracker::LiveRegSet &LISLR) {
+ if (LISLR != TrackedLR) {
+ OS << PFX " mis LIS: " << llvm::print(LISLR, MRI)
+ << reportMismatch(LISLR, TrackedLR, TRI, PFX " ");
+ }
+ };
+
+ // Register pressure before and at an instruction (in program order).
+ SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
+
+ for (auto &MBB : MF) {
+ RP.clear();
+ RP.reserve(MBB.size());
+
+ OS << PFX;
+ MBB.printName(OS);
+ OS << ":\n";
+
+ SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
+ SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB);
+
+ GCNRPTracker::LiveRegSet LiveIn, LiveOut;
+ GCNRegPressure RPAtMBBEnd;
+
+ if (UseDownwardTracker) {
+ if (MBB.empty()) {
+ LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI);
+ RPAtMBBEnd = getRegPressure(MRI, LiveIn);
+ } else {
+ GCNDownwardRPTracker RPT(LIS);
+ RPT.reset(MBB.front());
+
+ LiveIn = RPT.getLiveRegs();
+
+ while (!RPT.advanceBeforeNext()) {
+ GCNRegPressure RPBeforeMI = RPT.getPressure();
+ RPT.advanceToNext();
+ RP.emplace_back(RPBeforeMI, RPT.getPressure());
+ }
+
+ LiveOut = RPT.getLiveRegs();
+ RPAtMBBEnd = RPT.getPressure();
+ }
+ } else {
+ GCNUpwardRPTracker RPT(LIS);
+ RPT.reset(MRI, MBBEndSlot);
+ RPT.moveMaxPressure(); // Clear max pressure.
+
+ LiveOut = RPT.getLiveRegs();
+ RPAtMBBEnd = RPT.getPressure();
+
+ for (auto &MI : reverse(MBB)) {
+ RPT.recede(MI);
+ if (!MI.isDebugInstr())
+ RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+ }
+
+ LiveIn = RPT.getLiveRegs();
+ }
+
+ OS << PFX " Live-in: " << llvm::print(LiveIn, MRI);
+ if (!UseDownwardTracker)
+ ReportLISMismatchIfAny(LiveIn, getLiveRegs(MBBStartSlot, LIS, MRI));
+
+ OS << PFX " SGPR VGPR\n";
+ int I = 0;
+ for (auto &MI : MBB) {
+ if (!MI.isDebugInstr()) {
+ auto &[RPBeforeInstr, RPAtInstr] =
+ RP[UseDownwardTracker ? I : (RP.size() - 1 - I)];
+ ++I;
+ OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
+ } else
+ OS << PFX " ";
+ MI.print(OS);
+ }
+ OS << printRP(RPAtMBBEnd) << '\n';
+
+ OS << PFX " Live-out:" << llvm::print(LiveOut, MRI);
+ if (UseDownwardTracker)
+ ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBEndSlot, LIS, MRI));
+ }
+ OS << "...\n";
+ return false;
+
+#undef PFX
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 72e18acc1b8e494..c750fe74749e2b3 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,6 +128,8 @@ class GCNRPTracker {
void clearMaxPressure() { MaxPressure.clear(); }
+ GCNRegPressure getPressure() const { return CurPressure; }
+
// returns MaxPressure, resetting it
decltype(MaxPressure) moveMaxPressure() {
auto Res = MaxPressure;
@@ -140,6 +142,9 @@ class GCNRPTracker {
}
};
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
class GCNUpwardRPTracker : public GCNRPTracker {
public:
GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
@@ -148,6 +153,14 @@ class GCNUpwardRPTracker : public GCNRPTracker {
// filling live regs upon this point using LIS
void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
+ // reset tracker and set live register set to the specified value.
+ void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+
+ // reset tracker at the specified slot index.
+ void reset(const MachineRegisterInfo &MRI_, SlotIndex SI) {
+ reset(MRI_, llvm::getLiveRegs(SI, LIS, MRI_));
+ }
+
// move to the state just above the MI
void recede(const MachineInstr &MI);
@@ -196,10 +209,6 @@ LaneBitmask getLiveLaneMask(unsigned Reg,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
- const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI);
-
/// creates a map MachineInstr -> LiveRegSet
/// R - range of iterators on instructions
/// After - upon entry or exit of every instruction
@@ -275,7 +284,22 @@ Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedL,
- const TargetRegisterInfo *TRI);
+ const TargetRegisterInfo *TRI, StringRef Pfx = " ");
+
+struct GCNRegPressurePrinter : public MachineFunctionPass {
+ static char ID;
+
+public:
+ GCNRegPressurePrinter() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
new file mode 100644
index 000000000000000..d53050167e98bef
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -0,0 +1,462 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
+
+
+---
+name: trivial
+tracksRegLiveness: true
+body: |
+ ; RP-LABEL: name: trivial
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 1 %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 2 1 %1:sgpr_64 = IMPLICIT_DEF
+ ; RP-NEXT: 2 1
+ ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 2 1
+ ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 2 1
+ ; RP-NEXT: 2 1 S_NOP 0, implicit %0:vgpr_32, implicit %1:sgpr_64
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ bb.0:
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %1:sgpr_64 = IMPLICIT_DEF
+ bb.1:
+
+ bb.2:
+ S_NOP 0, implicit %0, implicit %1
+...
+---
+name: live_through_test
+tracksRegLiveness: true
+body: |
+ ; RPU-LABEL: name: live_through_test
+ ; RPU: bb.0:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: Live-out: %0:00000000000000F3
+ ; RPU-NEXT: bb.1:
+ ; RPU-NEXT: Live-in: %0:00000000000000F3
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
+ ; RPU-NEXT: 3 0
+ ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: Live-out: %0:00000000000000C3
+ ; RPU-NEXT: bb.2:
+ ; RPU-NEXT: Live-in: %0:00000000000000C3
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 2 0
+ ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ;
+ ; RPD-LABEL: name: live_through_test
+ ; RPD: bb.0:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: Live-out: %0:00000000000000F3
+ ; RPD-NEXT: bb.1:
+ ; RPD-NEXT: Live-in: %0:00000000000000F3
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
+ ; RPD-NEXT: 3 0
+ ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: Live-out: %0:00000000000000C3
+ ; RPD-NEXT: bb.2:
+ ; RPD-NEXT: Live-in: %0:00000000000000C3
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 2 0
+ ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ bb.0:
+ %0:sgpr_128 = IMPLICIT_DEF
+ bb.1:
+
+ S_NOP 0, implicit %0.sub0 ; kill sub0
+ %0.sub0 = IMPLICIT_DEF ; redef sub0
+
+ %0.sub1:sgpr_128 = IMPLICIT_DEF ; redef sub1
+
+ S_NOP 0, implicit %0.sub2 ; kill sub2
+ %0.sub2:sgpr_128 = IMPLICIT_DEF ; redef sub2
+ S_NOP 0, implicit %0.sub2 ; kill sub2
+
+ S_NOP 0, implicit %0.sub3 ; use sub3, live-through
+
+ bb.2:
+ S_NOP 0, implicit %0.sub3, implicit %0.sub0
+...
+
+# This testcase shows the problem with LiveIntervals: it doesn't create
+# subranges for undefined but used subregisters. Upward tracker is able to see
+# the use of undefined subregister and tracks it correctly.
+---
+name: upward_problem_lis_subregs_mismatch
+tracksRegLiveness: true
+body: |
+ ; RPU-LABEL: name: upward_problem_lis_subregs_mismatch
+ ; RPU: bb.0:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RPU-NEXT: bb.1:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RPU-NEXT: bb.2:
+ ; RPU-NEXT: Live-in: %0:000000000000000F %1:000000000000000F
+ ; RPU-NEXT: mis LIS: %0:0000000000000003 %1:000000000000000C
+ ; RPU-NEXT: %0 masks doesn't match: LIS reported 0000000000000003, tracked 000000000000000F
+ ; RPU-NEXT: %1 masks doesn't match: LIS reported 000000000000000C, tracked 000000000000000F
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 4
+ ; RPU-NEXT: 0 4 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ;
+ ; RPD-LABEL: name: upward_problem_lis_subregs_mismatch
+ ; RPD: bb.0:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RPD-NEXT: bb.1:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RPD-NEXT: bb.2:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ bb.0:
+ undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+ undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+
+ bb.1:
+
+ bb.2:
+ S_NOP 0, implicit %0, implicit %1
+...
+---
+name: only_dbg_value_sched_region
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ waveLimiter: true
+body: |
+ ; RPU-LABEL: name: only_dbg_value_sched_region
+ ; RPU: bb.0:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 3
+ ; RPU-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RPU-NEXT: 0 8
+ ; RPU-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 8
+ ; RPU-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 6
+ ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 7
+ ; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 11
+ ; RPU-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 12
+ ; RPU-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 13
+ ; RPU-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 14
+ ; RPU-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 16
+ ; RPU-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 18
+ ; RPU-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 19
+ ; RPU-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPU-NEXT: 0 21
+ ; RPU-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 20
+ ; RPU-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RPU-NEXT: 0 16
+ ; RPU-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 15
+ ; RPU-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 14
+ ; RPU-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 12
+ ; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 10
+ ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 10
+ ; RPU-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RPU-NEXT: 0 9
+ ; RPU-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RPU-NEXT: 0 5
+ ; RPU-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: bb.1:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPU-NEXT: DBG_VALUE
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: bb.2:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: bb.3:
+ ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 2
+ ; RPU-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 0 S_ENDPGM 0
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ;
+ ; RPD-LABEL: name: only_dbg_value_sched_region
+ ; RPD: bb.0:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 3
+ ; RPD-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ ; RPD-NEXT: 0 8
+ ; RPD-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 10 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 8
+ ; RPD-NEXT: 0 9 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 6
+ ; RPD-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 7
+ ; RPD-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 11
+ ; RPD-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 12
+ ; RPD-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 13
+ ; RPD-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 14
+ ; RPD-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 16
+ ; RPD-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 18
+ ; RPD-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF
+ ; RPD-NEXT: 0 19
+ ; RPD-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; RPD-NEXT: 0 21
+ ; RPD-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 20
+ ; RPD-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+ ; RPD-NEXT: 0 16
+ ; RPD-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 15
+ ; RPD-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 14
+ ; RPD-NEXT: 0 15 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 12
+ ; RPD-NEXT: 0 13 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 10
+ ; RPD-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 10
+ ; RPD-NEXT: 0 12 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RPD-NEXT: 0 9
+ ; RPD-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+ ; RPD-NEXT: 0 5
+ ; RPD-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: bb.1:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ ; RPD-NEXT: DBG_VALUE
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: bb.2:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: bb.3:
+ ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 2
+ ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 0 S_ENDPGM 0
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ bb.0:
+ liveins: $vgpr0
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vreg_64 = IMPLICIT_DEF
+ %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, implicit $exec
+ %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, implicit $exec
+ undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec
+ %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+ %5:vreg_64 = COPY %2
+ undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $mode, implicit $exec
+ %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $mode, implicit $exec
+ %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, implicit $exec
+ %8:vreg_64 = IMPLICIT_DEF
+ %9:vreg_64 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ undef %11.sub1:vreg_64 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vreg_64 = IMPLICIT_DEF
+ %15:vreg_64 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $mode, implicit $exec
+ %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $mode, implicit $exec
+ DBG_VALUE
+ GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
+ %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, implicit $exec
+ %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, implicit $exec
+ %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
+ DBG_VALUE
+ DBG_VALUE
+ %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, implicit $exec
+ %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, implicit $exec
+ %23:vreg_64 = V_LSHLREV_B64_e64 2, %8, implicit $exec
+ S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17
+ GLOBAL_STORE_DWORD %15, %18, 0, 0, implicit $exec
+
+ bb.1:
+ DBG_VALUE
+ S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ DBG_VALUE
+ DBG_VALUE
+ S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+ DBG_VALUE
+
+ bb.3:
+
+ bb.2:
+ S_NOP 0, implicit %0
+ S_NOP 0, implicit %16
+ S_ENDPGM 0
+...
+
More information about the llvm-commits
mailing list