[llvm] [llvm][AsmPrinter] Add an option to print instruction latencies (PR #113243)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 21 17:57:56 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Jon Roelofs (jroelofs)
<details>
<summary>Changes</summary>
Add an option to print instruction latencies as verbose asm comments, matching what we have in the disassembler. This isn't turned on by default, since several of the scheduling models are not completely accurate and we don't want the output to be misleading.
---
Full diff: https://github.com/llvm/llvm-project/pull/113243.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (+92-2)
- (added) llvm/test/CodeGen/AArch64/latency.ll (+10)
``````````diff
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 327e7f7f8a1ed8..015c4cc3d4b721 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -162,6 +162,13 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));
+// Off by default and hidden: several of the scheduling models are not
+// completely accurate, and we don't want the printed latencies to mislead.
+static cl::opt<bool> PrintLatency(
+ "asm-print-latency",
+ cl::desc("Print instruction latencies as verbose asm comments."),
+ cl::Hidden, cl::init(false));
+
STATISTIC(EmittedInsts, "Number of machine instrs printed");
char AsmPrinter::ID = 0;
@@ -1080,8 +1087,78 @@ void AsmPrinter::emitFunctionEntryLabel() {
}
}
+/// Gets latency information for \p MI from the itinerary scheduling model,
+/// using \p MF's subtarget to find the instruction's scheduling class.
+/// \return The maximum operand cycle over all the operands of \p MI, or -1
+/// if \p STI names no CPU to fetch an itinerary for.
+static int getItineraryLatency(const MachineInstr &MI,
+ const MachineFunction *MF,
+ const MCSubtargetInfo *STI) {
+ const int NoInformationAvailable = -1;
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ // Without a CPU name there is no itinerary to look up.
+ if (STI->getCPU().empty())
+ return NoInformationAvailable;
+
+ // Get the itinerary data for that CPU.
+ InstrItineraryData IID = STI->getInstrItineraryForCPU(STI->getCPU());
+ // Get the scheduling class of the requested instruction.
+ const MCInstrDesc &Desc = TII->get(MI.getOpcode());
+ unsigned SCClass = Desc.getSchedClass();
+
+ unsigned Latency = 0;
+ // Keep the largest cycle among operands that have one; stays 0 if none do.
+ for (unsigned Idx = 0, IdxEnd = MI.getNumOperands(); Idx != IdxEnd; ++Idx)
+ if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx))
+ Latency = std::max(Latency, *OperCycle);
+
+ return (int)Latency;
+}
+
+/// Gets latency information for \p MI from \p STI's scheduling model.
+/// \return The maximum expected latency over all the definitions or -1
+/// if no information is available.
+static int getLatency(const MachineInstr &MI, const MCSubtargetInfo *STI) {
+ const MCSchedModel SCModel = STI->getSchedModel();
+ const int NoInformationAvailable = -1;
+
+ const MachineFunction *MF = MI.getMF();
+ if (!MF)
+ return NoInformationAvailable;
+
+ // Check if we have a scheduling model for instructions.
+ if (!SCModel.hasInstrSchedModel())
+ // Try to fall back to the itinerary model if the scheduling model doesn't
+ // have a scheduling table. Note the default does not have a table.
+ return getItineraryLatency(MI, MF, STI);
+
+ // Get the scheduling class of the requested instruction.
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const MCInstrDesc &Desc = TII->get(MI.getOpcode());
+ unsigned SCClass = Desc.getSchedClass();
+ const MCSchedClassDesc *SCDesc = SCModel.getSchedClassDesc(SCClass);
+ // Variant classes are not resolved here (that would need a call to
+ // SubTargetInfo::resolveSchedClass), so they are reported as unknown.
+ if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant())
+ return NoInformationAvailable;
+
+ // Compute output latency: the largest cycle count among the write entries.
+ int16_t Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, WLEntry->Cycles);
+ }
+
+ return Latency;
+}
+
/// emitComments - Pretty-print comments for instructions.
-static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+static void emitComments(const MachineInstr &MI, const MCSubtargetInfo *STI,
+ raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -1109,6 +1186,13 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// Check for spill-induced copies
if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
CommentOS << " Reload Reuse\n";
+
+ if (PrintLatency) {
+ int Latency = getLatency(MI, STI);
+ // Report only interesting latencies.
+ if (1 < Latency)
+ CommentOS << " Latency: " << Latency << "\n";
+ }
}
/// emitImplicitDef - This method emits the specified machine instruction
@@ -1750,6 +1834,12 @@ void AsmPrinter::emitFunctionBody() {
int NumInstsInFunction = 0;
bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
+ const MCSubtargetInfo *STI = nullptr;
+ if (this->MF)
+ STI = &getSubtargetInfo();
+ else
+ STI = TM.getMCSubtargetInfo();
+
bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
for (auto &MBB : *MF) {
// Print a label for the basic block.
@@ -1773,7 +1863,7 @@ void AsmPrinter::emitFunctionBody() {
Handler->beginInstruction(&MI);
if (isVerbose())
- emitComments(MI, OutStreamer->getCommentOS());
+ emitComments(MI, STI, OutStreamer->getCommentOS());
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
diff --git a/llvm/test/CodeGen/AArch64/latency.ll b/llvm/test/CodeGen/AArch64/latency.ll
new file mode 100644
index 00000000000000..b722eec3e2571a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/latency.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=1 | FileCheck %s --match-full-lines --check-prefix=ON
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=0 | FileCheck %s --match-full-lines --check-prefix=OFF
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone | FileCheck %s --match-full-lines --check-prefix=OFF
+
+define <4 x i64> @load_v4i64(ptr %ptr){
+; ON: ldp q0, q1, [x0] ; Latency: 4
+; OFF: ldp q0, q1, [x0]
+ %a = load <4 x i64>, ptr %ptr
+ ret <4 x i64> %a
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/113243
More information about the llvm-commits
mailing list