[llvm] [MCA] Enhance debug prints of processor resources (PR #190132)
Tomer Shafir via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 05:27:36 PDT 2026
https://github.com/tomershafir updated https://github.com/llvm/llvm-project/pull/190132
>From f0fad617b08c02a3acb9e9595d977a7adc1be638 Mon Sep 17 00:00:00 2001
From: tomershafir <tomer.shafir8 at gmail.com>
Date: Tue, 31 Mar 2026 12:17:39 +0300
Subject: [PATCH] [MCA] Enhance debug prints of processor resources
Previously, `computeProcResourceMasks()` would print resource masks in debug mode from multiple call sites, creating noise in the debug output. This patch fixes that and also prints more information about the resources.
It splits the debug prints for resources into 2 types:
1. No simulation - mask only
2. Simulation - mask + other info
For 2, it consolidates printing into a single place, the `ResourceManager` constructor, which should cover all the other simulation cases indirectly:
1. `llvm/lib/MCA/HardwareUnits/ResourceManager` - covered
2. `llvm/lib/MCA/InstrBuilder.cpp` - should be covered indirectly - only used by `llvm-mca` before simulation that constructs a `ResourceManager`
3. `llvm/tools/llvm-mca/Views/SummaryView.cpp` - after simulation that constructs a `ResourceManager`
4. `llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp` - after simulation that constructs a `ResourceManager`
It also adds `BufferSize` to the output, which should be useful for debugging the scheduling model + MCA integration.
For 1, it inlines mask-only printing into 2 other callers:
1. `llvm/include/llvm/MCA/Stages/InstructionTables.h`
2. `llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp`
as they only use the masks there. I think this is a reasonable duplication across distinctly different users/tools.
Now every pair of callers, even across groups (1 and 2), effectively prints in a mutually exclusive way.
The patch adds debug tests for the 3 new callers in the corresponding root test directories, to encourage placing logically target-independent tests that merely require some target at the root. I think this convention is more discoverable and is already widely used in the project.
---
.../llvm/MCA/Stages/InstructionTables.h | 11 ++++
.../lib/MCA/HardwareUnits/ResourceManager.cpp | 18 ++++++
llvm/lib/MCA/Support.cpp | 9 ---
...alysis-processor-resource-masks-debug.test | 55 +++++++++++++++++++
.../llvm-mca/processor-resource-masks-debug.s | 29 ++++++++++
.../llvm-mca/processor-resources-debug.s | 29 ++++++++++
.../lib/SchedClassResolution.cpp | 12 ++++
7 files changed, 154 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/tools/llvm-exegesis/analysis-processor-resource-masks-debug.test
create mode 100644 llvm/test/tools/llvm-mca/processor-resource-masks-debug.s
create mode 100644 llvm/test/tools/llvm-mca/processor-resources-debug.s
diff --git a/llvm/include/llvm/MCA/Stages/InstructionTables.h b/llvm/include/llvm/MCA/Stages/InstructionTables.h
index 7a96e82dd995b..80cbae070b5a1 100644
--- a/llvm/include/llvm/MCA/Stages/InstructionTables.h
+++ b/llvm/include/llvm/MCA/Stages/InstructionTables.h
@@ -22,6 +22,9 @@
#include "llvm/MCA/Stages/Stage.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
namespace llvm {
namespace mca {
@@ -35,6 +38,14 @@ class LLVM_ABI InstructionTables final : public Stage {
InstructionTables(const MCSchedModel &Model)
: SM(Model), Masks(Model.getNumProcResourceKinds()) {
computeProcResourceMasks(Model, Masks);
+ LLVM_DEBUG({
+ dbgs() << "\nProcessor resource masks:\n";
+ for (unsigned I = 0, E = Model.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *Model.getProcResource(I);
+ dbgs() << '[' << format_decimal(I, 2) << "] " << " - "
+ << format_hex(Masks[I], 16) << " - " << Desc.Name << '\n';
+ }
+ });
}
bool hasWorkToComplete() const override { return false; }
diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index e45bd00f1a292..5a9a44a140e91 100644
--- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -132,6 +132,24 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
Strategies[Index] = getStrategyFor(*Resources[Index]);
}
+ // Print static resource information on debug mode
+ LLVM_DEBUG({
+ dbgs() << "\nProcessor resources:\n";
+ // Print InvalidUnit first to be consistent with scheduling model indexing
+ // schema
+ const MCProcResourceDesc &InvalidUnit = *SM.getProcResource(0);
+ dbgs() << "[ 0] - " << format_hex(ProcResID2Mask[0], 16) << " - "
+ << InvalidUnit.Name << "\n";
+ for (unsigned I = 0, E = Resources.size(); I < E; ++I) {
+ const ResourceState &RS = *Resources[I];
+ const unsigned ProcResID = RS.getProcResourceID();
+ const MCProcResourceDesc &Desc = *SM.getProcResource(ProcResID);
+ dbgs() << '[' << format_decimal(ProcResID, 2) << "] "
+ << " - " << format_hex(RS.getResourceMask(), 16) << " - "
+ << Desc.Name << " (BufferSize=" << RS.getBufferSize() << ")\n";
+ }
+ });
+
for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
uint64_t Mask = ProcResID2Mask[I];
unsigned Index = getResourceStateIndex(Mask);
diff --git a/llvm/lib/MCA/Support.cpp b/llvm/lib/MCA/Support.cpp
index 1f1f2ab8d2c36..4bc6a7781d6f1 100644
--- a/llvm/lib/MCA/Support.cpp
+++ b/llvm/lib/MCA/Support.cpp
@@ -67,15 +67,6 @@ void computeProcResourceMasks(const MCSchedModel &SM,
}
ProcResourceID++;
}
-
- LLVM_DEBUG({
- dbgs() << "\nProcessor resource masks:\n";
- for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
- const MCProcResourceDesc &Desc = *SM.getProcResource(I);
- dbgs() << '[' << format_decimal(I, 2) << "] " << " - "
- << format_hex(Masks[I], 16) << " - " << Desc.Name << '\n';
- }
- });
}
double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
diff --git a/llvm/test/tools/llvm-exegesis/analysis-processor-resource-masks-debug.test b/llvm/test/tools/llvm-exegesis/analysis-processor-resource-masks-debug.test
new file mode 100644
index 0000000000000..fa97d8fa9cd6f
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/analysis-processor-resource-masks-debug.test
@@ -0,0 +1,55 @@
+# REQUIRES: asserts
+# REQUIRES: aarch64-registered-target
+
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=/dev/null -analysis-numpoints=1 --debug-only=exegesis-sched-class-resolution 2>&1 | FileCheck %s
+
+## Do not print detailed processor resources information without simulation
+# CHECK-NOT: Processor resources:
+
+## Print mask-only information without simulation
+# CHECK-COUNT-1: Processor resource masks:
+# CHECK-NEXT: [ 0] - 0x00000000000000 - InvalidUnit
+# CHECK-NEXT: [ 1] - 0x00000000000001 - V2UnitB
+# CHECK-NEXT: [ 2] - 0x00000000000002 - V2UnitD
+# CHECK-NEXT: [ 3] - 0x000000000081e0 - V2UnitF
+# CHECK-NEXT: [ 4] - 0x00000000000004 - V2UnitFlg
+# CHECK-NEXT: [ 5] - 0x000000000107e0 - V2UnitI
+# CHECK-NEXT: [ 6] - 0x00000000020018 - V2UnitL
+# CHECK-NEXT: [ 7] - 0x00000000000008 - V2UnitL2
+# CHECK-NEXT: [ 8] - 0x00000000000010 - V2UnitL01
+# CHECK-NEXT: [ 9] - 0x00000000040060 - V2UnitM
+# CHECK-NEXT: [10] - 0x00000000000020 - V2UnitM0
+# CHECK-NEXT: [11] - 0x00000000000040 - V2UnitM1
+# CHECK-NEXT: [12] - 0x00000000080180 - V2UnitR
+# CHECK-NEXT: [13] - 0x00000000100780 - V2UnitS
+# CHECK-NEXT: [14] - 0x00000000000080 - V2UnitS0
+# CHECK-NEXT: [15] - 0x00000000000100 - V2UnitS1
+# CHECK-NEXT: [16] - 0x00000000000200 - V2UnitS2
+# CHECK-NEXT: [17] - 0x00000000000400 - V2UnitS3
+# CHECK-NEXT: [18] - 0x00000000207800 - V2UnitV
+# CHECK-NEXT: [19] - 0x00000000000800 - V2UnitV0
+# CHECK-NEXT: [20] - 0x00000000001000 - V2UnitV1
+# CHECK-NEXT: [21] - 0x00000000002000 - V2UnitV2
+# CHECK-NEXT: [22] - 0x00000000004000 - V2UnitV3
+# CHECK-NEXT: [23] - 0x00000000401800 - V2UnitV01
+# CHECK-NEXT: [24] - 0x00000000802800 - V2UnitV02
+# CHECK-NEXT: [25] - 0x00000001005000 - V2UnitV13
+# CHECK-NEXT: [26] - 0x00000002006000 - V2UnitV23
+
+---
+mode: latency
+key:
+ instructions:
+ - 'ADDVv4i16v H16 D16'
+ config: ''
+ register_initial_values:
+ - 'D16=0x0'
+cpu_name: neoverse-v2
+llvm_triple: aarch64
+min_instructions: 100
+measurements:
+ - { key: latency, value: 1.0, per_snippet_value: 1.0 }
+error: ''
+info: Repeating a single explicitly serial instruction
+assembled_snippet: 10E4002F10BA710E10BA710E10BA710E10BA710EC0035FD6
+...
diff --git a/llvm/test/tools/llvm-mca/processor-resource-masks-debug.s b/llvm/test/tools/llvm-mca/processor-resource-masks-debug.s
new file mode 100644
index 0000000000000..afb23fb3fdb99
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/processor-resource-masks-debug.s
@@ -0,0 +1,29 @@
+# REQUIRES: asserts
+# REQUIRES: aarch64-registered-target
+
+# RUN: llvm-mca < %s -mtriple=aarch64 -mcpu=apple-m1 -debug -instruction-tables 2>&1 | FileCheck %s
+
+# LLVM-MCA-BEGIN foo
+add x2, x0, x1
+# LLVM-MCA-END
+
+## Do not print detailed processor resources information without simulation
+# CHECK-NOT: Processor resources:
+
+## Print mask-only information without simulation
+# CHECK-COUNT-1: Processor resource masks:
+# CHECK-NEXT: [ 0] - 0x00000000000000 - InvalidUnit
+# CHECK-NEXT: [ 1] - 0x00000000000001 - CyUnitB
+# CHECK-NEXT: [ 2] - 0x00000000000002 - CyUnitBR
+# CHECK-NEXT: [ 3] - 0x00000000000004 - CyUnitFloatDiv
+# CHECK-NEXT: [ 4] - 0x00000000000008 - CyUnitI
+# CHECK-NEXT: [ 5] - 0x00000000000010 - CyUnitID
+# CHECK-NEXT: [ 6] - 0x00000000000020 - CyUnitIM
+# CHECK-NEXT: [ 7] - 0x00000000000040 - CyUnitIS
+# CHECK-NEXT: [ 8] - 0x00000000000080 - CyUnitIntDiv
+# CHECK-NEXT: [ 9] - 0x00000000000100 - CyUnitLS
+# CHECK-NEXT: [10] - 0x00000000000200 - CyUnitV
+# CHECK-NEXT: [11] - 0x00000000000400 - CyUnitVC
+# CHECK-NEXT: [12] - 0x00000000000800 - CyUnitVD
+# CHECK-NEXT: [13] - 0x00000000001000 - CyUnitVM
+# CHECK: [0] Code Region - foo
diff --git a/llvm/test/tools/llvm-mca/processor-resources-debug.s b/llvm/test/tools/llvm-mca/processor-resources-debug.s
new file mode 100644
index 0000000000000..632516b2c2b10
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/processor-resources-debug.s
@@ -0,0 +1,29 @@
+# REQUIRES: asserts
+# REQUIRES: aarch64-registered-target
+
+# RUN: llvm-mca < %s -mtriple=aarch64 -mcpu=apple-m1 -debug 2>&1 | FileCheck %s
+
+# LLVM-MCA-BEGIN foo
+add x2, x0, x1
+# LLVM-MCA-END
+
+## Print detailed processor resources information on simulation
+# CHECK-COUNT-1: Processor resources:
+# CHECK-NEXT: [ 0] - 0x00000000000000 - InvalidUnit
+# CHECK-NEXT: [ 1] - 0x00000000000001 - CyUnitB (BufferSize=24)
+# CHECK-NEXT: [ 2] - 0x00000000000002 - CyUnitBR (BufferSize=-1)
+# CHECK-NEXT: [ 3] - 0x00000000000004 - CyUnitFloatDiv (BufferSize=-1)
+# CHECK-NEXT: [ 4] - 0x00000000000008 - CyUnitI (BufferSize=48)
+# CHECK-NEXT: [ 5] - 0x00000000000010 - CyUnitID (BufferSize=16)
+# CHECK-NEXT: [ 6] - 0x00000000000020 - CyUnitIM (BufferSize=32)
+# CHECK-NEXT: [ 7] - 0x00000000000040 - CyUnitIS (BufferSize=24)
+# CHECK-NEXT: [ 8] - 0x00000000000080 - CyUnitIntDiv (BufferSize=-1)
+# CHECK-NEXT: [ 9] - 0x00000000000100 - CyUnitLS (BufferSize=28)
+# CHECK-NEXT: [10] - 0x00000000000200 - CyUnitV (BufferSize=48)
+# CHECK-NEXT: [11] - 0x00000000000400 - CyUnitVC (BufferSize=16)
+# CHECK-NEXT: [12] - 0x00000000000800 - CyUnitVD (BufferSize=16)
+# CHECK-NEXT: [13] - 0x00000000001000 - CyUnitVM (BufferSize=32)
+# CHECK: [0] Code Region - foo
+
+## Do not print mask-only information on simulation
+# CHECK-NOT: Processor resource masks:
diff --git a/llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp b/llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp
index d6dfb65bf82e6..3b39bda935be6 100644
--- a/llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp
@@ -11,9 +11,12 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MCA/Support.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
#include <vector>
+#define DEBUG_TYPE "exegesis-sched-class-resolution"
+
namespace llvm {
namespace exegesis {
@@ -55,6 +58,15 @@ getNonRedundantWriteProcRes(const MCSchedClassDesc &SCDesc,
// Collect resource masks.
SmallVector<uint64_t> ProcResourceMasks(NumProcRes);
mca::computeProcResourceMasks(SM, ProcResourceMasks);
+ LLVM_DEBUG({
+ dbgs() << "\nProcessor resource masks:\n";
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ dbgs() << '[' << format_decimal(I, 2) << "] " << " - "
+ << format_hex(ProcResourceMasks[I], 16) << " - " << Desc.Name
+ << '\n';
+ }
+ });
// Sort entries by smaller resources for (basic) topological ordering.
using ResourceMaskAndEntry = std::pair<uint64_t, const MCWriteProcResEntry *>;
More information about the llvm-commits
mailing list