[llvm] [MC] Use StringTable to reduce dynamic relocations (PR #144202)
Reid Kleckner via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 13 21:00:21 PDT 2025
https://github.com/rnk created https://github.com/llvm/llvm-project/pull/144202
Dynamic relocations are expensive on ELF/Linux platforms because they are applied in userspace on process startup. Therefore, it is worth optimizing them to make PIE and PIC dylib builds faster. In +asserts builds (non-NDEBUG), @nikic identified these schedule class name string pointers as the leading source of dynamic relocations. [1]
This change uses llvm::StringTable and the StringToOffsetTable TableGen helper to turn the string pointers into 32-bit offsets into a separate character array.
The number of dynamic relocations is reduced by ~60%:
❯ llvm-readelf --dyn-relocations lib/libLLVM.so | wc -l
381376 # before
155156 # after
The test suite time improves modestly, though I'm measuring on a shared, noisy workstation VM with a ton of cores:
https://gist.github.com/rnk/f38882c2fe2e63d0eb58b8fffeab69de
Testing Time: 100.88s # before
Testing Time: 78.50s # after
Testing Time: 96.25s # before again
I haven't used any fancy hyperfine/denoising tools, but I think the result is clearly visible and IMO we should just ship it.
[1] https://gist.github.com/nikic/554f0a544ca15d5219788f1030f78c5a
>From 551c831e2cfaea5d076460e017d4c5cbd190dd0c Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk at google.com>
Date: Fri, 13 Jun 2025 20:05:53 -0700
Subject: [PATCH 1/2] [MC] Use StringTable to reduce dynamic relocations
Linux PIC/PIE process startup is more expensive than on other OSes
because the kernel doesn't collude to share a single copy of the
pre-relocated image pages. Instead, ld.so has to apply relative
relocations at startup.
According to @nikic , these tables were the leading source of dynamic
relocations in +asserts (non-NDEBUG) builds.
TODO: Data, check-llvm before & after
---
llvm/include/llvm/MC/MCSchedule.h | 14 +++++++++++++-
llvm/lib/MC/MCSchedule.cpp | 1 +
llvm/lib/MCA/InstrBuilder.cpp | 3 ++-
llvm/tools/llvm-exegesis/lib/Analysis.cpp | 9 +++++----
llvm/utils/TableGen/SubtargetEmitter.cpp | 21 +++++++++++++++++----
5 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
index eb71f4581bd61..ba5ceae71aaaf 100644
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -15,6 +15,7 @@
#define LLVM_MC_MCSCHEDULE_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringTable.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -124,7 +125,7 @@ struct MCSchedClassDesc {
static const unsigned short VariantNumMicroOps = InvalidNumMicroOps - 1;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- const char* Name;
+ uint32_t NameOff;
#endif
uint16_t NumMicroOps : 13;
uint16_t BeginGroup : 1;
@@ -324,6 +325,9 @@ struct MCSchedModel {
const MCSchedClassDesc *SchedClassTable;
unsigned NumProcResourceKinds;
unsigned NumSchedClasses;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ const StringTable *SchedClassNames;
+#endif
// Instruction itinerary tables used by InstrItineraryData.
friend class InstrItineraryData;
const InstrItinerary *InstrItineraries;
@@ -368,6 +372,14 @@ struct MCSchedModel {
return &SchedClassTable[SchedClassIdx];
}
+ StringRef getSchedClassName(unsigned SchedClassIdx) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ return (*SchedClassNames)[SchedClassTable[SchedClassIdx].NameOff];
+#else
+ return "<unknown>";
+#endif
+ }
+
/// Returns the latency value for the scheduling class.
LLVM_ABI static int computeInstrLatency(const MCSubtargetInfo &STI,
const MCSchedClassDesc &SCDesc);
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index 8aea08919f469..527ccf3fc36e0 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -37,6 +37,7 @@ const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
0,
0,
nullptr,
+ nullptr,
nullptr};
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 2bac99b6309af..cad25a6ddd3f5 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -75,7 +75,8 @@ static void initializeUsedResources(InstrDesc &ID,
WithColor::warning()
<< "Ignoring invalid write of zero cycles on processor resource "
<< PR.Name << "\n";
- WithColor::note() << "found in scheduling class " << SCDesc.Name
+ WithColor::note() << "found in scheduling class "
+ << SM.getSchedClassName(ID.SchedClassID)
<< " (write index #" << I << ")\n";
#endif
continue;
diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.cpp b/llvm/tools/llvm-exegesis/lib/Analysis.cpp
index be10c32cf08d5..fb843285ada2a 100644
--- a/llvm/tools/llvm-exegesis/lib/Analysis.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Analysis.cpp
@@ -137,9 +137,9 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId(
State_.getSubtargetInfo(), State_.getInstrInfo(), MCI);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- const MCSchedClassDesc *const SCDesc =
- State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId);
- writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
+ StringRef SCDescName =
+ State_.getSubtargetInfo().getSchedModel().getSchedClassName(SchedClassId);
+ writeEscaped<kEscapeCsv>(OS, SCDescName);
#else
OS << SchedClassId;
#endif
@@ -563,7 +563,8 @@ Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
OS << "<div class=\"inconsistency\"><p>Sched Class <span "
"class=\"sched-class-name\">";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
+ writeEscaped<kEscapeHtml>(OS, SI.getSchedModel().getSchedClassName(
+ RSCAndPoints.RSC.SchedClassId));
#else
OS << RSCAndPoints.RSC.SchedClassId;
#endif
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index ca008e256a70f..e58ed7090d114 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include <algorithm>
@@ -1456,6 +1457,10 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables,
}
OS << "}; // " << Target << "ReadAdvanceTable\n";
+ // Pool all SchedClass names in a string table.
+ StringToOffsetTable StrTab;
+ unsigned InvalidNameOff = StrTab.GetOrAddStringOffset("InvalidSchedClass");
+
// Emit a SchedClass table for each processor.
for (const auto &[Idx, Proc] : enumerate(SchedModels.procModels())) {
if (!Proc.hasInstrSchedModel())
@@ -1473,14 +1478,15 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables,
// name and position.
assert(SchedModels.getSchedClass(0).Name == "NoInstrModel" &&
"invalid class not first");
- OS << " {DBGFIELD(\"InvalidSchedClass\") "
+ OS << " {DBGFIELD(" << InvalidNameOff << ") "
<< MCSchedClassDesc::InvalidNumMicroOps
<< ", false, false, false, 0, 0, 0, 0, 0, 0},\n";
for (unsigned SCIdx = 1, SCEnd = SCTab.size(); SCIdx != SCEnd; ++SCIdx) {
MCSchedClassDesc &MCDesc = SCTab[SCIdx];
const CodeGenSchedClass &SchedClass = SchedModels.getSchedClass(SCIdx);
- OS << " {DBGFIELD(\"" << SchedClass.Name << "\") ";
+ unsigned NameOff = StrTab.GetOrAddStringOffset(SchedClass.Name);
+ OS << " {DBGFIELD(" << NameOff << ") ";
if (SchedClass.Name.size() < 18)
OS.indent(18 - SchedClass.Name.size());
OS << MCDesc.NumMicroOps << ", " << (MCDesc.BeginGroup ? "true" : "false")
@@ -1495,6 +1501,8 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables,
}
OS << "}; // " << Proc.ModelName << "SchedClasses\n";
}
+
+ StrTab.EmitStringTableDef(OS, Target + "SchedClassNames");
}
void SubtargetEmitter::emitProcessorModels(raw_ostream &OS) {
@@ -1548,6 +1556,8 @@ void SubtargetEmitter::emitProcessorModels(raw_ostream &OS) {
else
OS << " nullptr, nullptr, 0, 0,"
<< " // No instruction-level machine model.\n";
+ OS << " DBGVAL_OR_NULLPTR(&" << Target
+ << "SchedClassNames), // SchedClassNames\n";
if (PM.hasItineraries())
OS << " " << PM.ItinsDef->getName() << ",\n";
else
@@ -1569,8 +1579,10 @@ void SubtargetEmitter::emitSchedModel(raw_ostream &OS) {
<< "#endif\n"
<< "#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)\n"
<< "#define DBGFIELD(x) x,\n"
+ << "#define DBGVAL_OR_NULLPTR(x) x\n"
<< "#else\n"
<< "#define DBGFIELD(x)\n"
+ << "#define DBGVAL_OR_NULLPTR(x) nullptr\n"
<< "#endif\n";
if (SchedModels.hasItineraries()) {
@@ -1588,10 +1600,11 @@ void SubtargetEmitter::emitSchedModel(raw_ostream &OS) {
}
emitSchedClassTables(SchedTables, OS);
- OS << "\n#undef DBGFIELD\n";
-
// Emit the processor machine model
emitProcessorModels(OS);
+
+ OS << "\n#undef DBGFIELD\n";
+ OS << "\n#undef DBGVAL_OR_NULLPTR\n";
}
static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
>From d9e4f0f02fb1042c333b5b67852e25102f980a7a Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk at google.com>
Date: Fri, 13 Jun 2025 20:29:31 -0700
Subject: [PATCH 2/2] fix tblgen tests
---
.../TableGen/CompressWriteLatencyEntry.td | 8 +++----
llvm/test/TableGen/InvalidMCSchedClassDesc.td | 22 +++++++++----------
llvm/utils/TableGen/SubtargetEmitter.cpp | 2 +-
3 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/llvm/test/TableGen/CompressWriteLatencyEntry.td b/llvm/test/TableGen/CompressWriteLatencyEntry.td
index 88273e8858448..d6a9f0ac0dd76 100644
--- a/llvm/test/TableGen/CompressWriteLatencyEntry.td
+++ b/llvm/test/TableGen/CompressWriteLatencyEntry.td
@@ -33,10 +33,10 @@ def Read_D : SchedRead;
// CHECK-NEXT: }; // MyTargetReadAdvanceTable
// CHECK: static const llvm::MCSchedClassDesc SchedModel_ASchedClasses[] = {
-// CHECK-NEXT: {DBGFIELD("InvalidSchedClass") 8191, false, false, false, 0, 0, 0, 0, 0, 0},
-// CHECK-NEXT: {DBGFIELD("Inst_A") 1, false, false, false, 0, 0, 1, 1, 0, 0}, // #1
-// CHECK-NEXT: {DBGFIELD("Inst_B") 1, false, false, false, 0, 0, 2, 1, 0, 0}, // #2
-// CHECK-NEXT: {DBGFIELD("Inst_C") 1, false, false, false, 0, 0, 1, 1, 1, 1}, // #3
+// CHECK-NEXT: {DBGFIELD(1) 8191, false, false, false, 0, 0, 0, 0, 0, 0},
+// CHECK-NEXT: {DBGFIELD(/*Inst_A*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 1, 1, 0, 0}, // #1
+// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 2, 1, 0, 0}, // #2
+// CHECK-NEXT: {DBGFIELD(/*Inst_C*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 1, 1, 1, 1}, // #3
// CHECK-NEXT: }; // SchedModel_ASchedClasses
let SchedModel = SchedModel_A in {
diff --git a/llvm/test/TableGen/InvalidMCSchedClassDesc.td b/llvm/test/TableGen/InvalidMCSchedClassDesc.td
index de5392237a84c..e43edd4174589 100644
--- a/llvm/test/TableGen/InvalidMCSchedClassDesc.td
+++ b/llvm/test/TableGen/InvalidMCSchedClassDesc.td
@@ -1,13 +1,13 @@
// RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s
-// Check if it is valid MCSchedClassDesc if didn't have the resources.
+// Check if it is valid MCSchedClassDesc if didn't have the resources.
include "llvm/Target/Target.td"
def MyTarget : Target;
let OutOperandList = (outs), InOperandList = (ins) in {
- def Inst_A : Instruction;
- def Inst_B : Instruction;
+ def Inst_A : Instruction;
+ def Inst_B : Instruction;
}
let CompleteModel = 0 in {
@@ -18,8 +18,8 @@ let CompleteModel = 0 in {
// Inst_B didn't have the resoures, and it is invalid.
// CHECK: SchedModel_ASchedClasses[] = {
-// CHECK: {DBGFIELD("Inst_A") 1
-// CHECK-NEXT: {DBGFIELD("Inst_B") 8191
+// CHECK: {DBGFIELD(/*Inst_A*/ 19) 1
+// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ 26) 8191
let SchedModel = SchedModel_A in {
def Write_A : SchedWriteRes<[]>;
def : InstRW<[Write_A], (instrs Inst_A)>;
@@ -27,18 +27,18 @@ let SchedModel = SchedModel_A in {
// Inst_A didn't have the resoures, and it is invalid.
// CHECK: SchedModel_BSchedClasses[] = {
-// CHECK: {DBGFIELD("Inst_A") 8191
-// CHECK-NEXT: {DBGFIELD("Inst_B") 1
+// CHECK: {DBGFIELD(/*Inst_A*/ 19) 8191
+// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ 26) 1
let SchedModel = SchedModel_B in {
- def Write_B: SchedWriteRes<[]>;
+ def Write_B: SchedWriteRes<[]>;
def : InstRW<[Write_B], (instrs Inst_B)>;
}
// CHECK: SchedModel_CSchedClasses[] = {
-// CHECK: {DBGFIELD("Inst_A") 1
-// CHECK-NEXT: {DBGFIELD("Inst_B") 1
+// CHECK: {DBGFIELD(/*Inst_A*/ 19) 1
+// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ 26) 1
let SchedModel = SchedModel_C in {
- def Write_C: SchedWriteRes<[]>;
+ def Write_C: SchedWriteRes<[]>;
def : InstRW<[Write_C], (instrs Inst_A, Inst_B)>;
}
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index e58ed7090d114..fb36890f547b2 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1486,7 +1486,7 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables,
MCSchedClassDesc &MCDesc = SCTab[SCIdx];
const CodeGenSchedClass &SchedClass = SchedModels.getSchedClass(SCIdx);
unsigned NameOff = StrTab.GetOrAddStringOffset(SchedClass.Name);
- OS << " {DBGFIELD(" << NameOff << ") ";
+ OS << " {DBGFIELD(/*" << SchedClass.Name << "*/ " << NameOff << ") ";
if (SchedClass.Name.size() < 18)
OS.indent(18 - SchedClass.Name.size());
OS << MCDesc.NumMicroOps << ", " << (MCDesc.BeginGroup ? "true" : "false")
More information about the llvm-commits
mailing list