[llvm] [MC] Use StringTable to reduce dynamic relocations (PR #144202)

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 13 21:00:21 PDT 2025


https://github.com/rnk created https://github.com/llvm/llvm-project/pull/144202

Dynamic relocations are expensive on ELF/Linux platforms because they are applied in userspace on process startup. Therefore, it is worth optimizing them to make PIE and PIC dylib builds start faster. In +asserts builds (non-NDEBUG), @nikic identified the schedule class name string pointers (`MCSchedClassDesc::Name`) as the leading source of dynamic relocations. [1]

This change uses llvm::StringTable and the StringToOffsetTable TableGen helper to turn the string pointers into 32-bit offsets into a separate character array.

The number of dynamic relocations is reduced by ~60%:
❯ llvm-readelf --dyn-relocations lib/libLLVM.so | wc -l
381376 # before
155156 # after

The test suite time is modestly improved, though I'm running on a shared, noisy workstation VM with a ton of cores:
https://gist.github.com/rnk/f38882c2fe2e63d0eb58b8fffeab69de
Testing Time: 100.88s  # before
Testing Time: 78.50s   # after
Testing Time: 96.25s   # before again

I haven't used any fancy hyperfine/denoising tools, but I think the result is clearly visible and IMO we should just ship it.

[1] https://gist.github.com/nikic/554f0a544ca15d5219788f1030f78c5a

>From 551c831e2cfaea5d076460e017d4c5cbd190dd0c Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk at google.com>
Date: Fri, 13 Jun 2025 20:05:53 -0700
Subject: [PATCH 1/2] [MC] Use StringTable to reduce dynamic relocations

Linux PIC/PIE process startup is more expensive than on other OSes because
the kernel doesn't collude to share a single copy of the pre-relocated
image pages. Instead, ld.so has to apply relative relocations at
startup.

According to @nikic , these tables were the leading source of dynamic
relocations in +asserts (non-NDEBUG) builds.

TODO: Data, check-llvm before & after
---
 llvm/include/llvm/MC/MCSchedule.h         | 14 +++++++++++++-
 llvm/lib/MC/MCSchedule.cpp                |  1 +
 llvm/lib/MCA/InstrBuilder.cpp             |  3 ++-
 llvm/tools/llvm-exegesis/lib/Analysis.cpp |  9 +++++----
 llvm/utils/TableGen/SubtargetEmitter.cpp  | 21 +++++++++++++++++----
 5 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
index eb71f4581bd61..ba5ceae71aaaf 100644
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -15,6 +15,7 @@
 #define LLVM_MC_MCSCHEDULE_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringTable.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -124,7 +125,7 @@ struct MCSchedClassDesc {
   static const unsigned short VariantNumMicroOps = InvalidNumMicroOps - 1;
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  const char* Name;
+  uint32_t NameOff;
 #endif
   uint16_t NumMicroOps : 13;
   uint16_t BeginGroup : 1;
@@ -324,6 +325,9 @@ struct MCSchedModel {
   const MCSchedClassDesc *SchedClassTable;
   unsigned NumProcResourceKinds;
   unsigned NumSchedClasses;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  const StringTable *SchedClassNames;
+#endif
   // Instruction itinerary tables used by InstrItineraryData.
   friend class InstrItineraryData;
   const InstrItinerary *InstrItineraries;
@@ -368,6 +372,14 @@ struct MCSchedModel {
     return &SchedClassTable[SchedClassIdx];
   }
 
+  StringRef getSchedClassName(unsigned SchedClassIdx) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+    return (*SchedClassNames)[SchedClassTable[SchedClassIdx].NameOff];
+#else
+    return "<unknown>";
+#endif
+  }
+
   /// Returns the latency value for the scheduling class.
   LLVM_ABI static int computeInstrLatency(const MCSubtargetInfo &STI,
                                           const MCSchedClassDesc &SCDesc);
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index 8aea08919f469..527ccf3fc36e0 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -37,6 +37,7 @@ const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
                                             0,
                                             0,
                                             nullptr,
+                                            nullptr,
                                             nullptr};
 
 int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 2bac99b6309af..cad25a6ddd3f5 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -75,7 +75,8 @@ static void initializeUsedResources(InstrDesc &ID,
       WithColor::warning()
           << "Ignoring invalid write of zero cycles on processor resource "
           << PR.Name << "\n";
-      WithColor::note() << "found in scheduling class " << SCDesc.Name
+      WithColor::note() << "found in scheduling class "
+                        << SM.getSchedClassName(ID.SchedClassID)
                         << " (write index #" << I << ")\n";
 #endif
       continue;
diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.cpp b/llvm/tools/llvm-exegesis/lib/Analysis.cpp
index be10c32cf08d5..fb843285ada2a 100644
--- a/llvm/tools/llvm-exegesis/lib/Analysis.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Analysis.cpp
@@ -137,9 +137,9 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
   std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId(
       State_.getSubtargetInfo(), State_.getInstrInfo(), MCI);
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  const MCSchedClassDesc *const SCDesc =
-      State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId);
-  writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
+  StringRef SCDescName =
+      State_.getSubtargetInfo().getSchedModel().getSchedClassName(SchedClassId);
+  writeEscaped<kEscapeCsv>(OS, SCDescName);
 #else
   OS << SchedClassId;
 #endif
@@ -563,7 +563,8 @@ Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
           "class=\"sched-class-name\">";
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-    writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
+    writeEscaped<kEscapeHtml>(OS, SI.getSchedModel().getSchedClassName(
+                                      RSCAndPoints.RSC.SchedClassId));
 #else
     OS << RSCAndPoints.RSC.SchedClassId;
 #endif
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index ca008e256a70f..e58ed7090d114 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringToOffsetTable.h"
 #include "llvm/TableGen/TableGenBackend.h"
 #include "llvm/TargetParser/SubtargetFeature.h"
 #include <algorithm>
@@ -1456,6 +1457,10 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables,
   }
   OS << "}; // " << Target << "ReadAdvanceTable\n";
 
+  // Pool all SchedClass names in a string table.
+  StringToOffsetTable StrTab;
+  unsigned InvalidNameOff = StrTab.GetOrAddStringOffset("InvalidSchedClass");
+
   // Emit a SchedClass table for each processor.
   for (const auto &[Idx, Proc] : enumerate(SchedModels.procModels())) {
     if (!Proc.hasInstrSchedModel())
@@ -1473,14 +1478,15 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables,
     // name and position.
     assert(SchedModels.getSchedClass(0).Name == "NoInstrModel" &&
            "invalid class not first");
-    OS << "  {DBGFIELD(\"InvalidSchedClass\")  "
+    OS << "  {DBGFIELD(" << InvalidNameOff << ")  "
        << MCSchedClassDesc::InvalidNumMicroOps
        << ", false, false, false, 0, 0,  0, 0,  0, 0},\n";
 
     for (unsigned SCIdx = 1, SCEnd = SCTab.size(); SCIdx != SCEnd; ++SCIdx) {
       MCSchedClassDesc &MCDesc = SCTab[SCIdx];
       const CodeGenSchedClass &SchedClass = SchedModels.getSchedClass(SCIdx);
-      OS << "  {DBGFIELD(\"" << SchedClass.Name << "\") ";
+      unsigned NameOff = StrTab.GetOrAddStringOffset(SchedClass.Name);
+      OS << "  {DBGFIELD(" << NameOff << ") ";
       if (SchedClass.Name.size() < 18)
         OS.indent(18 - SchedClass.Name.size());
       OS << MCDesc.NumMicroOps << ", " << (MCDesc.BeginGroup ? "true" : "false")
@@ -1495,6 +1501,8 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables,
     }
     OS << "}; // " << Proc.ModelName << "SchedClasses\n";
   }
+
+  StrTab.EmitStringTableDef(OS, Target + "SchedClassNames");
 }
 
 void SubtargetEmitter::emitProcessorModels(raw_ostream &OS) {
@@ -1548,6 +1556,8 @@ void SubtargetEmitter::emitProcessorModels(raw_ostream &OS) {
     else
       OS << "  nullptr, nullptr, 0, 0,"
          << " // No instruction-level machine model.\n";
+    OS << "  DBGVAL_OR_NULLPTR(&" << Target
+       << "SchedClassNames), // SchedClassNames\n";
     if (PM.hasItineraries())
       OS << "  " << PM.ItinsDef->getName() << ",\n";
     else
@@ -1569,8 +1579,10 @@ void SubtargetEmitter::emitSchedModel(raw_ostream &OS) {
      << "#endif\n"
      << "#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)\n"
      << "#define DBGFIELD(x) x,\n"
+     << "#define DBGVAL_OR_NULLPTR(x) x\n"
      << "#else\n"
      << "#define DBGFIELD(x)\n"
+     << "#define DBGVAL_OR_NULLPTR(x) nullptr\n"
      << "#endif\n";
 
   if (SchedModels.hasItineraries()) {
@@ -1588,10 +1600,11 @@ void SubtargetEmitter::emitSchedModel(raw_ostream &OS) {
   }
   emitSchedClassTables(SchedTables, OS);
 
-  OS << "\n#undef DBGFIELD\n";
-
   // Emit the processor machine model
   emitProcessorModels(OS);
+
+  OS << "\n#undef DBGFIELD\n";
+  OS << "\n#undef DBGVAL_OR_NULLPTR\n";
 }
 
 static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {

>From d9e4f0f02fb1042c333b5b67852e25102f980a7a Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk at google.com>
Date: Fri, 13 Jun 2025 20:29:31 -0700
Subject: [PATCH 2/2] fix tblgen tests

---
 .../TableGen/CompressWriteLatencyEntry.td     |  8 +++----
 llvm/test/TableGen/InvalidMCSchedClassDesc.td | 22 +++++++++----------
 llvm/utils/TableGen/SubtargetEmitter.cpp      |  2 +-
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/llvm/test/TableGen/CompressWriteLatencyEntry.td b/llvm/test/TableGen/CompressWriteLatencyEntry.td
index 88273e8858448..d6a9f0ac0dd76 100644
--- a/llvm/test/TableGen/CompressWriteLatencyEntry.td
+++ b/llvm/test/TableGen/CompressWriteLatencyEntry.td
@@ -33,10 +33,10 @@ def Read_D : SchedRead;
 // CHECK-NEXT: }; // MyTargetReadAdvanceTable
 
 // CHECK:  static const llvm::MCSchedClassDesc SchedModel_ASchedClasses[] = {
-// CHECK-NEXT:  {DBGFIELD("InvalidSchedClass")  8191, false, false, false, 0, 0,  0, 0,  0, 0},
-// CHECK-NEXT:  {DBGFIELD("Inst_A")             1, false, false, false,  0, 0,  1, 1,  0, 0}, // #1
-// CHECK-NEXT:  {DBGFIELD("Inst_B")             1, false, false, false,  0, 0,  2, 1,  0, 0}, // #2
-// CHECK-NEXT:  {DBGFIELD("Inst_C")             1, false, false, false,  0, 0,  1, 1,  1, 1}, // #3
+// CHECK-NEXT:  {DBGFIELD(1)  8191, false, false, false, 0, 0,  0, 0,  0, 0},
+// CHECK-NEXT:  {DBGFIELD(/*Inst_A*/ {{[0-9]+}})             1, false, false, false,  0, 0,  1, 1,  0, 0}, // #1
+// CHECK-NEXT:  {DBGFIELD(/*Inst_B*/ {{[0-9]+}})             1, false, false, false,  0, 0,  2, 1,  0, 0}, // #2
+// CHECK-NEXT:  {DBGFIELD(/*Inst_C*/ {{[0-9]+}})             1, false, false, false,  0, 0,  1, 1,  1, 1}, // #3
 // CHECK-NEXT: }; // SchedModel_ASchedClasses
 
 let SchedModel = SchedModel_A in {
diff --git a/llvm/test/TableGen/InvalidMCSchedClassDesc.td b/llvm/test/TableGen/InvalidMCSchedClassDesc.td
index de5392237a84c..e43edd4174589 100644
--- a/llvm/test/TableGen/InvalidMCSchedClassDesc.td
+++ b/llvm/test/TableGen/InvalidMCSchedClassDesc.td
@@ -1,13 +1,13 @@
 // RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s
-// Check if it is valid MCSchedClassDesc if didn't have the resources. 
+// Check if it is valid MCSchedClassDesc if didn't have the resources.
 
 include "llvm/Target/Target.td"
 
 def MyTarget : Target;
 
 let OutOperandList = (outs), InOperandList = (ins) in {
-  def Inst_A : Instruction; 
-  def Inst_B : Instruction; 
+  def Inst_A : Instruction;
+  def Inst_B : Instruction;
 }
 
 let CompleteModel = 0 in {
@@ -18,8 +18,8 @@ let CompleteModel = 0 in {
 
 // Inst_B didn't have the resoures, and it is invalid.
 // CHECK: SchedModel_ASchedClasses[] = {
-// CHECK: {DBGFIELD("Inst_A")             1
-// CHECK-NEXT: {DBGFIELD("Inst_B")             8191
+// CHECK: {DBGFIELD(/*Inst_A*/ 19)             1
+// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ 26)             8191
 let SchedModel = SchedModel_A in {
   def Write_A : SchedWriteRes<[]>;
   def : InstRW<[Write_A], (instrs Inst_A)>;
@@ -27,18 +27,18 @@ let SchedModel = SchedModel_A in {
 
 // Inst_A didn't have the resoures, and it is invalid.
 // CHECK: SchedModel_BSchedClasses[] = {
-// CHECK: {DBGFIELD("Inst_A")             8191
-// CHECK-NEXT: {DBGFIELD("Inst_B")             1 
+// CHECK: {DBGFIELD(/*Inst_A*/ 19)             8191
+// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ 26)             1
 let SchedModel = SchedModel_B in {
-  def Write_B: SchedWriteRes<[]>; 
+  def Write_B: SchedWriteRes<[]>;
   def : InstRW<[Write_B], (instrs Inst_B)>;
 }
 
 // CHECK: SchedModel_CSchedClasses[] = {
-// CHECK: {DBGFIELD("Inst_A")             1
-// CHECK-NEXT: {DBGFIELD("Inst_B")             1
+// CHECK: {DBGFIELD(/*Inst_A*/ 19)             1
+// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ 26)             1
 let SchedModel = SchedModel_C in {
-  def Write_C: SchedWriteRes<[]>; 
+  def Write_C: SchedWriteRes<[]>;
   def : InstRW<[Write_C], (instrs Inst_A, Inst_B)>;
 }
 
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index e58ed7090d114..fb36890f547b2 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1486,7 +1486,7 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables,
       MCSchedClassDesc &MCDesc = SCTab[SCIdx];
       const CodeGenSchedClass &SchedClass = SchedModels.getSchedClass(SCIdx);
       unsigned NameOff = StrTab.GetOrAddStringOffset(SchedClass.Name);
-      OS << "  {DBGFIELD(" << NameOff << ") ";
+      OS << "  {DBGFIELD(/*" << SchedClass.Name << "*/ " << NameOff << ") ";
       if (SchedClass.Name.size() < 18)
         OS.indent(18 - SchedClass.Name.size());
       OS << MCDesc.NumMicroOps << ", " << (MCDesc.BeginGroup ? "true" : "false")



More information about the llvm-commits mailing list