[lld] [lld-macho] Always store symbol name length eagerly (NFC) (PR #106906)

Daniel Bertalan via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 1 05:58:03 PDT 2024


https://github.com/BertalanD created https://github.com/llvm/llvm-project/pull/106906

The only instance where we weren't already passing a `StringRef` with a known length to `Symbol`'s constructor is where the argument is a string literal. Even in that case, lazy `strlen` calls don't make sense, as the compiler can constant-evaluate the `StringRef(const char*)` constructor.

For symbols that go into the symbol table we need the length when calculating the hash anyway. We could get away with not calling `getName()` for local symbols, but the total contribution of `strlen` to the run time is already below 1%, so that would just complicate the code for a negligible benefit.

>From b122c2dff5819c5a3b589ce30326784c2435bcc9 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Sun, 1 Sep 2024 14:38:41 +0200
Subject: [PATCH] [lld-macho] Always store symbol name length eagerly (NFC)

The only instance where we weren't already passing a `StringRef` with a
known length to `Symbol`'s constructor is where the argument is a string
literal. Even in that case, lazy `strlen` calls don't make sense, as the
compiler can constant-evaluate the `StringRef(const char*)` constructor.

For symbols that go into the symbol table we need the length when
calculating the hash anyway. We could get away with not calling
`getName()` for local symbols, but the total contribution of `strlen` to
the run time is already below 1%, so that would just complicate the code
for a negligible benefit.
---
 lld/MachO/Symbols.cpp |  2 +-
 lld/MachO/Symbols.h   | 29 +++++++++--------------------
 2 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 7bac78a59e4fec..f52da4f48aafba 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -52,7 +52,7 @@ uint64_t Symbol::getLazyPtrVA() const {
 uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); }
 uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); }
 
-Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec,
+Defined::Defined(StringRef name, InputFile *file, InputSection *isec,
                  uint64_t value, uint64_t size, bool isWeakDef, bool isExternal,
                  bool isPrivateExtern, bool includeInSymtab,
                  bool isReferencedDynamically, bool noDeadStrip,
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index be5d135b8cd54f..bd60226f38ad30 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -21,14 +21,6 @@ namespace macho {
 
 class MachHeaderSection;
 
-struct StringRefZ {
-  StringRefZ(const char *s) : data(s), size(-1) {}
-  StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
-
-  const char *data;
-  const uint32_t size;
-};
-
 class Symbol {
 public:
   enum Kind {
@@ -45,11 +37,7 @@ class Symbol {
 
   Kind kind() const { return symbolKind; }
 
-  StringRef getName() const {
-    if (nameSize == (uint32_t)-1)
-      nameSize = strlen(nameData);
-    return {nameData, nameSize};
-  }
+  StringRef getName() const { return {nameData, nameSize}; }
 
   bool isLive() const { return used; }
   bool isLazy() const {
@@ -96,15 +84,15 @@ class Symbol {
   InputFile *getFile() const { return file; }
 
 protected:
-  Symbol(Kind k, StringRefZ name, InputFile *file)
-      : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
+  Symbol(Kind k, StringRef name, InputFile *file)
+      : symbolKind(k), nameData(name.data()), file(file), nameSize(name.size()),
         isUsedInRegularObj(!file || isa<ObjFile>(file)),
         used(!config->deadStrip) {}
 
   Kind symbolKind;
   const char *nameData;
   InputFile *file;
-  mutable uint32_t nameSize;
+  uint32_t nameSize;
 
 public:
   // True if this symbol was referenced by a regular (non-bitcode) object.
@@ -116,7 +104,7 @@ class Symbol {
 
 class Defined : public Symbol {
 public:
-  Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
+  Defined(StringRef name, InputFile *file, InputSection *isec, uint64_t value,
           uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
           bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip,
           bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false,
@@ -206,7 +194,7 @@ enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 };
 
 class Undefined : public Symbol {
 public:
-  Undefined(StringRefZ name, InputFile *file, RefState refState,
+  Undefined(StringRef name, InputFile *file, RefState refState,
             bool wasBitcodeSymbol)
       : Symbol(UndefinedKind, name, file), refState(refState),
         wasBitcodeSymbol(wasBitcodeSymbol) {
@@ -238,7 +226,7 @@ class Undefined : public Symbol {
 // to regular defined symbols in a __common section.
 class CommonSymbol : public Symbol {
 public:
-  CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
+  CommonSymbol(StringRef name, InputFile *file, uint64_t size, uint32_t align,
                bool isPrivateExtern)
       : Symbol(CommonKind, name, file), size(size),
         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
@@ -255,7 +243,7 @@ class CommonSymbol : public Symbol {
 
 class DylibSymbol : public Symbol {
 public:
-  DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
+  DylibSymbol(DylibFile *file, StringRef name, bool isWeakDef,
               RefState refState, bool isTlv)
       : Symbol(DylibKind, name, file), shouldReexport(false),
         refState(refState), weakDef(isWeakDef), tlv(isTlv) {
@@ -301,6 +289,7 @@ class DylibSymbol : public Symbol {
   }
 
   bool shouldReexport : 1;
+
 private:
   RefState refState : 2;
   const bool weakDef : 1;



More information about the llvm-commits mailing list