[lld] 77152a6 - [LLD][ELF] Optimize linker script filename glob pattern matching NFC

Andrew Ng via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 16 02:27:48 PDT 2020


Author: Andrew Ng
Date: 2020-09-16T10:26:11+01:00
New Revision: 77152a6b7ac07ce65568d7c69305653e7cad4bb0

URL: https://github.com/llvm/llvm-project/commit/77152a6b7ac07ce65568d7c69305653e7cad4bb0
DIFF: https://github.com/llvm/llvm-project/commit/77152a6b7ac07ce65568d7c69305653e7cad4bb0.diff

LOG: [LLD][ELF] Optimize linker script filename glob pattern matching NFC

Optimize the filename glob pattern matching in
LinkerScript::computeInputSections() and LinkerScript::shouldKeep().

Add InputFile::getNameForScript() which gets and, if required, caches the
InputFile's name used for linker script matching. This avoids the
overhead of name creation that was in getFilename() in LinkerScript.cpp.

Add InputSectionDescription::matchesFile() and
SectionPattern::excludesFile() which perform the glob pattern matching
for an InputFile and make use of a cache of the previous result. As both
computeInputSections() and shouldKeep() process sections in order and
the sections of the same InputFile are contiguous, these single-entry
caches can significantly improve performance for more complex glob
patterns.

These changes have been seen to reduce link time with --gc-sections by
up to ~40% with linker scripts that contain KEEP filename glob patterns
such as "*crtbegin*.o".

Differential Revision: https://reviews.llvm.org/D87469

Added: 
    

Modified: 
    lld/ELF/AArch64ErrataFix.h
    lld/ELF/ARMErrataFix.h
    lld/ELF/InputFiles.cpp
    lld/ELF/InputFiles.h
    lld/ELF/LinkerScript.cpp
    lld/ELF/LinkerScript.h
    lld/ELF/Relocations.h
    lld/include/lld/Common/Strings.h

Removed: 
    


################################################################################
diff  --git a/lld/ELF/AArch64ErrataFix.h b/lld/ELF/AArch64ErrataFix.h
index 0548b58751ff9..dfe57b95dd996 100644
--- a/lld/ELF/AArch64ErrataFix.h
+++ b/lld/ELF/AArch64ErrataFix.h
@@ -18,7 +18,7 @@ namespace elf {
 
 class Defined;
 class InputSection;
-struct InputSectionDescription;
+class InputSectionDescription;
 class OutputSection;
 class Patch843419Section;
 

diff  --git a/lld/ELF/ARMErrataFix.h b/lld/ELF/ARMErrataFix.h
index 5a39bcc75cd3b..a93609b35bafc 100644
--- a/lld/ELF/ARMErrataFix.h
+++ b/lld/ELF/ARMErrataFix.h
@@ -19,7 +19,7 @@ namespace elf {
 
 class Defined;
 class InputSection;
-struct InputSectionDescription;
+class InputSectionDescription;
 class OutputSection;
 class Patch657417Section;
 

diff  --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 63474b15e451e..bd079b41ac908 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -274,6 +274,16 @@ std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec,
   }
 }
 
+StringRef InputFile::getNameForScript() const {
+  if (archiveName.empty())
+    return getName();
+
+  if (nameForScriptCache.empty())
+    nameForScriptCache = (archiveName + Twine(':') + getName()).str();
+
+  return nameForScriptCache;
+}
+
 template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() {
   llvm::call_once(initDwarf, [this]() {
     dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(

diff  --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 7af85e417ca58..b1c83ddf384fb 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -92,6 +92,9 @@ class InputFile {
     return symbols;
   }
 
+  // Get filename to use for linker script processing.
+  StringRef getNameForScript() const;
+
   // Filename of .a which contained this file. If this file was
   // not in an archive file, it is the empty string. We use this
   // string for creating error messages.
@@ -147,6 +150,9 @@ class InputFile {
 
 private:
   const Kind fileKind;
+
+  // Cache for getNameForScript().
+  mutable std::string nameForScriptCache;
 };
 
 class ELFFileBase : public InputFile {

diff  --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 11f0fc9d5fbe2..ba51a8b402fd1 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -320,20 +320,33 @@ void LinkerScript::assignSymbol(SymbolAssignment *cmd, bool inSec) {
   cmd->sym->type = v.type;
 }
 
-static std::string getFilename(InputFile *file) {
-  if (!file)
-    return "";
-  if (file->archiveName.empty())
-    return std::string(file->getName());
-  return (file->archiveName + ':' + file->getName()).str();
+static inline StringRef getFilename(const InputFile *file) {
+  return file ? file->getNameForScript() : StringRef();
 }
 
-bool LinkerScript::shouldKeep(InputSectionBase *s) {
-  if (keptSections.empty())
+bool InputSectionDescription::matchesFile(const InputFile *file) const {
+  if (filePat.isTrivialMatchAll())
+    return true;
+
+  if (!matchesFileCache || matchesFileCache->first != file)
+    matchesFileCache.emplace(file, filePat.match(getFilename(file)));
+
+  return matchesFileCache->second;
+}
+
+bool SectionPattern::excludesFile(const InputFile *file) const {
+  if (excludedFilePat.empty())
     return false;
-  std::string filename = getFilename(s->file);
+
+  if (!excludesFileCache || excludesFileCache->first != file)
+    excludesFileCache.emplace(file, excludedFilePat.match(getFilename(file)));
+
+  return excludesFileCache->second;
+}
+
+bool LinkerScript::shouldKeep(InputSectionBase *s) {
   for (InputSectionDescription *id : keptSections)
-    if (id->filePat.match(filename))
+    if (id->matchesFile(s->file))
       for (SectionPattern &p : id->sectionPatterns)
         if (p.sectionPat.match(s->name) &&
             (s->flags & id->withFlags) == id->withFlags &&
@@ -433,9 +446,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd,
       if (!pat.sectionPat.match(sec->name))
         continue;
 
-      std::string filename = getFilename(sec->file);
-      if (!cmd->filePat.match(filename) ||
-          pat.excludedFilePat.match(filename) ||
+      if (!cmd->matchesFile(sec->file) || pat.excludesFile(sec->file) ||
           (sec->flags & cmd->withFlags) != cmd->withFlags ||
           (sec->flags & cmd->withoutFlags) != 0)
         continue;

diff  --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 4a1a5fd71b67f..efa473f45e308 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -29,6 +29,7 @@ namespace lld {
 namespace elf {
 
 class Defined;
+class InputFile;
 class InputSection;
 class InputSectionBase;
 class OutputSection;
@@ -146,19 +147,32 @@ struct MemoryRegion {
 // This struct represents one section match pattern in SECTIONS() command.
 // It can optionally have negative match pattern for EXCLUDED_FILE command.
 // Also it may be surrounded with SORT() command, so contains sorting rules.
-struct SectionPattern {
+class SectionPattern {
+  StringMatcher excludedFilePat;
+
+  // Cache of the most recent input argument and result of excludesFile().
+  mutable llvm::Optional<std::pair<const InputFile *, bool>> excludesFileCache;
+
+public:
   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
       : excludedFilePat(pat1), sectionPat(pat2),
         sortOuter(SortSectionPolicy::Default),
         sortInner(SortSectionPolicy::Default) {}
 
-  StringMatcher excludedFilePat;
+  bool excludesFile(const InputFile *file) const;
+
   StringMatcher sectionPat;
   SortSectionPolicy sortOuter;
   SortSectionPolicy sortInner;
 };
 
-struct InputSectionDescription : BaseCommand {
+class InputSectionDescription : public BaseCommand {
+  SingleStringMatcher filePat;
+
+  // Cache of the most recent input argument and result of matchesFile().
+  mutable llvm::Optional<std::pair<const InputFile *, bool>> matchesFileCache;
+
+public:
   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
                           uint64_t withoutFlags = 0)
       : BaseCommand(InputSectionKind), filePat(filePattern),
@@ -168,7 +182,7 @@ struct InputSectionDescription : BaseCommand {
     return c->kind == InputSectionKind;
   }
 
-  SingleStringMatcher filePat;
+  bool matchesFile(const InputFile *file) const;
 
   // Input sections that matches at least one of SectionPatterns
   // will be associated with this InputSectionDescription.

diff  --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 4f48082b8be9d..fccd56880718a 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -131,7 +131,7 @@ bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);
 
 class ThunkSection;
 class Thunk;
-struct InputSectionDescription;
+class InputSectionDescription;
 
 class ThunkCreator {
 public:

diff  --git a/lld/include/lld/Common/Strings.h b/lld/include/lld/Common/Strings.h
index 3940d2443cd45..38d93e01c0b95 100644
--- a/lld/include/lld/Common/Strings.h
+++ b/lld/include/lld/Common/Strings.h
@@ -39,6 +39,11 @@ class SingleStringMatcher {
   // Match s against this pattern, exactly if ExactMatch is true.
   bool match(llvm::StringRef s) const;
 
+  // Returns true for pattern "*" which will match all inputs.
+  bool isTrivialMatchAll() const {
+    return !ExactMatch && GlobPatternMatcher.isTrivialMatchAll();
+  }
+
 private:
   // Whether to do an exact match irregardless of the presence of wildcard
   // character.
@@ -69,7 +74,7 @@ class StringMatcher {
   // Add a new pattern to the existing ones to match against.
   void addPattern(SingleStringMatcher Matcher) { patterns.push_back(Matcher); }
 
-  bool empty() { return patterns.empty(); }
+  bool empty() const { return patterns.empty(); }
 
   // Match s against the patterns.
   bool match(llvm::StringRef s) const;


        


More information about the llvm-commits mailing list