[lld] r280544 - [ELF] - Use std::regex instead of hand written logic in elf::globMatch()

George Rimar via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 2 14:17:20 PDT 2016


Author: grimar
Date: Fri Sep  2 16:17:20 2016
New Revision: 280544

URL: http://llvm.org/viewvc/llvm-project?rev=280544&view=rev
Log:
[ELF] - Use std::regex instead of hand written logic in elf::globMatch()

Use a regular-expression matcher (llvm::Regex) instead of the hand-written glob matcher.

This patch is based on code and ideas by Rui Ueyama.

Differential revision: https://reviews.llvm.org/D23829

Added:
    lld/trunk/test/ELF/wildcards2.s
Modified:
    lld/trunk/ELF/LinkerScript.cpp
    lld/trunk/ELF/LinkerScript.h
    lld/trunk/ELF/Strings.cpp
    lld/trunk/ELF/Strings.h
    lld/trunk/ELF/SymbolTable.cpp
    lld/trunk/ELF/SymbolTable.h

Modified: lld/trunk/ELF/LinkerScript.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.cpp?rev=280544&r1=280543&r2=280544&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.cpp (original)
+++ lld/trunk/ELF/LinkerScript.cpp Fri Sep  2 16:17:20 2016
@@ -92,44 +92,35 @@ template <class ELFT> LinkerScript<ELFT>
 
 template <class ELFT>
 bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
-  for (StringRef Pat : Opt.KeptSections)
-    if (globMatch(Pat, S->getSectionName()))
-      return true;
-  return false;
-}
-
-static bool match(ArrayRef<StringRef> Patterns, StringRef S) {
-  for (StringRef Pat : Patterns)
-    if (globMatch(Pat, S))
+  for (Regex *Re : Opt.KeptSections)
+    if (Re->match(S->getSectionName()))
       return true;
   return false;
 }
 
 static bool fileMatches(const InputSectionDescription *Desc,
                         StringRef Filename) {
-  if (!globMatch(Desc->FilePattern, Filename))
-    return false;
-  return Desc->ExcludedFiles.empty() || !match(Desc->ExcludedFiles, Filename);
+  return const_cast<Regex &>(Desc->FileRe).match(Filename) &&
+         !const_cast<Regex &>(Desc->ExcludedFileRe).match(Filename);
 }
 
 // Returns input sections filtered by given glob patterns.
 template <class ELFT>
 std::vector<InputSectionBase<ELFT> *>
 LinkerScript<ELFT>::getInputSections(const InputSectionDescription *I) {
-  ArrayRef<StringRef> Patterns = I->SectionPatterns;
+  const Regex &Re = I->SectionRe;
   std::vector<InputSectionBase<ELFT> *> Ret;
   for (const std::unique_ptr<ObjectFile<ELFT>> &F :
        Symtab<ELFT>::X->getObjectFiles()) {
     if (fileMatches(I, sys::path::filename(F->getName())))
       for (InputSectionBase<ELFT> *S : F->getSections())
         if (!isDiscarded(S) && !S->OutSec &&
-            match(Patterns, S->getSectionName()))
+            const_cast<Regex &>(Re).match(S->getSectionName()))
           Ret.push_back(S);
   }
 
-  if (llvm::find(Patterns, "COMMON") != Patterns.end())
+  if (const_cast<Regex &>(Re).match("COMMON"))
     Ret.push_back(CommonInputSection<ELFT>::X);
-
   return Ret;
 }
 
@@ -634,7 +625,7 @@ private:
   std::vector<uint8_t> readOutputSectionFiller();
   std::vector<StringRef> readOutputSectionPhdrs();
   InputSectionDescription *readInputSectionDescription(StringRef Tok);
-  std::vector<StringRef> readInputFilePatterns();
+  Regex readFilePatterns();
   InputSectionDescription *readInputSectionRules(StringRef FilePattern);
   unsigned readPhdrType();
   SortKind readSortKind();
@@ -908,11 +899,11 @@ static int precedence(StringRef Op) {
       .Default(-1);
 }
 
-std::vector<StringRef> ScriptParser::readInputFilePatterns() {
+Regex ScriptParser::readFilePatterns() {
   std::vector<StringRef> V;
   while (!Error && !skip(")"))
     V.push_back(next());
-  return V;
+  return compileGlobPatterns(V);
 }
 
 SortKind ScriptParser::readSortKind() {
@@ -925,15 +916,13 @@ SortKind ScriptParser::readSortKind() {
 
 InputSectionDescription *
 ScriptParser::readInputSectionRules(StringRef FilePattern) {
-  auto *Cmd = new InputSectionDescription;
-  Cmd->FilePattern = FilePattern;
+  auto *Cmd = new InputSectionDescription(FilePattern);
   expect("(");
 
   // Read EXCLUDE_FILE().
   if (skip("EXCLUDE_FILE")) {
     expect("(");
-    while (!Error && !skip(")"))
-      Cmd->ExcludedFiles.push_back(next());
+    Cmd->ExcludedFileRe = readFilePatterns();
   }
 
   // Read SORT().
@@ -943,16 +932,16 @@ ScriptParser::readInputSectionRules(Stri
     if (SortKind K2 = readSortKind()) {
       Cmd->SortInner = K2;
       expect("(");
-      Cmd->SectionPatterns = readInputFilePatterns();
+      Cmd->SectionRe = readFilePatterns();
       expect(")");
     } else {
-      Cmd->SectionPatterns = readInputFilePatterns();
+      Cmd->SectionRe = readFilePatterns();
     }
     expect(")");
     return Cmd;
   }
 
-  Cmd->SectionPatterns = readInputFilePatterns();
+  Cmd->SectionRe = readFilePatterns();
   return Cmd;
 }
 
@@ -965,9 +954,7 @@ ScriptParser::readInputSectionDescriptio
     StringRef FilePattern = next();
     InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
     expect(")");
-    Opt.KeptSections.insert(Opt.KeptSections.end(),
-                            Cmd->SectionPatterns.begin(),
-                            Cmd->SectionPatterns.end());
+    Opt.KeptSections.push_back(&Cmd->SectionRe);
     return Cmd;
   }
   return readInputSectionRules(Tok);

Modified: lld/trunk/ELF/LinkerScript.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.h?rev=280544&r1=280543&r2=280544&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.h (original)
+++ lld/trunk/ELF/LinkerScript.h Fri Sep  2 16:17:20 2016
@@ -10,12 +10,14 @@
 #ifndef LLD_ELF_LINKER_SCRIPT_H
 #define LLD_ELF_LINKER_SCRIPT_H
 
+#include "Strings.h"
 #include "Writer.h"
 #include "lld/Core/LLVM.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
 #include <functional>
 
 namespace lld {
@@ -94,13 +96,15 @@ struct OutputSectionCommand : BaseComman
 enum SortKind { SortNone, SortByName, SortByAlignment };
 
 struct InputSectionDescription : BaseCommand {
-  InputSectionDescription() : BaseCommand(InputSectionKind) {}
+  InputSectionDescription(StringRef FilePattern)
+      : BaseCommand(InputSectionKind),
+        FileRe(compileGlobPatterns({FilePattern})) {}
   static bool classof(const BaseCommand *C);
-  StringRef FilePattern;
+  llvm::Regex FileRe;
   SortKind SortOuter = SortNone;
   SortKind SortInner = SortNone;
-  std::vector<StringRef> ExcludedFiles;
-  std::vector<StringRef> SectionPatterns;
+  llvm::Regex ExcludedFileRe;
+  llvm::Regex SectionRe;
 };
 
 struct AssertCommand : BaseCommand {
@@ -133,7 +137,7 @@ struct ScriptConfiguration {
 
   // List of section patterns specified with KEEP commands. They will
   // be kept even if they are unused and --gc-sections is specified.
-  std::vector<StringRef> KeptSections;
+  std::vector<llvm::Regex *> KeptSections;
 };
 
 extern ScriptConfiguration *ScriptConfig;

Modified: lld/trunk/ELF/Strings.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Strings.cpp?rev=280544&r1=280543&r2=280544&view=diff
==============================================================================
--- lld/trunk/ELF/Strings.cpp (original)
+++ lld/trunk/ELF/Strings.cpp Fri Sep  2 16:17:20 2016
@@ -9,6 +9,7 @@
 
 #include "Strings.h"
 #include "Error.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Config/config.h"
@@ -22,28 +23,36 @@ using namespace llvm;
 using namespace lld;
 using namespace lld::elf;
 
-// Returns true if S matches T. S can contain glob meta-characters.
-// The asterisk ('*') matches zero or more characters, and the question
-// mark ('?') matches one character.
-bool elf::globMatch(StringRef S, StringRef T) {
-  for (;;) {
-    if (S.empty())
-      return T.empty();
-    if (S[0] == '*') {
-      S = S.substr(1);
-      if (S.empty())
-        // Fast path. If a pattern is '*', it matches anything.
-        return true;
-      for (size_t I = 0, E = T.size(); I < E; ++I)
-        if (globMatch(S, T.substr(I)))
-          return true;
-      return false;
-    }
-    if (T.empty() || (S[0] != T[0] && S[0] != '?'))
-      return false;
+bool elf::hasWildcard(StringRef S) {
+  return S.find_first_of("?*") != StringRef::npos;
+}
+
+static std::string toRegex(StringRef S) {
+  if (S.find_first_of("[]") != StringRef::npos)
+    warning("unsupported wildcard: " + S);
+
+  std::string T;
+  while (!S.empty()) {
+    char C = S.front();
+    if (C == '*')
+      T += ".*";
+    else if (C == '?')
+      T += '.';
+    else if (StringRef(".+^${}()|/\\[]").find_first_of(C) != StringRef::npos)
+      T += std::string("\\") + C;
+    else
+      T += C;
     S = S.substr(1);
-    T = T.substr(1);
   }
+  return T;
+}
+
+// Takes multiple glob patterns and converts them into regex object.
+Regex elf::compileGlobPatterns(ArrayRef<StringRef> V) {
+  std::string T = "^(" + toRegex(V[0]);
+  for (StringRef S : V.slice(1))
+    T += "|" + toRegex(S);
+  return Regex(T + ")$");
 }
 
 // Converts a hex string (e.g. "deadbeef") to a vector.

Modified: lld/trunk/ELF/Strings.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Strings.h?rev=280544&r1=280543&r2=280544&view=diff
==============================================================================
--- lld/trunk/ELF/Strings.h (original)
+++ lld/trunk/ELF/Strings.h Fri Sep  2 16:17:20 2016
@@ -11,11 +11,13 @@
 #define LLD_COFF_STRINGS_H
 
 #include "lld/Core/LLVM.h"
+#include "llvm/Support/Regex.h"
 #include <vector>
 
 namespace lld {
 namespace elf {
-bool globMatch(StringRef S, StringRef T);
+llvm::Regex compileGlobPatterns(ArrayRef<StringRef> V);
+bool hasWildcard(StringRef S);
 std::vector<uint8_t> parseHex(StringRef S);
 bool isValidCIdentifier(StringRef S);
 

Modified: lld/trunk/ELF/SymbolTable.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.cpp?rev=280544&r1=280543&r2=280544&view=diff
==============================================================================
--- lld/trunk/ELF/SymbolTable.cpp (original)
+++ lld/trunk/ELF/SymbolTable.cpp Fri Sep  2 16:17:20 2016
@@ -483,13 +483,14 @@ template <class ELFT> SymbolBody *Symbol
   return SymVector[V.Idx]->body();
 }
 
-// Returns a list of defined symbols that match with a given glob pattern.
+// Returns a list of defined symbols that match with a given regex.
 template <class ELFT>
-std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(StringRef Pattern) {
+std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(const Regex &Re) {
   std::vector<SymbolBody *> Res;
   for (Symbol *Sym : SymVector) {
     SymbolBody *B = Sym->body();
-    if (!B->isUndefined() && globMatch(Pattern, B->getName()))
+    StringRef Name = B->getName();
+    if (!B->isUndefined() && const_cast<Regex &>(Re).match(Name))
       Res.push_back(B);
   }
   return Res;
@@ -578,10 +579,6 @@ template <class ELFT> void SymbolTable<E
       B->symbol()->ExportDynamic = true;
 }
 
-static bool hasWildcard(StringRef S) {
-  return S.find_first_of("?*") != StringRef::npos;
-}
-
 static void setVersionId(SymbolBody *Body, StringRef VersionName,
                          StringRef Name, uint16_t Version) {
   if (!Body || Body->isUndefined()) {
@@ -625,11 +622,11 @@ static SymbolBody *findDemangled(const s
 
 static std::vector<SymbolBody *>
 findAllDemangled(const std::map<std::string, SymbolBody *> &D,
-                 StringRef Pattern) {
+                 const Regex &Re) {
   std::vector<SymbolBody *> Res;
   for (auto &P : D) {
     SymbolBody *Body = P.second;
-    if (!Body->isUndefined() && globMatch(Pattern, P.first))
+    if (!Body->isUndefined() && const_cast<Regex &>(Re).match(P.first))
       Res.push_back(Body);
   }
   return Res;
@@ -682,8 +679,9 @@ template <class ELFT> void SymbolTable<E
       if (!hasWildcard(Sym.Name))
         continue;
       std::vector<SymbolBody *> All =
-          Sym.IsExternCpp ? findAllDemangled(Demangled, Sym.Name)
-                          : findAll(Sym.Name);
+          Sym.IsExternCpp
+              ? findAllDemangled(Demangled, compileGlobPatterns({Sym.Name}))
+              : findAll(compileGlobPatterns({Sym.Name}));
 
       for (SymbolBody *B : All)
         if (B->symbol()->VersionId == Config->DefaultSymbolVersion)

Modified: lld/trunk/ELF/SymbolTable.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.h?rev=280544&r1=280543&r2=280544&view=diff
==============================================================================
--- lld/trunk/ELF/SymbolTable.h (original)
+++ lld/trunk/ELF/SymbolTable.h Fri Sep  2 16:17:20 2016
@@ -13,6 +13,7 @@
 #include "InputFiles.h"
 #include "LTO.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Regex.h"
 
 namespace lld {
 namespace elf {
@@ -91,7 +92,7 @@ public:
   void wrap(StringRef Name);
 
 private:
-  std::vector<SymbolBody *> findAll(StringRef Pattern);
+  std::vector<SymbolBody *> findAll(const llvm::Regex &Re);
   std::pair<Symbol *, bool> insert(StringRef &Name);
   std::pair<Symbol *, bool> insert(StringRef &Name, uint8_t Type,
                                    uint8_t Visibility, bool CanOmitFromDynSym,

Added: lld/trunk/test/ELF/wildcards2.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/wildcards2.s?rev=280544&view=auto
==============================================================================
--- lld/trunk/test/ELF/wildcards2.s (added)
+++ lld/trunk/test/ELF/wildcards2.s Fri Sep  2 16:17:20 2016
@@ -0,0 +1,25 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+## Check that aabc is not included in text.
+# RUN: echo "SECTIONS { \
+# RUN:      .text : { *(.abc) } }" > %t.script
+# RUN: ld.lld -o %t.out --script %t.script %t
+# RUN: llvm-objdump -section-headers %t.out | \
+# RUN:   FileCheck %s
+# CHECK:      Sections:
+# CHECK-NEXT:  Idx Name          Size      Address          Type
+# CHECK-NEXT:    0               00000000 0000000000000000
+# CHECK-NEXT:    1 .text         00000004 0000000000000120 TEXT DATA
+# CHECK-NEXT:    2 aabc          00000004 0000000000000124 TEXT DATA
+
+.text
+.section .abc,"ax",@progbits
+.long 0
+
+.text
+.section aabc,"ax",@progbits
+.long 0
+
+.globl _start
+_start:




More information about the llvm-commits mailing list