[lld] r280544 - [ELF] - Use std::regex instead of hand written logic in elf::globMatch()

Anton Korobeynikov via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 4 01:23:44 PDT 2016


Does this mean that gcc 4.9 is necessary to build lld now? Have you
updated the docs?

On Sat, Sep 3, 2016 at 12:17 AM, George Rimar via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: grimar
> Date: Fri Sep  2 16:17:20 2016
> New Revision: 280544
>
> URL: http://llvm.org/viewvc/llvm-project?rev=280544&view=rev
> Log:
> [ELF] - Use std::regex instead of hand written logic in elf::globMatch()
>
> Use std::regex instead of hand written matcher.
>
> Patch based on code and ideas of Rui Ueyama.
>
> Differential revision: https://reviews.llvm.org/D23829
>
> Added:
>     lld/trunk/test/ELF/wildcards2.s
> Modified:
>     lld/trunk/ELF/LinkerScript.cpp
>     lld/trunk/ELF/LinkerScript.h
>     lld/trunk/ELF/Strings.cpp
>     lld/trunk/ELF/Strings.h
>     lld/trunk/ELF/SymbolTable.cpp
>     lld/trunk/ELF/SymbolTable.h
>
> Modified: lld/trunk/ELF/LinkerScript.cpp
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.cpp?rev=280544&r1=280543&r2=280544&view=diff
> ==============================================================================
> --- lld/trunk/ELF/LinkerScript.cpp (original)
> +++ lld/trunk/ELF/LinkerScript.cpp Fri Sep  2 16:17:20 2016
> @@ -92,44 +92,35 @@ template <class ELFT> LinkerScript<ELFT>
>
>  template <class ELFT>
>  bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
> -  for (StringRef Pat : Opt.KeptSections)
> -    if (globMatch(Pat, S->getSectionName()))
> -      return true;
> -  return false;
> -}
> -
> -static bool match(ArrayRef<StringRef> Patterns, StringRef S) {
> -  for (StringRef Pat : Patterns)
> -    if (globMatch(Pat, S))
> +  for (Regex *Re : Opt.KeptSections)
> +    if (Re->match(S->getSectionName()))
>        return true;
>    return false;
>  }
>
>  static bool fileMatches(const InputSectionDescription *Desc,
>                          StringRef Filename) {
> -  if (!globMatch(Desc->FilePattern, Filename))
> -    return false;
> -  return Desc->ExcludedFiles.empty() || !match(Desc->ExcludedFiles, Filename);
> +  return const_cast<Regex &>(Desc->FileRe).match(Filename) &&
> +         !const_cast<Regex &>(Desc->ExcludedFileRe).match(Filename);
>  }
>
>  // Returns input sections filtered by given glob patterns.
>  template <class ELFT>
>  std::vector<InputSectionBase<ELFT> *>
>  LinkerScript<ELFT>::getInputSections(const InputSectionDescription *I) {
> -  ArrayRef<StringRef> Patterns = I->SectionPatterns;
> +  const Regex &Re = I->SectionRe;
>    std::vector<InputSectionBase<ELFT> *> Ret;
>    for (const std::unique_ptr<ObjectFile<ELFT>> &F :
>         Symtab<ELFT>::X->getObjectFiles()) {
>      if (fileMatches(I, sys::path::filename(F->getName())))
>        for (InputSectionBase<ELFT> *S : F->getSections())
>          if (!isDiscarded(S) && !S->OutSec &&
> -            match(Patterns, S->getSectionName()))
> +            const_cast<Regex &>(Re).match(S->getSectionName()))
>            Ret.push_back(S);
>    }
>
> -  if (llvm::find(Patterns, "COMMON") != Patterns.end())
> +  if (const_cast<Regex &>(Re).match("COMMON"))
>      Ret.push_back(CommonInputSection<ELFT>::X);
> -
>    return Ret;
>  }
>
> @@ -634,7 +625,7 @@ private:
>    std::vector<uint8_t> readOutputSectionFiller();
>    std::vector<StringRef> readOutputSectionPhdrs();
>    InputSectionDescription *readInputSectionDescription(StringRef Tok);
> -  std::vector<StringRef> readInputFilePatterns();
> +  Regex readFilePatterns();
>    InputSectionDescription *readInputSectionRules(StringRef FilePattern);
>    unsigned readPhdrType();
>    SortKind readSortKind();
> @@ -908,11 +899,11 @@ static int precedence(StringRef Op) {
>        .Default(-1);
>  }
>
> -std::vector<StringRef> ScriptParser::readInputFilePatterns() {
> +Regex ScriptParser::readFilePatterns() {
>    std::vector<StringRef> V;
>    while (!Error && !skip(")"))
>      V.push_back(next());
> -  return V;
> +  return compileGlobPatterns(V);
>  }
>
>  SortKind ScriptParser::readSortKind() {
> @@ -925,15 +916,13 @@ SortKind ScriptParser::readSortKind() {
>
>  InputSectionDescription *
>  ScriptParser::readInputSectionRules(StringRef FilePattern) {
> -  auto *Cmd = new InputSectionDescription;
> -  Cmd->FilePattern = FilePattern;
> +  auto *Cmd = new InputSectionDescription(FilePattern);
>    expect("(");
>
>    // Read EXCLUDE_FILE().
>    if (skip("EXCLUDE_FILE")) {
>      expect("(");
> -    while (!Error && !skip(")"))
> -      Cmd->ExcludedFiles.push_back(next());
> +    Cmd->ExcludedFileRe = readFilePatterns();
>    }
>
>    // Read SORT().
> @@ -943,16 +932,16 @@ ScriptParser::readInputSectionRules(Stri
>      if (SortKind K2 = readSortKind()) {
>        Cmd->SortInner = K2;
>        expect("(");
> -      Cmd->SectionPatterns = readInputFilePatterns();
> +      Cmd->SectionRe = readFilePatterns();
>        expect(")");
>      } else {
> -      Cmd->SectionPatterns = readInputFilePatterns();
> +      Cmd->SectionRe = readFilePatterns();
>      }
>      expect(")");
>      return Cmd;
>    }
>
> -  Cmd->SectionPatterns = readInputFilePatterns();
> +  Cmd->SectionRe = readFilePatterns();
>    return Cmd;
>  }
>
> @@ -965,9 +954,7 @@ ScriptParser::readInputSectionDescriptio
>      StringRef FilePattern = next();
>      InputSectionDescription *Cmd = readInputSectionRules(FilePattern);
>      expect(")");
> -    Opt.KeptSections.insert(Opt.KeptSections.end(),
> -                            Cmd->SectionPatterns.begin(),
> -                            Cmd->SectionPatterns.end());
> +    Opt.KeptSections.push_back(&Cmd->SectionRe);
>      return Cmd;
>    }
>    return readInputSectionRules(Tok);
>
> Modified: lld/trunk/ELF/LinkerScript.h
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.h?rev=280544&r1=280543&r2=280544&view=diff
> ==============================================================================
> --- lld/trunk/ELF/LinkerScript.h (original)
> +++ lld/trunk/ELF/LinkerScript.h Fri Sep  2 16:17:20 2016
> @@ -10,12 +10,14 @@
>  #ifndef LLD_ELF_LINKER_SCRIPT_H
>  #define LLD_ELF_LINKER_SCRIPT_H
>
> +#include "Strings.h"
>  #include "Writer.h"
>  #include "lld/Core/LLVM.h"
>  #include "llvm/ADT/DenseMap.h"
>  #include "llvm/ADT/MapVector.h"
>  #include "llvm/Support/Allocator.h"
>  #include "llvm/Support/MemoryBuffer.h"
> +#include "llvm/Support/Regex.h"
>  #include <functional>
>
>  namespace lld {
> @@ -94,13 +96,15 @@ struct OutputSectionCommand : BaseComman
>  enum SortKind { SortNone, SortByName, SortByAlignment };
>
>  struct InputSectionDescription : BaseCommand {
> -  InputSectionDescription() : BaseCommand(InputSectionKind) {}
> +  InputSectionDescription(StringRef FilePattern)
> +      : BaseCommand(InputSectionKind),
> +        FileRe(compileGlobPatterns({FilePattern})) {}
>    static bool classof(const BaseCommand *C);
> -  StringRef FilePattern;
> +  llvm::Regex FileRe;
>    SortKind SortOuter = SortNone;
>    SortKind SortInner = SortNone;
> -  std::vector<StringRef> ExcludedFiles;
> -  std::vector<StringRef> SectionPatterns;
> +  llvm::Regex ExcludedFileRe;
> +  llvm::Regex SectionRe;
>  };
>
>  struct AssertCommand : BaseCommand {
> @@ -133,7 +137,7 @@ struct ScriptConfiguration {
>
>    // List of section patterns specified with KEEP commands. They will
>    // be kept even if they are unused and --gc-sections is specified.
> -  std::vector<StringRef> KeptSections;
> +  std::vector<llvm::Regex *> KeptSections;
>  };
>
>  extern ScriptConfiguration *ScriptConfig;
>
> Modified: lld/trunk/ELF/Strings.cpp
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Strings.cpp?rev=280544&r1=280543&r2=280544&view=diff
> ==============================================================================
> --- lld/trunk/ELF/Strings.cpp (original)
> +++ lld/trunk/ELF/Strings.cpp Fri Sep  2 16:17:20 2016
> @@ -9,6 +9,7 @@
>
>  #include "Strings.h"
>  #include "Error.h"
> +#include "llvm/ADT/ArrayRef.h"
>  #include "llvm/ADT/StringRef.h"
>  #include "llvm/ADT/Twine.h"
>  #include "llvm/Config/config.h"
> @@ -22,28 +23,36 @@ using namespace llvm;
>  using namespace lld;
>  using namespace lld::elf;
>
> -// Returns true if S matches T. S can contain glob meta-characters.
> -// The asterisk ('*') matches zero or more characters, and the question
> -// mark ('?') matches one character.
> -bool elf::globMatch(StringRef S, StringRef T) {
> -  for (;;) {
> -    if (S.empty())
> -      return T.empty();
> -    if (S[0] == '*') {
> -      S = S.substr(1);
> -      if (S.empty())
> -        // Fast path. If a pattern is '*', it matches anything.
> -        return true;
> -      for (size_t I = 0, E = T.size(); I < E; ++I)
> -        if (globMatch(S, T.substr(I)))
> -          return true;
> -      return false;
> -    }
> -    if (T.empty() || (S[0] != T[0] && S[0] != '?'))
> -      return false;
> +bool elf::hasWildcard(StringRef S) {
> +  return S.find_first_of("?*") != StringRef::npos;
> +}
> +
> +static std::string toRegex(StringRef S) {
> +  if (S.find_first_of("[]") != StringRef::npos)
> +    warning("unsupported wildcard: " + S);
> +
> +  std::string T;
> +  while (!S.empty()) {
> +    char C = S.front();
> +    if (C == '*')
> +      T += ".*";
> +    else if (C == '?')
> +      T += '.';
> +    else if (StringRef(".+^${}()|/\\[]").find_first_of(C) != StringRef::npos)
> +      T += std::string("\\") + C;
> +    else
> +      T += C;
>      S = S.substr(1);
> -    T = T.substr(1);
>    }
> +  return T;
> +}
> +
> +// Takes multiple glob patterns and converts them into regex object.
> +Regex elf::compileGlobPatterns(ArrayRef<StringRef> V) {
> +  std::string T = "^(" + toRegex(V[0]);
> +  for (StringRef S : V.slice(1))
> +    T += "|" + toRegex(S);
> +  return Regex(T + ")$");
>  }
>
>  // Converts a hex string (e.g. "deadbeef") to a vector.
>
> Modified: lld/trunk/ELF/Strings.h
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Strings.h?rev=280544&r1=280543&r2=280544&view=diff
> ==============================================================================
> --- lld/trunk/ELF/Strings.h (original)
> +++ lld/trunk/ELF/Strings.h Fri Sep  2 16:17:20 2016
> @@ -11,11 +11,13 @@
>  #define LLD_COFF_STRINGS_H
>
>  #include "lld/Core/LLVM.h"
> +#include "llvm/Support/Regex.h"
>  #include <vector>
>
>  namespace lld {
>  namespace elf {
> -bool globMatch(StringRef S, StringRef T);
> +llvm::Regex compileGlobPatterns(ArrayRef<StringRef> V);
> +bool hasWildcard(StringRef S);
>  std::vector<uint8_t> parseHex(StringRef S);
>  bool isValidCIdentifier(StringRef S);
>
>
> Modified: lld/trunk/ELF/SymbolTable.cpp
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.cpp?rev=280544&r1=280543&r2=280544&view=diff
> ==============================================================================
> --- lld/trunk/ELF/SymbolTable.cpp (original)
> +++ lld/trunk/ELF/SymbolTable.cpp Fri Sep  2 16:17:20 2016
> @@ -483,13 +483,14 @@ template <class ELFT> SymbolBody *Symbol
>    return SymVector[V.Idx]->body();
>  }
>
> -// Returns a list of defined symbols that match with a given glob pattern.
> +// Returns a list of defined symbols that match with a given regex.
>  template <class ELFT>
> -std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(StringRef Pattern) {
> +std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(const Regex &Re) {
>    std::vector<SymbolBody *> Res;
>    for (Symbol *Sym : SymVector) {
>      SymbolBody *B = Sym->body();
> -    if (!B->isUndefined() && globMatch(Pattern, B->getName()))
> +    StringRef Name = B->getName();
> +    if (!B->isUndefined() && const_cast<Regex &>(Re).match(Name))
>        Res.push_back(B);
>    }
>    return Res;
> @@ -578,10 +579,6 @@ template <class ELFT> void SymbolTable<E
>        B->symbol()->ExportDynamic = true;
>  }
>
> -static bool hasWildcard(StringRef S) {
> -  return S.find_first_of("?*") != StringRef::npos;
> -}
> -
>  static void setVersionId(SymbolBody *Body, StringRef VersionName,
>                           StringRef Name, uint16_t Version) {
>    if (!Body || Body->isUndefined()) {
> @@ -625,11 +622,11 @@ static SymbolBody *findDemangled(const s
>
>  static std::vector<SymbolBody *>
>  findAllDemangled(const std::map<std::string, SymbolBody *> &D,
> -                 StringRef Pattern) {
> +                 const Regex &Re) {
>    std::vector<SymbolBody *> Res;
>    for (auto &P : D) {
>      SymbolBody *Body = P.second;
> -    if (!Body->isUndefined() && globMatch(Pattern, P.first))
> +    if (!Body->isUndefined() && const_cast<Regex &>(Re).match(P.first))
>        Res.push_back(Body);
>    }
>    return Res;
> @@ -682,8 +679,9 @@ template <class ELFT> void SymbolTable<E
>        if (!hasWildcard(Sym.Name))
>          continue;
>        std::vector<SymbolBody *> All =
> -          Sym.IsExternCpp ? findAllDemangled(Demangled, Sym.Name)
> -                          : findAll(Sym.Name);
> +          Sym.IsExternCpp
> +              ? findAllDemangled(Demangled, compileGlobPatterns({Sym.Name}))
> +              : findAll(compileGlobPatterns({Sym.Name}));
>
>        for (SymbolBody *B : All)
>          if (B->symbol()->VersionId == Config->DefaultSymbolVersion)
>
> Modified: lld/trunk/ELF/SymbolTable.h
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.h?rev=280544&r1=280543&r2=280544&view=diff
> ==============================================================================
> --- lld/trunk/ELF/SymbolTable.h (original)
> +++ lld/trunk/ELF/SymbolTable.h Fri Sep  2 16:17:20 2016
> @@ -13,6 +13,7 @@
>  #include "InputFiles.h"
>  #include "LTO.h"
>  #include "llvm/ADT/DenseMap.h"
> +#include "llvm/Support/Regex.h"
>
>  namespace lld {
>  namespace elf {
> @@ -91,7 +92,7 @@ public:
>    void wrap(StringRef Name);
>
>  private:
> -  std::vector<SymbolBody *> findAll(StringRef Pattern);
> +  std::vector<SymbolBody *> findAll(const llvm::Regex &Re);
>    std::pair<Symbol *, bool> insert(StringRef &Name);
>    std::pair<Symbol *, bool> insert(StringRef &Name, uint8_t Type,
>                                     uint8_t Visibility, bool CanOmitFromDynSym,
>
> Added: lld/trunk/test/ELF/wildcards2.s
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/wildcards2.s?rev=280544&view=auto
> ==============================================================================
> --- lld/trunk/test/ELF/wildcards2.s (added)
> +++ lld/trunk/test/ELF/wildcards2.s Fri Sep  2 16:17:20 2016
> @@ -0,0 +1,25 @@
> +# REQUIRES: x86
> +
> +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
> +## Check that aabc is not included in text.
> +# RUN: echo "SECTIONS { \
> +# RUN:      .text : { *(.abc) } }" > %t.script
> +# RUN: ld.lld -o %t.out --script %t.script %t
> +# RUN: llvm-objdump -section-headers %t.out | \
> +# RUN:   FileCheck %s
> +# CHECK:      Sections:
> +# CHECK-NEXT:  Idx Name          Size      Address          Type
> +# CHECK-NEXT:    0               00000000 0000000000000000
> +# CHECK-NEXT:    1 .text         00000004 0000000000000120 TEXT DATA
> +# CHECK-NEXT:    2 aabc          00000004 0000000000000124 TEXT DATA
> +
> +.text
> +.section .abc,"ax",@progbits
> +.long 0
> +
> +.text
> +.section aabc,"ax",@progbits
> +.long 0
> +
> +.globl _start
> +_start:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits



-- 
With best regards, Anton Korobeynikov
Department of Statistical Modelling, Saint Petersburg State University


More information about the llvm-commits mailing list