[lld] r285949 - Add [<chars>] to the glob matcher to eliminate use of llvm::Regex.
Rui Ueyama via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 3 10:57:38 PDT 2016
Author: ruiu
Date: Thu Nov 3 12:57:38 2016
New Revision: 285949
URL: http://llvm.org/viewvc/llvm-project?rev=285949&view=rev
Log:
Add [<chars>] to the glob matcher to eliminate use of llvm::Regex.
Previously, it didn't support the character class, so we couldn't
eliminate the use fo llvm::Regex. Now that it is supported, we
can remove compileGlobPattern, which converts a glob pattern to
a regex.
This patch contains optimization for exact/prefix/suffix matches.
Differential Revision: https://reviews.llvm.org/D26284
Modified:
lld/trunk/ELF/LinkerScript.cpp
lld/trunk/ELF/LinkerScript.h
lld/trunk/ELF/Strings.cpp
lld/trunk/ELF/Strings.h
lld/trunk/ELF/SymbolTable.cpp
lld/trunk/ELF/SymbolTable.h
Modified: lld/trunk/ELF/LinkerScript.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.cpp?rev=285949&r1=285948&r2=285949&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.cpp (original)
+++ lld/trunk/ELF/LinkerScript.cpp Thu Nov 3 12:57:38 2016
@@ -111,11 +111,11 @@ template <class ELFT>
bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) {
for (InputSectionDescription *ID : Opt.KeptSections) {
StringRef Filename = S->getFile()->getName();
- if (!ID->FileRe.match(sys::path::filename(Filename)))
+ if (!ID->FilePat.match(sys::path::filename(Filename)))
continue;
for (SectionPattern &P : ID->SectionPatterns)
- if (P.SectionRe.match(S->Name))
+ if (P.SectionPat.match(S->Name))
return true;
}
return false;
@@ -178,13 +178,13 @@ void LinkerScript<ELFT>::computeInputSec
size_t SizeBefore = I->Sections.size();
for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) {
StringRef Filename = sys::path::filename(F->getName());
- if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename))
+ if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename))
continue;
for (InputSectionBase<ELFT> *S : F->getSections())
- if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name))
+ if (!isDiscarded(S) && !S->OutSec && Pat.SectionPat.match(S->Name))
I->Sections.push_back(S);
- if (Pat.SectionRe.match("COMMON"))
+ if (Pat.SectionPat.match("COMMON"))
I->Sections.push_back(InputSection<ELFT>::CommonInputSection);
}
@@ -1211,7 +1211,7 @@ StringMatcher ScriptParser::readFilePatt
std::vector<StringRef> V;
while (!Error && !consume(")"))
V.push_back(next());
- return StringMatcher(std::move(V));
+ return StringMatcher(V);
}
SortSectionPolicy ScriptParser::readSortKind() {
@@ -1236,10 +1236,10 @@ SortSectionPolicy ScriptParser::readSort
std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
std::vector<SectionPattern> Ret;
while (!Error && peek() != ")") {
- StringMatcher ExcludeFileRe;
+ StringMatcher ExcludeFilePat;
if (consume("EXCLUDE_FILE")) {
expect("(");
- ExcludeFileRe = readFilePatterns();
+ ExcludeFilePat = readFilePatterns();
}
std::vector<StringRef> V;
@@ -1247,7 +1247,7 @@ std::vector<SectionPattern> ScriptParser
V.push_back(next());
if (!V.empty())
- Ret.push_back({std::move(ExcludeFileRe), StringMatcher(std::move(V))});
+ Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)});
else
setError("section pattern is expected");
}
Modified: lld/trunk/ELF/LinkerScript.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.h?rev=285949&r1=285948&r2=285949&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.h (original)
+++ lld/trunk/ELF/LinkerScript.h Thu Nov 3 12:57:38 2016
@@ -114,28 +114,20 @@ struct OutputSectionCommand : BaseComman
// It can optionally have negative match pattern for EXCLUDED_FILE command.
// Also it may be surrounded with SORT() command, so contains sorting rules.
struct SectionPattern {
- SectionPattern(StringMatcher &&Re1, StringMatcher &&Re2)
- : ExcludedFileRe(std::forward<StringMatcher>(Re1)),
- SectionRe(std::forward<StringMatcher>(Re2)) {}
+ SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2)
+ : ExcludedFilePat(Pat1), SectionPat(Pat2) {}
- SectionPattern(SectionPattern &&Other) {
- std::swap(ExcludedFileRe, Other.ExcludedFileRe);
- std::swap(SectionRe, Other.SectionRe);
- std::swap(SortOuter, Other.SortOuter);
- std::swap(SortInner, Other.SortInner);
- }
-
- StringMatcher ExcludedFileRe;
- StringMatcher SectionRe;
+ StringMatcher ExcludedFilePat;
+ StringMatcher SectionPat;
SortSectionPolicy SortOuter;
SortSectionPolicy SortInner;
};
struct InputSectionDescription : BaseCommand {
InputSectionDescription(StringRef FilePattern)
- : BaseCommand(InputSectionKind), FileRe(FilePattern) {}
+ : BaseCommand(InputSectionKind), FilePat({FilePattern}) {}
static bool classof(const BaseCommand *C);
- StringMatcher FileRe;
+ StringMatcher FilePat;
// Input sections that matches at least one of SectionPatterns
// will be associated with this InputSectionDescription.
Modified: lld/trunk/ELF/Strings.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Strings.cpp?rev=285949&r1=285948&r2=285949&view=diff
==============================================================================
--- lld/trunk/ELF/Strings.cpp (original)
+++ lld/trunk/ELF/Strings.cpp Thu Nov 3 12:57:38 2016
@@ -21,36 +21,142 @@ using namespace llvm;
using namespace lld;
using namespace lld::elf;
-// Returns true if S matches T. S can contain glob meta-characters.
-// The asterisk ('*') matches zero or more characters, and the question
-// mark ('?') matches one character.
-static bool globMatch(StringRef S, StringRef T) {
+// This is a scanner for the glob pattern.
+// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
+// (which is a negative form of "[<chars>]"), or a non-meta character.
+// This function returns the first token in S.
+BitVector GlobPattern::scan(StringRef &S) {
+ switch (S[0]) {
+ case '*':
+ S = S.substr(1);
+ // '*' is represented by an empty bitvector.
+ // All other bitvectors are 256-bit long.
+ return BitVector();
+ case '?':
+ S = S.substr(1);
+ return BitVector(256, true);
+ case '[': {
+ size_t End = S.find(']', 1);
+ if (End == StringRef::npos) {
+ error("invalid glob pattern: " + Original);
+ return BitVector(256, false);
+ }
+ StringRef Chars = S.substr(1, End - 1);
+ S = S.substr(End + 1);
+ if (Chars.startswith("^"))
+ return expand(Chars.substr(1)).flip();
+ return expand(Chars);
+ }
+ default:
+ BitVector BV(256, false);
+ BV[S[0]] = true;
+ S = S.substr(1);
+ return BV;
+ }
+}
+
+// Expands character ranges and returns a bitmap.
+// For example, "a-cf-hz" is expanded to "abcfghz".
+BitVector GlobPattern::expand(StringRef S) {
+ BitVector BV(256, false);
+
+ // Expand "x-y".
for (;;) {
- if (S.empty())
- return T.empty();
- if (S[0] == '*') {
+ if (S.size() < 3)
+ break;
+
+ // If it doesn't start with something like "x-y",
+ // consume the first character and proceed.
+ if (S[1] != '-') {
+ BV[S[0]] = true;
S = S.substr(1);
- if (S.empty())
+ continue;
+ }
+
+ // It must be in the form of "x-y".
+ // Validate it and then interpret the range.
+ if (S[0] > S[2]) {
+ error("invalid glob pattern: " + Original);
+ return BV;
+ }
+ for (int C = S[0]; C <= S[2]; ++C)
+ BV[C] = true;
+ S = S.substr(3);
+ }
+
+ for (char C : S)
+ BV[C] = true;
+ return BV;
+}
+
+GlobPattern::GlobPattern(StringRef S) : Original(S) {
+ if (!hasWildcard(S)) {
+ // S doesn't contain any metacharacter,
+ // so the regular string comparison should work.
+ Exact = S;
+ } else if (S.endswith("*") && !hasWildcard(S.drop_back())) {
+ // S is something like "foo*". We can use startswith().
+ Prefix = S.drop_back();
+ } else if (S.startswith("*") && !hasWildcard(S.drop_front())) {
+ // S is something like "*foo". We can use endswith().
+ Suffix = S.drop_front();
+ } else {
+ // Otherwise, we need to do real glob pattern matching.
+ // Parse the pattern now.
+ while (!S.empty())
+ Tokens.push_back(scan(S));
+ }
+}
+
+bool GlobPattern::match(StringRef S) const {
+ if (Exact)
+ return S == *Exact;
+ if (Prefix)
+ return S.startswith(*Prefix);
+ if (Suffix)
+ return S.endswith(*Suffix);
+ return matchOne(Tokens, S);
+}
+
+// Runs glob pattern Pats against string S.
+bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
+ for (;;) {
+ if (Pats.empty())
+ return S.empty();
+
+ // If Pats[0] is '*', try to match Pats[1..] against all possible
+ // substrings of S to see at least one pattern succeeds.
+ if (Pats[0].size() == 0) {
+ Pats = Pats.slice(1);
+ if (Pats.empty())
// Fast path. If a pattern is '*', it matches anything.
return true;
- for (size_t I = 0, E = T.size(); I < E; ++I)
- if (globMatch(S, T.substr(I)))
+ for (size_t I = 0, E = S.size(); I < E; ++I)
+ if (matchOne(Pats, S.substr(I)))
return true;
return false;
}
- if (T.empty() || (S[0] != T[0] && S[0] != '?'))
+
+ // If Pats[0] is not '*', it must consume one character.
+ if (S.empty() || !Pats[0][S[0]])
return false;
+ Pats = Pats.slice(1);
S = S.substr(1);
- T = T.substr(1);
}
}
-bool StringMatcher::match(StringRef S) {
- for (StringRef P : Patterns)
- if (globMatch(P, S))
+StringMatcher::StringMatcher(const std::vector<StringRef> &Pat) {
+ for (StringRef S : Pat)
+ Patterns.push_back(GlobPattern(S));
+}
+
+bool StringMatcher::match(StringRef S) const {
+ for (const GlobPattern &Pat : Patterns)
+ if (Pat.match(S))
return true;
return false;
}
+
// If an input string is in the form of "foo.N" where N is a number,
// return N. Otherwise, returns 65536, which is one greater than the
// lowest priority.
@@ -74,42 +180,6 @@ StringRef elf::unquote(StringRef S) {
return S.substr(1, S.size() - 2);
}
-// Converts a glob pattern to a regular expression.
-static std::string toRegex(StringRef S) {
- std::string T;
- bool InBracket = false;
- while (!S.empty()) {
- char C = S.front();
- if (InBracket) {
- InBracket = C != ']';
- T += C;
- S = S.drop_front();
- continue;
- }
-
- if (C == '*')
- T += ".*";
- else if (C == '?')
- T += '.';
- else if (StringRef(".+^${}()|/\\").find_first_of(C) != StringRef::npos)
- T += std::string("\\") + C;
- else
- T += C;
-
- InBracket = C == '[';
- S = S.substr(1);
- }
- return T;
-}
-
-// Converts multiple glob patterns to a regular expression.
-Regex elf::compileGlobPatterns(ArrayRef<StringRef> V) {
- std::string T = "^(" + toRegex(V[0]);
- for (StringRef S : V.slice(1))
- T += "|" + toRegex(S);
- return Regex(T + ")$");
-}
-
// Converts a hex string (e.g. "deadbeef") to a vector.
std::vector<uint8_t> elf::parseHex(StringRef S) {
std::vector<uint8_t> Hex;
Modified: lld/trunk/ELF/Strings.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Strings.h?rev=285949&r1=285948&r2=285949&view=diff
==============================================================================
--- lld/trunk/ELF/Strings.h (original)
+++ lld/trunk/ELF/Strings.h Thu Nov 3 12:57:38 2016
@@ -12,29 +12,54 @@
#include "lld/Core/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Regex.h"
#include <vector>
namespace lld {
namespace elf {
-llvm::Regex compileGlobPatterns(ArrayRef<StringRef> V);
+
int getPriority(StringRef S);
bool hasWildcard(StringRef S);
std::vector<uint8_t> parseHex(StringRef S);
bool isValidCIdentifier(StringRef S);
StringRef unquote(StringRef S);
+// This class represents a glob pattern. Supported metacharacters
+// are "*", "?", "[<chars>]" and "[^<chars>]".
+class GlobPattern {
+public:
+ explicit GlobPattern(StringRef Pat);
+ bool match(StringRef S) const;
+
+private:
+ bool matchOne(ArrayRef<llvm::BitVector> Pat, StringRef S) const;
+ llvm::BitVector scan(StringRef &S);
+ llvm::BitVector expand(StringRef S);
+
+ // Parsed glob pattern.
+ std::vector<llvm::BitVector> Tokens;
+
+ // A glob pattern given to this class. This is for error reporting.
+ StringRef Original;
+
+ // The following members are for optimization.
+ llvm::Optional<StringRef> Exact;
+ llvm::Optional<StringRef> Prefix;
+ llvm::Optional<StringRef> Suffix;
+};
+
+// This class represents multiple glob patterns.
class StringMatcher {
public:
StringMatcher() = default;
- explicit StringMatcher(StringRef P) : Patterns({P}) {}
- explicit StringMatcher(std::vector<StringRef> &&Pat)
- : Patterns(std::move(Pat)) {}
+ explicit StringMatcher(const std::vector<StringRef> &Pat);
+
+ bool match(StringRef S) const;
- bool match(StringRef S);
private:
- std::vector<StringRef> Patterns;
+ std::vector<GlobPattern> Patterns;
};
// Returns a demangled C++ symbol name. If Name is not a mangled
Modified: lld/trunk/ELF/SymbolTable.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.cpp?rev=285949&r1=285948&r2=285949&view=diff
==============================================================================
--- lld/trunk/ELF/SymbolTable.cpp (original)
+++ lld/trunk/ELF/SymbolTable.cpp Thu Nov 3 12:57:38 2016
@@ -470,12 +470,12 @@ template <class ELFT> SymbolBody *Symbol
// Returns a list of defined symbols that match with a given regex.
template <class ELFT>
-std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(const Regex &Re) {
+std::vector<SymbolBody *> SymbolTable<ELFT>::findAll(const StringMatcher &M) {
std::vector<SymbolBody *> Res;
for (Symbol *Sym : SymVector) {
SymbolBody *B = Sym->body();
StringRef Name = B->getName();
- if (!B->isUndefined() && const_cast<Regex &>(Re).match(Name))
+ if (!B->isUndefined() && M.match(Name))
Res.push_back(B);
}
return Res;
@@ -611,10 +611,10 @@ findDemangled(std::map<std::string, std:
static std::vector<SymbolBody *>
findAllDemangled(const std::map<std::string, std::vector<SymbolBody *>> &D,
- const Regex &Re) {
+ StringMatcher &M) {
std::vector<SymbolBody *> Res;
for (auto &P : D) {
- if (const_cast<Regex &>(Re).match(P.first))
+ if (M.match(P.first))
for (SymbolBody *Body : P.second)
if (!Body->isUndefined())
Res.push_back(Body);
@@ -639,8 +639,8 @@ template <class ELFT> void SymbolTable<E
}
if (Patterns.empty())
return;
- Regex Re = compileGlobPatterns(Patterns);
- std::vector<SymbolBody *> Syms = findAll(Re);
+ StringMatcher M(Patterns);
+ std::vector<SymbolBody *> Syms = findAll(M);
for (SymbolBody *B : Syms)
B->symbol()->VersionId = VER_NDX_GLOBAL;
}
@@ -696,9 +696,9 @@ template <class ELFT> void SymbolTable<E
for (SymbolVersion &Sym : V.Globals) {
if (!Sym.HasWildcards)
continue;
- Regex Re = compileGlobPatterns({Sym.Name});
+ StringMatcher M({Sym.Name});
std::vector<SymbolBody *> Syms =
- Sym.IsExternCpp ? findAllDemangled(Demangled, Re) : findAll(Re);
+ Sym.IsExternCpp ? findAllDemangled(Demangled, M) : findAll(M);
// Exact matching takes precendence over fuzzy matching,
// so we set a version to a symbol only if no version has been assigned
Modified: lld/trunk/ELF/SymbolTable.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.h?rev=285949&r1=285948&r2=285949&view=diff
==============================================================================
--- lld/trunk/ELF/SymbolTable.h (original)
+++ lld/trunk/ELF/SymbolTable.h Thu Nov 3 12:57:38 2016
@@ -12,9 +12,9 @@
#include "InputFiles.h"
#include "LTO.h"
+#include "Strings.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/Regex.h"
namespace lld {
namespace elf {
@@ -92,7 +92,7 @@ public:
void wrap(StringRef Name);
private:
- std::vector<SymbolBody *> findAll(const llvm::Regex &Re);
+ std::vector<SymbolBody *> findAll(const StringMatcher &M);
std::pair<Symbol *, bool> insert(StringRef &Name);
std::pair<Symbol *, bool> insert(StringRef &Name, uint8_t Type,
uint8_t Visibility, bool CanOmitFromDynSym,
More information about the llvm-commits
mailing list