[PATCH] Introduce an optimisation for special case lists with large numbers of literal entries.
Peter Collingbourne
peter at pcc.me.uk
Fri Jul 19 18:22:58 PDT 2013
- Refactor and add some comments
Hi samsonov,
http://llvm-reviews.chandlerc.com/D1150
CHANGE SINCE LAST DIFF
http://llvm-reviews.chandlerc.com/D1150?vs=2822&id=2925#toc
Files:
include/llvm/Support/Regex.h
include/llvm/Transforms/Utils/SpecialCaseList.h
lib/Support/Regex.cpp
lib/Transforms/Utils/SpecialCaseList.cpp
Index: include/llvm/Support/Regex.h
===================================================================
--- include/llvm/Support/Regex.h
+++ include/llvm/Support/Regex.h
@@ -77,6 +77,10 @@
/// string.
std::string sub(StringRef Repl, StringRef String, std::string *Error = 0);
+ /// \brief If this function returns true, Str is a regular expression that
+ /// matches itself and only itself.
+ static bool isLiteral(StringRef Str);
+
private:
struct llvm_regex *preg;
int error;
Index: include/llvm/Transforms/Utils/SpecialCaseList.h
===================================================================
--- include/llvm/Transforms/Utils/SpecialCaseList.h
+++ include/llvm/Transforms/Utils/SpecialCaseList.h
@@ -89,7 +89,8 @@
bool findCategory(const Module &M, StringRef &Category) const;
private:
- StringMap<StringMap<Regex*> > Entries;
+ struct Entry;
+ StringMap<StringMap<Entry> > Entries;
void init(const MemoryBuffer *MB);
bool findCategory(const StringRef Section, const StringRef Query,
Index: lib/Support/Regex.cpp
===================================================================
--- lib/Support/Regex.cpp
+++ lib/Support/Regex.cpp
@@ -168,3 +168,10 @@
return Res;
}
+
+bool Regex::isLiteral(StringRef Str) {
+ // Literal means Str contains none of the regex metacharacters listed below
+ // (find_first_of yields npos). The list was derived from our regex
+ // implementation in regcomp.c and double checked against the POSIX ERE spec.
+ return Str.find_first_of("()^$|*+?.[]\\{}") == StringRef::npos;
+}
Index: lib/Transforms/Utils/SpecialCaseList.cpp
===================================================================
--- lib/Transforms/Utils/SpecialCaseList.cpp
+++ lib/Transforms/Utils/SpecialCaseList.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -32,6 +33,22 @@
namespace llvm {
+/// Represents a set of regular expressions. Expressions that are "literal"
+/// (i.e. contain no regex metacharacters) are stored in Strings; all others
+/// are represented as a single pipe-separated regex in RegEx. The split exists
+/// for efficiency: StringSet is much faster at matching literal strings than
+/// Regex. A query matches the entry if either representation matches it.
+struct SpecialCaseList::Entry {
+ StringSet<> Strings; // Literal expressions, matched by exact lookup.
+ Regex *RegEx; // Null until assigned; not freed here, ~SpecialCaseList deletes it.
+
+ Entry() : RegEx(0) {}
+
+ bool match(StringRef Query) const {
+ return Strings.count(Query) || (RegEx && RegEx->match(Query)); // Literal lookup first; regex only if set.
+ }
+};
+
SpecialCaseList::SpecialCaseList(const StringRef Path) {
// Validate and open blacklist file.
if (Path.empty()) return;
@@ -82,6 +99,12 @@
Category = "init";
}
+ // See if we can store Regexp in Strings.
+ if (Regex::isLiteral(Regexp)) {
+ Entries[Prefix][Category].Strings.insert(Regexp);
+ continue;
+ }
+
// Replace * with .*
for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
pos += strlen(".*")) {
@@ -109,16 +132,20 @@
for (StringMap<std::string>::const_iterator II = I->second.begin(),
IE = I->second.end();
II != IE; ++II) {
- Entries[I->getKey()][II->getKey()] = new Regex(II->getValue());
+ Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
}
}
}
SpecialCaseList::~SpecialCaseList() {
- for (StringMap<StringMap<Regex*> >::iterator I = Entries.begin(),
- E = Entries.end();
+ for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
+ E = Entries.end();
I != E; ++I) {
- DeleteContainerSeconds(I->second);
+ for (StringMap<Entry>::const_iterator II = I->second.begin(),
+ IE = I->second.end();
+ II != IE; ++II) {
+ delete II->second.RegEx; // May still be null (Entry() default); deleting null is a no-op.
+ }
}
}
@@ -169,14 +196,13 @@
bool SpecialCaseList::findCategory(const StringRef Section,
const StringRef Query,
StringRef &Category) const {
- StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
+ StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
if (I == Entries.end()) return false;
- for (StringMap<Regex *>::const_iterator II = I->second.begin(),
- IE = I->second.end();
+ for (StringMap<Entry>::const_iterator II = I->second.begin(),
+ IE = I->second.end();
II != IE; ++II) {
- Regex *FunctionRegex = II->getValue();
- if (FunctionRegex->match(Query)) {
+ if (II->getValue().match(Query)) {
Category = II->first();
return true;
}
@@ -188,13 +214,12 @@
bool SpecialCaseList::inSectionCategory(const StringRef Section,
const StringRef Query,
const StringRef Category) const {
- StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
+ StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
if (I == Entries.end()) return false;
- StringMap<Regex *>::const_iterator II = I->second.find(Category);
+ StringMap<Entry>::const_iterator II = I->second.find(Category);
if (II == I->second.end()) return false;
- Regex *FunctionRegex = II->getValue();
- return FunctionRegex->match(Query);
+ return II->getValue().match(Query); // Entry::match checks the literal set, then the regex if present.
}
} // namespace llvm
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1150.2.patch
Type: text/x-patch
Size: 5655 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130719/671e11e9/attachment.bin>
More information about the llvm-commits
mailing list