[PATCH] Introduce an optimisation for special case lists with large numbers of literal entries.

Peter Collingbourne peter at pcc.me.uk
Fri Jul 19 18:22:58 PDT 2013


    - Refactor and add some comments

Hi samsonov,

http://llvm-reviews.chandlerc.com/D1150

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D1150?vs=2822&id=2925#toc

Files:
  include/llvm/Support/Regex.h
  include/llvm/Transforms/Utils/SpecialCaseList.h
  lib/Support/Regex.cpp
  lib/Transforms/Utils/SpecialCaseList.cpp

Index: include/llvm/Support/Regex.h
===================================================================
--- include/llvm/Support/Regex.h
+++ include/llvm/Support/Regex.h
@@ -77,6 +77,10 @@
     /// string.
     std::string sub(StringRef Repl, StringRef String, std::string *Error = 0);
 
+    /// \brief If this function returns true, Str is a regular expression that
+    /// matches itself and only itself.
+    static bool isLiteral(StringRef Str);
+
   private:
     struct llvm_regex *preg;
     int error;
Index: include/llvm/Transforms/Utils/SpecialCaseList.h
===================================================================
--- include/llvm/Transforms/Utils/SpecialCaseList.h
+++ include/llvm/Transforms/Utils/SpecialCaseList.h
@@ -89,7 +89,8 @@
   bool findCategory(const Module &M, StringRef &Category) const;
 
  private:
-  StringMap<StringMap<Regex*> > Entries;
+  struct Entry;
+  StringMap<StringMap<Entry> > Entries;
 
   void init(const MemoryBuffer *MB);
   bool findCategory(const StringRef Section, const StringRef Query,
Index: lib/Support/Regex.cpp
===================================================================
--- lib/Support/Regex.cpp
+++ lib/Support/Regex.cpp
@@ -168,3 +168,10 @@
 
   return Res;
 }
+
+bool Regex::isLiteral(StringRef Str) {
+  // Str is literal iff it contains no regex metacharacter.  This list was
+  // derived from our regex implementation in regcomp.c and double-checked
+  // against the POSIX extended regular expression specification.
+  return Str.find_first_of("()^$|*+?.[]\\{}") == StringRef::npos;
+}
Index: lib/Transforms/Utils/SpecialCaseList.cpp
===================================================================
--- lib/Transforms/Utils/SpecialCaseList.cpp
+++ lib/Transforms/Utils/SpecialCaseList.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -32,6 +33,22 @@
 
 namespace llvm {
 
+/// Represents a set of regular expressions.  Regular expressions which are
+/// "literal" (i.e. contain no regex metacharacters) are stored in Strings;
+/// all others are represented as a single pipe-separated regex in RegEx, which
+/// is null if there are none and is deleted by ~SpecialCaseList.  StringSet is
+/// much faster at matching literal strings than Regex, hence the split.
+struct SpecialCaseList::Entry {
+  StringSet<> Strings;
+  Regex *RegEx;
+
+  Entry() : RegEx(0) {}
+
+  bool match(StringRef Query) const {
+    return Strings.count(Query) || (RegEx && RegEx->match(Query));
+  }
+};
+
 SpecialCaseList::SpecialCaseList(const StringRef Path) {
   // Validate and open blacklist file.
   if (Path.empty()) return;
@@ -82,6 +99,12 @@
       Category = "init";
     }
 
+    // See if we can store Regexp in Strings.
+    if (Regex::isLiteral(Regexp)) {
+      Entries[Prefix][Category].Strings.insert(Regexp);
+      continue;
+    }
+
     // Replace * with .*
     for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
          pos += strlen(".*")) {
@@ -109,16 +132,20 @@
     for (StringMap<std::string>::const_iterator II = I->second.begin(),
                                                 IE = I->second.end();
          II != IE; ++II) {
-      Entries[I->getKey()][II->getKey()] = new Regex(II->getValue());
+      Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
     }
   }
 }
 
 SpecialCaseList::~SpecialCaseList() {
-  for (StringMap<StringMap<Regex*> >::iterator I = Entries.begin(),
-                                               E = Entries.end();
+  for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
+                                              E = Entries.end();
        I != E; ++I) {
-    DeleteContainerSeconds(I->second);
+    for (StringMap<Entry>::const_iterator II = I->second.begin(),
+                                          IE = I->second.end();
+         II != IE; ++II) {
+      delete II->second.RegEx;
+    }
   }
 }
 
@@ -169,14 +196,13 @@
 bool SpecialCaseList::findCategory(const StringRef Section,
                                    const StringRef Query,
                                    StringRef &Category) const {
-  StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
+  StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
   if (I == Entries.end()) return false;
 
-  for (StringMap<Regex *>::const_iterator II = I->second.begin(),
-                                          IE = I->second.end();
+  for (StringMap<Entry>::const_iterator II = I->second.begin(),
+                                        IE = I->second.end();
        II != IE; ++II) {
-    Regex *FunctionRegex = II->getValue();
-    if (FunctionRegex->match(Query)) {
+    if (II->getValue().match(Query)) {
       Category = II->first();
       return true;
     }
@@ -188,13 +214,12 @@
 bool SpecialCaseList::inSectionCategory(const StringRef Section,
                                         const StringRef Query,
                                         const StringRef Category) const {
-  StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
+  StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
   if (I == Entries.end()) return false;
-  StringMap<Regex *>::const_iterator II = I->second.find(Category);
+  StringMap<Entry>::const_iterator II = I->second.find(Category);
   if (II == I->second.end()) return false;
 
-  Regex *FunctionRegex = II->getValue();
-  return FunctionRegex->match(Query);
+  return II->getValue().match(Query);
 }
 
 }  // namespace llvm
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1150.2.patch
Type: text/x-patch
Size: 5655 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130719/671e11e9/attachment.bin>


More information about the llvm-commits mailing list