[PATCH] Introduce an optimisation for special case lists with large numbers of literal entries.

Peter Collingbourne peter at pcc.me.uk
Fri Aug 2 18:59:57 PDT 2013


  Split out the isLiteralERE change.

Hi samsonov,

http://llvm-reviews.chandlerc.com/D1150

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D1150?vs=2925&id=3178#toc

Files:
  include/llvm/Transforms/Utils/SpecialCaseList.h
  lib/Transforms/Utils/SpecialCaseList.cpp

Index: include/llvm/Transforms/Utils/SpecialCaseList.h
===================================================================
--- include/llvm/Transforms/Utils/SpecialCaseList.h
+++ include/llvm/Transforms/Utils/SpecialCaseList.h
@@ -89,7 +89,8 @@
   bool findCategory(const Module &M, StringRef &Category) const;
 
  private:
-  StringMap<StringMap<Regex*> > Entries;
+  struct Entry;
+  StringMap<StringMap<Entry> > Entries;
 
   void init(const MemoryBuffer *MB);
   bool findCategory(const StringRef Section, const StringRef Query,
Index: lib/Transforms/Utils/SpecialCaseList.cpp
===================================================================
--- lib/Transforms/Utils/SpecialCaseList.cpp
+++ lib/Transforms/Utils/SpecialCaseList.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -32,6 +33,22 @@
 
 namespace llvm {
 
+/// Represents a set of regular expressions.  Regular expressions which are
+/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
+/// others are represented as a single pipe-separated regex in RegEx.  The
+/// reason for doing so is efficiency; StringSet is much faster at matching
+/// literal strings than Regex.
+struct SpecialCaseList::Entry {
+  StringSet<> Strings;  // Literal entries; looked up by exact string.
+  Regex *RegEx;  // Null until assigned; deleted in ~SpecialCaseList().
+  // Start with no regex; match() checks RegEx for null before using it.
+  Entry() : RegEx(0) {}
+  // Returns true if Query is in Strings or RegEx is set and matches Query.
+  bool match(StringRef Query) const {
+    return Strings.count(Query) || (RegEx && RegEx->match(Query));
+  }
+};
+
 SpecialCaseList::SpecialCaseList(const StringRef Path) {
   // Validate and open blacklist file.
   if (Path.empty()) return;
@@ -82,6 +99,12 @@
       Category = "init";
     }
 
+    // See if we can store Regexp in Strings.
+    if (Regex::isLiteralERE(Regexp)) {
+      Entries[Prefix][Category].Strings.insert(Regexp);
+      continue;
+    }
+
     // Replace * with .*
     for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
          pos += strlen(".*")) {
@@ -109,16 +132,20 @@
     for (StringMap<std::string>::const_iterator II = I->second.begin(),
                                                 IE = I->second.end();
          II != IE; ++II) {
-      Entries[I->getKey()][II->getKey()] = new Regex(II->getValue());
+      Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
     }
   }
 }
 
 SpecialCaseList::~SpecialCaseList() {
-  for (StringMap<StringMap<Regex*> >::iterator I = Entries.begin(),
-                                               E = Entries.end();
+  for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
+                                              E = Entries.end();
        I != E; ++I) {
-    DeleteContainerSeconds(I->second);
+    for (StringMap<Entry>::const_iterator II = I->second.begin(),
+                                          IE = I->second.end();
+         II != IE; ++II) {
+      delete II->second.RegEx;  // No-op when RegEx is still null.
+    }
   }
 }
 
@@ -169,14 +196,13 @@
 bool SpecialCaseList::findCategory(const StringRef Section,
                                    const StringRef Query,
                                    StringRef &Category) const {
-  StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
+  StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
   if (I == Entries.end()) return false;
 
-  for (StringMap<Regex *>::const_iterator II = I->second.begin(),
-                                          IE = I->second.end();
+  for (StringMap<Entry>::const_iterator II = I->second.begin(),
+                                        IE = I->second.end();
        II != IE; ++II) {
-    Regex *FunctionRegex = II->getValue();
-    if (FunctionRegex->match(Query)) {
+    if (II->getValue().match(Query)) {
       Category = II->first();
       return true;
     }
@@ -188,13 +214,12 @@
 bool SpecialCaseList::inSectionCategory(const StringRef Section,
                                         const StringRef Query,
                                         const StringRef Category) const {
-  StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
+  StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
   if (I == Entries.end()) return false;
-  StringMap<Regex *>::const_iterator II = I->second.find(Category);
+  StringMap<Entry>::const_iterator II = I->second.find(Category);
   if (II == I->second.end()) return false;
 
-  Regex *FunctionRegex = II->getValue();
-  return FunctionRegex->match(Query);
+  return II->getValue().match(Query);
 }
 
 }  // namespace llvm
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1150.3.patch
Type: text/x-patch
Size: 4648 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130802/2c3ae445/attachment.bin>


More information about the llvm-commits mailing list