[llvm] r317617 - Extend SpecialCaseList to allow users to blame matches on entries in the file.

Mitch Phillips via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 7 13:16:46 PST 2017


Author: hctim
Date: Tue Nov  7 13:16:46 2017
New Revision: 317617

URL: http://llvm.org/viewvc/llvm-project?rev=317617&view=rev
Log:
Extend SpecialCaseList to allow users to blame matches on entries in the file.

Summary:
Extends SCL functionality to allow users to find the line number in the file the SCL is built from through SpecialCaseList::inSectionBlame(...).

Also removes the need to compile the SCL before use. As the matcher now contains a list of regexes to test against instead of a single regex, the regexes can be individually built on each insertion rather than one large compilation at the end of construction.

This change also fixes a bug where blank lines would cause the parser to become out-of-sync with the line number. An error on line `k` was being reported as being on line `k - num_blank_lines_before_k`.

Note: This change has a cyclical dependency on D39486. Both these changes must be submitted at the same time to avoid a build breakage.

Reviewers: vlad.tsyrklevich

Reviewed By: vlad.tsyrklevich

Subscribers: kcc, pcc, llvm-commits

Differential Revision: https://reviews.llvm.org/D39485

Modified:
    llvm/trunk/include/llvm/Support/SpecialCaseList.h
    llvm/trunk/lib/Support/SpecialCaseList.cpp
    llvm/trunk/unittests/Support/SpecialCaseListTest.cpp

Modified: llvm/trunk/include/llvm/Support/SpecialCaseList.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/SpecialCaseList.h?rev=317617&r1=317616&r2=317617&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/SpecialCaseList.h (original)
+++ llvm/trunk/include/llvm/Support/SpecialCaseList.h Tue Nov  7 13:16:46 2017
@@ -89,6 +89,17 @@ public:
   bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
                  StringRef Category = StringRef()) const;
 
+  /// Returns the line number corresponding to the special case list entry if
+  /// the special case list contains a line
+  /// \code
+  ///   @Prefix:<E>=@Category
+  /// \endcode
+  /// where @Query satisfies wildcard expression <E> in a given @Section.
+  /// Returns zero if there is no blacklist entry corresponding to this
+  /// expression.
+  unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
+                          StringRef Category = StringRef()) const;
+
 protected:
   // Implementations of the create*() functions that can also be used by derived
   // classes.
@@ -96,25 +107,25 @@ protected:
                       std::string &Error);
   bool createInternal(const MemoryBuffer *MB, std::string &Error);
 
+  SpecialCaseList() = default;
   SpecialCaseList(SpecialCaseList const &) = delete;
   SpecialCaseList &operator=(SpecialCaseList const &) = delete;
 
   /// Represents a set of regular expressions.  Regular expressions which are
-  /// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
-  /// others are represented as a single pipe-separated regex in RegEx.  The
-  /// reason for doing so is efficiency; StringSet is much faster at matching
+  /// "literal" (i.e. no regex metacharacters) are stored in Strings.  The
+  /// reason for doing so is efficiency; StringMap is much faster at matching
   /// literal strings than Regex.
   class Matcher {
   public:
-    bool insert(std::string Regexp, std::string &REError);
-    void compile();
-    bool match(StringRef Query) const;
+    bool insert(std::string Regexp, unsigned LineNumber, std::string &REError);
+    // Returns the line number in the source file that this query matches to.
+    // Returns zero if no match is found.
+    unsigned match(StringRef Query) const;
 
   private:
-    StringSet<> Strings;
+    StringMap<unsigned> Strings;
     TrigramIndex Trigrams;
-    std::unique_ptr<Regex> RegEx;
-    std::string UncompiledRegEx;
+    std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
   };
 
   using SectionEntries = StringMap<StringMap<Matcher>>;
@@ -127,19 +138,15 @@ protected:
   };
 
   std::vector<Section> Sections;
-  bool IsCompiled;
 
-  SpecialCaseList();
   /// Parses just-constructed SpecialCaseList entries from a memory buffer.
   bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
              std::string &Error);
-  /// compile() should be called once, after parsing all the memory buffers.
-  void compile();
 
   // Helper method for derived classes to search by Prefix, Query, and Category
   // once they have already resolved a section entry.
-  bool inSection(const SectionEntries &Entries, StringRef Prefix,
-                 StringRef Query, StringRef Category) const;
+  unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix,
+                          StringRef Query, StringRef Category) const;
 };
 
 }  // namespace llvm

Modified: llvm/trunk/lib/Support/SpecialCaseList.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/SpecialCaseList.cpp?rev=317617&r1=317616&r2=317617&view=diff
==============================================================================
--- llvm/trunk/lib/Support/SpecialCaseList.cpp (original)
+++ llvm/trunk/lib/Support/SpecialCaseList.cpp Tue Nov  7 13:16:46 2017
@@ -27,6 +27,7 @@
 namespace llvm {
 
 bool SpecialCaseList::Matcher::insert(std::string Regexp,
+                                      unsigned LineNumber,
                                       std::string &REError) {
   if (Regexp.empty()) {
     REError = "Supplied regexp was blank";
@@ -34,7 +35,7 @@ bool SpecialCaseList::Matcher::insert(st
   }
 
   if (Regex::isLiteralERE(Regexp)) {
-    Strings.insert(Regexp);
+    Strings[Regexp] = LineNumber;
     return true;
   }
   Trigrams.insert(Regexp);
@@ -45,34 +46,30 @@ bool SpecialCaseList::Matcher::insert(st
     Regexp.replace(pos, strlen("*"), ".*");
   }
 
+  Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+
   // Check that the regexp is valid.
   Regex CheckRE(Regexp);
   if (!CheckRE.isValid(REError))
     return false;
 
-  if (!UncompiledRegEx.empty())
-    UncompiledRegEx += "|";
-  UncompiledRegEx += "^(" + Regexp + ")$";
+  RegExes.emplace_back(
+      std::make_pair(make_unique<Regex>(std::move(CheckRE)), LineNumber));
   return true;
 }
 
-void SpecialCaseList::Matcher::compile() {
-  if (!UncompiledRegEx.empty()) {
-    RegEx.reset(new Regex(UncompiledRegEx));
-    UncompiledRegEx.clear();
-  }
-}
-
-bool SpecialCaseList::Matcher::match(StringRef Query) const {
-  if (Strings.count(Query))
-    return true;
+unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
+  auto It = Strings.find(Query);
+  if (It != Strings.end())
+    return It->second;
   if (Trigrams.isDefinitelyOut(Query))
     return false;
-  return RegEx && RegEx->match(Query);
+  for (auto& RegExKV : RegExes)
+    if (RegExKV.first->match(Query))
+      return RegExKV.second;
+  return 0;
 }
 
-SpecialCaseList::SpecialCaseList() : Sections(), IsCompiled(false) {}
-
 std::unique_ptr<SpecialCaseList>
 SpecialCaseList::create(const std::vector<std::string> &Paths,
                         std::string &Error) {
@@ -114,7 +111,6 @@ bool SpecialCaseList::createInternal(con
       return false;
     }
   }
-  compile();
   return true;
 }
 
@@ -123,7 +119,6 @@ bool SpecialCaseList::createInternal(con
   StringMap<size_t> Sections;
   if (!parse(MB, Sections, Error))
     return false;
-  compile();
   return true;
 }
 
@@ -132,11 +127,13 @@ bool SpecialCaseList::parse(const Memory
                             std::string &Error) {
   // Iterate through each line in the blacklist file.
   SmallVector<StringRef, 16> Lines;
-  SplitString(MB->getBuffer(), Lines, "\n\r");
+  MB->getBuffer().split(Lines, '\n');
 
-  int LineNo = 1;
+  unsigned LineNo = 1;
   StringRef Section = "*";
+
   for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) {
+    *I = I->trim();
     // Ignore empty lines and lines starting with "#"
     if (I->empty() || I->startswith("#"))
       continue;
@@ -181,11 +178,10 @@ bool SpecialCaseList::parse(const Memory
     if (SectionsMap.find(Section) == SectionsMap.end()) {
       std::unique_ptr<Matcher> M = make_unique<Matcher>();
       std::string REError;
-      if (!M->insert(Section, REError)) {
+      if (!M->insert(Section, LineNo, REError)) {
         Error = (Twine("malformed section ") + Section + ": '" + REError).str();
         return false;
       }
-      M->compile();
 
       SectionsMap[Section] = Sections.size();
       Sections.emplace_back(std::move(M));
@@ -193,7 +189,7 @@ bool SpecialCaseList::parse(const Memory
 
     auto &Entry = Sections[SectionsMap[Section]].Entries[Prefix][Category];
     std::string REError;
-    if (!Entry.insert(std::move(Regexp), REError)) {
+    if (!Entry.insert(std::move(Regexp), LineNo, REError)) {
       Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" +
                SplitLine.second + "': " + REError).str();
       return false;
@@ -202,38 +198,33 @@ bool SpecialCaseList::parse(const Memory
   return true;
 }
 
-void SpecialCaseList::compile() {
-  assert(!IsCompiled && "compile() should only be called once");
-  // Iterate through every section compiling regular expressions for every query
-  // and creating Section entries.
-  for (auto &Section : Sections)
-    for (auto &Prefix : Section.Entries)
-      for (auto &Category : Prefix.getValue())
-        Category.getValue().compile();
-
-  IsCompiled = true;
-}
-
 SpecialCaseList::~SpecialCaseList() {}
 
 bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
                                 StringRef Query, StringRef Category) const {
-  assert(IsCompiled && "SpecialCaseList::compile() was not called!");
+  return inSectionBlame(Section, Prefix, Query, Category);
+}
 
+unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
+                                         StringRef Query,
+                                         StringRef Category) const {
   for (auto &SectionIter : Sections)
-    if (SectionIter.SectionMatcher->match(Section) &&
-        inSection(SectionIter.Entries, Prefix, Query, Category))
-      return true;
-
-  return false;
+    if (SectionIter.SectionMatcher->match(Section)) {
+      unsigned Blame =
+          inSectionBlame(SectionIter.Entries, Prefix, Query, Category);
+      if (Blame)
+        return Blame;
+    }
+  return 0;
 }
 
-bool SpecialCaseList::inSection(const SectionEntries &Entries, StringRef Prefix,
-                                StringRef Query, StringRef Category) const {
+unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
+                                         StringRef Prefix, StringRef Query,
+                                         StringRef Category) const {
   SectionEntries::const_iterator I = Entries.find(Prefix);
-  if (I == Entries.end()) return false;
+  if (I == Entries.end()) return 0;
   StringMap<Matcher>::const_iterator II = I->second.find(Category);
-  if (II == I->second.end()) return false;
+  if (II == I->second.end()) return 0;
 
   return II->getValue().match(Query);
 }

Modified: llvm/trunk/unittests/Support/SpecialCaseListTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/SpecialCaseListTest.cpp?rev=317617&r1=317616&r2=317617&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/SpecialCaseListTest.cpp (original)
+++ llvm/trunk/unittests/Support/SpecialCaseListTest.cpp Tue Nov  7 13:16:46 2017
@@ -58,6 +58,30 @@ TEST_F(SpecialCaseListTest, Basic) {
   EXPECT_FALSE(SCL->inSection("", "src", "hi"));
   EXPECT_FALSE(SCL->inSection("", "fun", "hello"));
   EXPECT_FALSE(SCL->inSection("", "src", "hello", "category"));
+
+  EXPECT_EQ(3u, SCL->inSectionBlame("", "src", "hello"));
+  EXPECT_EQ(4u, SCL->inSectionBlame("", "src", "bye"));
+  EXPECT_EQ(5u, SCL->inSectionBlame("", "src", "hi", "category"));
+  EXPECT_EQ(6u, SCL->inSectionBlame("", "src", "zzzz", "category"));
+  EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hi"));
+  EXPECT_EQ(0u, SCL->inSectionBlame("", "fun", "hello"));
+  EXPECT_EQ(0u, SCL->inSectionBlame("", "src", "hello", "category"));
+}
+
+TEST_F(SpecialCaseListTest, CorrectErrorLineNumberWithBlankLine) {
+  std::string Error;
+  EXPECT_EQ(nullptr, makeSpecialCaseList("# This is a comment.\n"
+                                         "\n"
+                                         "[not valid\n",
+                                         Error));
+  EXPECT_TRUE(
+      ((StringRef)Error).startswith("malformed section header on line 3:"));
+
+  EXPECT_EQ(nullptr, makeSpecialCaseList("\n\n\n"
+                                         "[not valid\n",
+                                         Error));
+  EXPECT_TRUE(
+      ((StringRef)Error).startswith("malformed section header on line 4:"));
 }
 
 TEST_F(SpecialCaseListTest, SectionRegexErrorHandling) {




More information about the llvm-commits mailing list