[llvm-commits] [llvm] r82717 - in /llvm/trunk: docs/TestingGuide.html include/llvm/Support/Regex.h lib/Support/Regex.cpp unittests/Support/RegexTest.cpp utils/FileCheck/FileCheck.cpp

Chris Lattner sabre at nondot.org
Thu Sep 24 14:47:32 PDT 2009


Author: lattner
Date: Thu Sep 24 16:47:32 2009
New Revision: 82717

URL: http://llvm.org/viewvc/llvm-project?rev=82717&view=rev
Log:
add and document regex support for FileCheck.  You can now do stuff like:

; CHECK: movl {{%e[a-z][xi]}}, %eax

or whatever.


Modified:
    llvm/trunk/docs/TestingGuide.html
    llvm/trunk/include/llvm/Support/Regex.h
    llvm/trunk/lib/Support/Regex.cpp
    llvm/trunk/unittests/Support/RegexTest.cpp
    llvm/trunk/utils/FileCheck/FileCheck.cpp

Modified: llvm/trunk/docs/TestingGuide.html
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/TestingGuide.html?rev=82717&r1=82716&r2=82717&view=diff

==============================================================================
--- llvm/trunk/docs/TestingGuide.html (original)
+++ llvm/trunk/docs/TestingGuide.html Thu Sep 24 16:47:32 2009
@@ -625,6 +625,40 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a 
+name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a></div>
+
+<div class="doc_text">
+
+<p>The CHECK: and CHECK-NOT: directives both take a pattern to match.  For most
+uses of FileCheck, fixed string matching is perfectly sufficient.  For some
+things, a more flexible form of matching is desired.  To support this, FileCheck
+allows you to specify regular expressions in matching strings, surrounded by
+double braces: <b>{{yourregex}}</b>.  Because we want to use fixed string
+matching for a majority of what we do, FileCheck has been designed to support
+mixing and matching fixed string matching with regular expressions.  This allows
+you to write things like this:</p>
+
+<div class="doc_code">
+<pre>
+; CHECK: movhpd	<b>{{[0-9]+}}</b>(%esp), <b>{{%xmm[0-7]}}</b>
+</pre>
+</div>
+
+<p>In this case, any offset from the ESP register will be allowed, and any xmm
+register will be allowed.</p>
+
+<p>Because regular expressions are enclosed with double braces, they are
+visually distinct, and you don't need to use escape characters within the double
+braces like you would in C.  In the rare case that you want to match double
+braces explicitly from the input, you can use something ugly like
+<b>{{[{][{]}}</b> as your pattern.</p>
+
+</div>
+
+
+
+<!-- _______________________________________________________________________ -->
 <div class="doc_subsection"><a name="dgvars">Variables and
 substitutions</a></div>
 <!-- _______________________________________________________________________ -->

Modified: llvm/trunk/include/llvm/Support/Regex.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/Regex.h?rev=82717&r1=82716&r2=82717&view=diff

==============================================================================
--- llvm/trunk/include/llvm/Support/Regex.h (original)
+++ llvm/trunk/include/llvm/Support/Regex.h Thu Sep 24 16:47:32 2009
@@ -11,11 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
+#include <string>
 
 struct llvm_regex;
+
 namespace llvm {
+  class StringRef;
+  template<typename T> class SmallVectorImpl;
+  
   class Regex {
   public:
     enum {
@@ -54,6 +57,8 @@
     /// Matches.
     /// For this feature to be enabled you must construct the regex using
     /// Regex("...", Regex::Sub) constructor.
+    ///
+    /// This returns true on a successful match.
     bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
   private:
     struct llvm_regex *preg;

Modified: llvm/trunk/lib/Support/Regex.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Regex.cpp?rev=82717&r1=82716&r2=82717&view=diff

==============================================================================
--- llvm/trunk/lib/Support/Regex.cpp (original)
+++ llvm/trunk/lib/Support/Regex.cpp Thu Sep 24 16:47:32 2009
@@ -14,13 +14,14 @@
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
 #include "regex_impl.h"
 #include <string>
 using namespace llvm;
 
 Regex::Regex(const StringRef &regex, unsigned Flags) {
   unsigned flags = 0;
-  preg = new struct llvm_regex;
+  preg = new llvm_regex();
   preg->re_endp = regex.end();
   if (Flags & IgnoreCase) 
     flags |= REG_ICASE;
@@ -60,7 +61,7 @@
   }
 
   // pmatch needs to have at least one element.
-  SmallVector<llvm_regmatch_t, 2> pm;
+  SmallVector<llvm_regmatch_t, 8> pm;
   pm.resize(nmatch > 0 ? nmatch : 1);
   pm[0].rm_so = 0;
   pm[0].rm_eo = String.size();

Modified: llvm/trunk/unittests/Support/RegexTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/RegexTest.cpp?rev=82717&r1=82716&r2=82717&view=diff

==============================================================================
--- llvm/trunk/unittests/Support/RegexTest.cpp (original)
+++ llvm/trunk/unittests/Support/RegexTest.cpp Thu Sep 24 16:47:32 2009
@@ -9,6 +9,7 @@
 
 #include "gtest/gtest.h"
 #include "llvm/Support/Regex.h"
+#include "llvm/ADT/SmallVector.h"
 #include <cstring>
 
 using namespace llvm;

Modified: llvm/trunk/utils/FileCheck/FileCheck.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/FileCheck/FileCheck.cpp?rev=82717&r1=82716&r2=82717&view=diff

==============================================================================
--- llvm/trunk/utils/FileCheck/FileCheck.cpp (original)
+++ llvm/trunk/utils/FileCheck/FileCheck.cpp Thu Sep 24 16:47:32 2009
@@ -19,6 +19,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/System/Signals.h"
@@ -44,8 +45,9 @@
 //===----------------------------------------------------------------------===//
 
 class Pattern {
-  /// Str - The string to match.
-  StringRef Str;
+  /// Chunks - The pattern chunks to match.  If the bool is false, it is a fixed
+  /// string match, if it is true, it is a regex match.
+  SmallVector<std::pair<StringRef, bool>, 4> Chunks;
 public:
   
   Pattern() { }
@@ -55,10 +57,7 @@
   /// Match - Match the pattern string against the input buffer Buffer.  This
   /// returns the position that is matched or npos if there is no match.  If
   /// there is a match, the size of the matched string is returned in MatchLen.
-  size_t Match(StringRef Buffer, size_t &MatchLen) const {
-    MatchLen = Str.size();
-    return Buffer.find(Str);
-  }
+  size_t Match(StringRef Buffer, size_t &MatchLen) const;
 };
 
 bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
@@ -74,13 +73,119 @@
                     "error");
     return true;
   }
-
   
-  
-  Str = PatternStr;
+  // Scan the pattern to break it into regex and non-regex pieces.
+  while (!PatternStr.empty()) {
+    // Handle fixed string matches.
+    if (PatternStr.size() < 2 ||
+        PatternStr[0] != '{' || PatternStr[1] != '{') {
+      // Find the end, which is the start of the next regex.
+      size_t FixedMatchEnd = PatternStr.find("{{");
+      
+      Chunks.push_back(std::make_pair(PatternStr.substr(0, FixedMatchEnd),
+                                      false));
+      PatternStr = PatternStr.substr(FixedMatchEnd);
+      continue;
+    }
+    
+    // Otherwise, this is the start of a regex match.  Scan for the }}.
+    size_t End = PatternStr.find("}}");
+    if (End == StringRef::npos) {
+      SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
+                      "found start of regex string with no end '}}'", "error");
+      return true;
+    }
+    
+    Regex R(PatternStr.substr(2, End-2));
+    std::string Error;
+    if (!R.isValid(Error)) {
+      SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2),
+                      "invalid regex: " + Error, "error");
+      return true;
+    }
+    
+    Chunks.push_back(std::make_pair(PatternStr.substr(2, End-2), true));
+    PatternStr = PatternStr.substr(End+2);
+  }
+
   return false;
 }
 
+/// Match - Match the pattern string against the input buffer Buffer.  This
+/// returns the position that is matched (relative to the start of Buffer) or
+/// npos if there is no match.  If there is a match, the size of the matched
+/// string is returned in MatchLen.
+size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const {
+  // Characters already trimmed off the front of Buffer by failed attempts.
+  size_t BufferOffset = 0;
+  size_t FirstMatch = StringRef::npos;
+  MatchLen = 0;
+  
+  SmallVector<StringRef, 4> MatchInfo;
+  
+  while (!Buffer.empty()) {
+    StringRef MatchAttempt = Buffer;
+    // Discard lengths accumulated by a previous, partially matched attempt.
+    MatchLen = 0;
+    
+    unsigned ChunkNo = 0, e = Chunks.size();
+    for (; ChunkNo != e; ++ChunkNo) {
+      StringRef PatternStr = Chunks[ChunkNo].first;
+      
+      size_t ThisMatch = StringRef::npos;
+      size_t ThisLength = StringRef::npos;
+      if (!Chunks[ChunkNo].second) {
+        // Fixed string match.
+        ThisMatch = MatchAttempt.find(PatternStr);
+        ThisLength = PatternStr.size();
+      } else if (Regex(PatternStr, Regex::Sub).match(MatchAttempt, &MatchInfo)) {
+        // Successful regex match.
+        assert(!MatchInfo.empty() && "Didn't get any match");
+        StringRef FullMatch = MatchInfo[0];
+        MatchInfo.clear();
+        
+        ThisMatch = FullMatch.data()-MatchAttempt.data();
+        ThisLength = FullMatch.size();
+      }
+      
+      // The first chunk is allowed to match anywhere in MatchAttempt; every
+      // later chunk must match immediately after the previous one.
+      if (ChunkNo == 0) {
+        // If the first match fails then this pattern will never match in
+        // Buffer.
+        if (ThisMatch == StringRef::npos)
+          return ThisMatch;
+        
+        FirstMatch = ThisMatch;
+        MatchAttempt = MatchAttempt.substr(FirstMatch);
+        ThisMatch = 0;
+      }
+      
+      // If this chunk didn't match (npos), or matched with something between
+      // it and the previous chunk (e.g. "ABC{{DEF}}" against "ABCxDEF"), the
+      // pattern doesn't match at FirstMatch; try later in the buffer.
+      if (ThisMatch != 0)
+        break;
+      
+      // Otherwise, match the string and move to the next chunk.
+      MatchLen += ThisLength;
+      MatchAttempt = MatchAttempt.substr(ThisLength);
+    }
+
+    // If the whole thing matched, we win.
+    if (ChunkNo == e)
+      return BufferOffset+FirstMatch;
+    
+    // Otherwise, try matching again after FirstMatch to see if this pattern
+    // matches later in the buffer.
+    BufferOffset += FirstMatch+1;
+    Buffer = Buffer.substr(FirstMatch+1);
+  }
+  
+  // If we ran out of stuff to scan, then we didn't match.
+  return StringRef::npos;
+}
+
 
 //===----------------------------------------------------------------------===//
 // Check Strings.
@@ -367,14 +472,14 @@
     
     // If this match had "not strings", verify that they don't exist in the
     // skipped region.
-    for (unsigned i = 0, e = CheckStr.NotStrings.size(); i != e; ++i) {
+    for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) {
       size_t MatchLen = 0;
-      size_t Pos = CheckStr.NotStrings[i].second.Match(SkippedRegion, MatchLen);
+      size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen);
       if (Pos == StringRef::npos) continue;
      
       SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
                       CheckPrefix+"-NOT: string occurred!", "error");
-      SM.PrintMessage(CheckStr.NotStrings[i].first,
+      SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
                       CheckPrefix+"-NOT: pattern specified here", "note");
       return 1;
     }





More information about the llvm-commits mailing list