[llvm-commits] [llvm] r82779 - in /llvm/trunk: test/CodeGen/X86/xor.ll utils/FileCheck/FileCheck.cpp

Daniel Dunbar daniel at zuster.org
Fri Sep 25 11:48:51 PDT 2009


Nice -/+ ratio! :)

 - Daniel

On Fri, Sep 25, 2009 at 10:23 AM, Chris Lattner <sabre at nondot.org> wrote:
> Author: lattner
> Date: Fri Sep 25 12:23:43 2009
> New Revision: 82779
>
> URL: http://llvm.org/viewvc/llvm-project?rev=82779&view=rev
> Log:
> Reimplement the regex matching strategy by building a single
> regex and matching it, instead of trying to match one chunk at a time.
> Matching chunk by chunk broke on check lines like
>  CHECK: foo {{.*}}bar
> because the .* chunk, matched on its own, would consume the entire rest
> of the line, so the following "bar" chunk could never match.
>
> Now we just escape the fixed strings for the user, so that something
> like:
>  CHECK: a() {{.*}}???
> is matched as:
>  CHECK: {{a\(\) .*\?\?\?}}
> transparently "under the covers".
>
>
> Modified:
>    llvm/trunk/test/CodeGen/X86/xor.ll
>    llvm/trunk/utils/FileCheck/FileCheck.cpp
>
> Modified: llvm/trunk/test/CodeGen/X86/xor.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xor.ll?rev=82779&r1=82778&r2=82779&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/xor.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/xor.ll Fri Sep 25 12:23:43 2009
> @@ -59,10 +59,10 @@
>
>  ; X64: test4:
>  ; X64:    notl %eax
> -; X64:    andl {{.*%eax}}
> +; X64:    andl {{.*}}%eax
>  ; X32: test4:
>  ; X32:    notl %edx
> -; X32:    andl {{.*%edx}}
> +; X32:    andl {{.*}}%edx
>  }
>
>  define i16 @test5(i16 %a, i16 %b) nounwind  {
> @@ -81,10 +81,10 @@
>        ret i16 %tmp3
>  ; X64: test5:
>  ; X64:    notw %ax
> -; X64:    andw {{.*%ax}}
> +; X64:    andw {{.*}}%ax
>  ; X32: test5:
>  ; X32:    notw %dx
> -; X32:    andw {{.*%dx}}
> +; X32:    andw {{.*}}%dx
>  }
>
>  define i8 @test6(i8 %a, i8 %b) nounwind  {
> @@ -103,10 +103,10 @@
>        ret i8 %tmp3
>  ; X64: test6:
>  ; X64:    notb %al
> -; X64:    andb {{.*%al}}
> +; X64:    andb {{.*}}%al
>  ; X32: test6:
>  ; X32:    notb %dl
> -; X32:    andb {{.*%dl}}
> +; X32:    andb {{.*}}%dl
>  }
>
>  define i32 @test7(i32 %a, i32 %b) nounwind  {
> @@ -125,9 +125,9 @@
>        ret i32 %tmp3
>  ; X64: test7:
>  ; X64:    xorl $2147483646, %eax
> -; X64:    andl {{.*%eax}}
> +; X64:    andl {{.*}}%eax
>  ; X32: test7:
>  ; X32:    xorl $2147483646, %edx
> -; X32:    andl {{.*%edx}}
> +; X32:    andl {{.*}}%edx
>  }
>
>
> Modified: llvm/trunk/utils/FileCheck/FileCheck.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/FileCheck/FileCheck.cpp?rev=82779&r1=82778&r2=82779&view=diff
>
> ==============================================================================
> --- llvm/trunk/utils/FileCheck/FileCheck.cpp (original)
> +++ llvm/trunk/utils/FileCheck/FileCheck.cpp Fri Sep 25 12:23:43 2009
> @@ -44,39 +44,13 @@
>  // Pattern Handling Code.
>  //===----------------------------------------------------------------------===//
>
> -class PatternChunk {
> -  StringRef Str;
> -  bool isRegEx;
> -public:
> -  PatternChunk(StringRef S, bool isRE) : Str(S), isRegEx(isRE) {}
> -
> -  size_t Match(StringRef Buffer, size_t &MatchLen) const {
> -    if (!isRegEx) {
> -      // Fixed string match.
> -      MatchLen = Str.size();
> -      return Buffer.find(Str);
> -    }
> -
> -    // Regex match.
> -    SmallVector<StringRef, 4> MatchInfo;
> -    if (!Regex(Str, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo))
> -      return StringRef::npos;
> -
> -    // Successful regex match.
> -    assert(!MatchInfo.empty() && "Didn't get any match");
> -    StringRef FullMatch = MatchInfo[0];
> -
> -    MatchLen = FullMatch.size();
> -    return FullMatch.data()-Buffer.data();
> -  }
> -};
> -
>  class Pattern {
> -  /// Chunks - The pattern chunks to match.  If the bool is false, it is a fixed
> -  /// string match, if it is true, it is a regex match.
> -  SmallVector<PatternChunk, 4> Chunks;
> -
> +  /// FixedStr - If non-empty, this pattern is a fixed string match with the
> +  /// specified fixed string.
>   StringRef FixedStr;
> +
> +  /// RegEx - If non-empty, this is a regex pattern.
> +  std::string RegExStr;
>  public:
>
>   Pattern() { }
> @@ -87,6 +61,9 @@
>   /// returns the position that is matched or npos if there is no match.  If
>   /// there is a match, the size of the matched string is returned in MatchLen.
>   size_t Match(StringRef Buffer, size_t &MatchLen) const;
> +
> +private:
> +  void AddFixedStringToRegEx(StringRef FixedStr);
>  };
>
>  bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
> @@ -109,17 +86,15 @@
>     return false;
>   }
>
> -  // Otherwise, there is at least one regex piece.
> -
> -  // Scan the pattern to break it into regex and non-regex pieces.
> +  // Otherwise, there is at least one regex piece.  Build up the regex pattern
> +  // by escaping scary characters in fixed strings, building up one big regex.
>   while (!PatternStr.empty()) {
>     // Handle fixed string matches.
>     if (PatternStr.size() < 2 ||
>         PatternStr[0] != '{' || PatternStr[1] != '{') {
>       // Find the end, which is the start of the next regex.
>       size_t FixedMatchEnd = PatternStr.find("{{");
> -
> -      Chunks.push_back(PatternChunk(PatternStr.substr(0, FixedMatchEnd),false));
> +      AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd));
>       PatternStr = PatternStr.substr(FixedMatchEnd);
>       continue;
>     }
> @@ -132,7 +107,8 @@
>       return true;
>     }
>
> -    Regex R(PatternStr.substr(2, End-2));
> +    StringRef RegexStr = PatternStr.substr(2, End-2);
> +    Regex R(RegexStr);
>     std::string Error;
>     if (!R.isValid(Error)) {
>       SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2),
> @@ -140,13 +116,41 @@
>       return true;
>     }
>
> -    Chunks.push_back(PatternChunk(PatternStr.substr(2, End-2), true));
> +    RegExStr += RegexStr.str();
>     PatternStr = PatternStr.substr(End+2);
>   }
>
>   return false;
>  }
>
> +void Pattern::AddFixedStringToRegEx(StringRef FixedStr) {
> +  // Add the characters from FixedStr to the regex, escaping as needed.  This
> +  // avoids "leaning toothpicks" in common patterns.
> +  for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) {
> +    switch (FixedStr[i]) {
> +    // These are the special characters matched in "p_ere_exp".
> +    case '(':
> +    case ')':
> +    case '^':
> +    case '$':
> +    case '|':
> +    case '*':
> +    case '+':
> +    case '?':
> +    case '.':
> +    case '[':
> +    case '\\':
> +    case '{':
> +      RegExStr += '\\';
> +      // FALL THROUGH.
> +    default:
> +      RegExStr += FixedStr[i];
> +      break;
> +    }
> +  }
> +}
> +
> +
>  /// Match - Match the pattern string against the input buffer Buffer.  This
>  /// returns the position that is matched or npos if there is no match.  If
>  /// there is a match, the size of the matched string is returned in MatchLen.
> @@ -157,58 +161,17 @@
>     return Buffer.find(FixedStr);
>   }
>
> -  size_t FirstMatch = StringRef::npos;
> -  MatchLen = 0;
> -
> -  while (!Buffer.empty()) {
> -    StringRef MatchAttempt = Buffer;
> -
> -    unsigned ChunkNo = 0, e = Chunks.size();
> -    for (; ChunkNo != e; ++ChunkNo) {
> -      size_t ThisMatch, ThisLength = StringRef::npos;
> -      ThisMatch = Chunks[ChunkNo].Match(MatchAttempt, ThisLength);
> -
> -      // Otherwise, what we do depends on if this is the first match or not.  If
> -      // this is the first match, it doesn't match to match at the start of
> -      // MatchAttempt.
> -      if (ChunkNo == 0) {
> -        // If the first match fails then this pattern will never match in
> -        // Buffer.
> -        if (ThisMatch == StringRef::npos)
> -          return ThisMatch;
> -
> -        FirstMatch = ThisMatch;
> -        MatchAttempt = MatchAttempt.substr(FirstMatch);
> -        ThisMatch = 0;
> -      }
> -
> -      // If this chunk didn't match, then the entire pattern didn't match from
> -      // FirstMatch, try later in the buffer.
> -      if (ThisMatch == StringRef::npos)
> -        break;
> -
> -      // Ok, if the match didn't match at the beginning of MatchAttempt, then we
> -      // have something like "ABC{{DEF}} and something was in-between.  Reject
> -      // the match.
> -      if (ThisMatch != 0)
> -        break;
> -
> -      // Otherwise, match the string and move to the next chunk.
> -      MatchLen += ThisLength;
> -      MatchAttempt = MatchAttempt.substr(ThisLength);
> -    }
> -
> -    // If the whole thing matched, we win.
> -    if (ChunkNo == e)
> -      return FirstMatch;
> -
> -    // Otherwise, try matching again after FirstMatch to see if this pattern
> -    // matches later in the buffer.
> -    Buffer = Buffer.substr(FirstMatch+1);
> -  }
> +  // Regex match.
> +  SmallVector<StringRef, 4> MatchInfo;
> +  if (!Regex(RegExStr, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo))
> +    return StringRef::npos;
> +
> +  // Successful regex match.
> +  assert(!MatchInfo.empty() && "Didn't get any match");
> +  StringRef FullMatch = MatchInfo[0];
>
> -  // If we ran out of stuff to scan, then we didn't match.
> -  return StringRef::npos;
> +  MatchLen = FullMatch.size();
> +  return FullMatch.data()-Buffer.data();
>  }
>
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>




More information about the llvm-commits mailing list