[llvm-commits] [llvm] r82779 - in /llvm/trunk: test/CodeGen/X86/xor.ll utils/FileCheck/FileCheck.cpp
Chris Lattner
sabre at nondot.org
Fri Sep 25 10:23:43 PDT 2009
Author: lattner
Date: Fri Sep 25 12:23:43 2009
New Revision: 82779
URL: http://llvm.org/viewvc/llvm-project?rev=82779&view=rev
Log:
reimplement the regex matching strategy by building a single
regex and matching it instead of trying to match chunks at a time.
Matching chunks at a time broke with check lines like
CHECK: foo {{.*}}bar
because the .* would eat the entire rest of the line and bar would
never match.
Now we just escape the fixed strings for the user, so that something
like:
CHECK: a() {{.*}}???
is matched as:
CHECK: {{a\(\) .*\?\?\?}}
transparently "under the covers".
Modified:
llvm/trunk/test/CodeGen/X86/xor.ll
llvm/trunk/utils/FileCheck/FileCheck.cpp
Modified: llvm/trunk/test/CodeGen/X86/xor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xor.ll?rev=82779&r1=82778&r2=82779&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xor.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xor.ll Fri Sep 25 12:23:43 2009
@@ -59,10 +59,10 @@
; X64: test4:
; X64: notl %eax
-; X64: andl {{.*%eax}}
+; X64: andl {{.*}}%eax
; X32: test4:
; X32: notl %edx
-; X32: andl {{.*%edx}}
+; X32: andl {{.*}}%edx
}
define i16 @test5(i16 %a, i16 %b) nounwind {
@@ -81,10 +81,10 @@
ret i16 %tmp3
; X64: test5:
; X64: notw %ax
-; X64: andw {{.*%ax}}
+; X64: andw {{.*}}%ax
; X32: test5:
; X32: notw %dx
-; X32: andw {{.*%dx}}
+; X32: andw {{.*}}%dx
}
define i8 @test6(i8 %a, i8 %b) nounwind {
@@ -103,10 +103,10 @@
ret i8 %tmp3
; X64: test6:
; X64: notb %al
-; X64: andb {{.*%al}}
+; X64: andb {{.*}}%al
; X32: test6:
; X32: notb %dl
-; X32: andb {{.*%dl}}
+; X32: andb {{.*}}%dl
}
define i32 @test7(i32 %a, i32 %b) nounwind {
@@ -125,9 +125,9 @@
ret i32 %tmp3
; X64: test7:
; X64: xorl $2147483646, %eax
-; X64: andl {{.*%eax}}
+; X64: andl {{.*}}%eax
; X32: test7:
; X32: xorl $2147483646, %edx
-; X32: andl {{.*%edx}}
+; X32: andl {{.*}}%edx
}
Modified: llvm/trunk/utils/FileCheck/FileCheck.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/FileCheck/FileCheck.cpp?rev=82779&r1=82778&r2=82779&view=diff
==============================================================================
--- llvm/trunk/utils/FileCheck/FileCheck.cpp (original)
+++ llvm/trunk/utils/FileCheck/FileCheck.cpp Fri Sep 25 12:23:43 2009
@@ -44,39 +44,13 @@
// Pattern Handling Code.
//===----------------------------------------------------------------------===//
-class PatternChunk {
- StringRef Str;
- bool isRegEx;
-public:
- PatternChunk(StringRef S, bool isRE) : Str(S), isRegEx(isRE) {}
-
- size_t Match(StringRef Buffer, size_t &MatchLen) const {
- if (!isRegEx) {
- // Fixed string match.
- MatchLen = Str.size();
- return Buffer.find(Str);
- }
-
- // Regex match.
- SmallVector<StringRef, 4> MatchInfo;
- if (!Regex(Str, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo))
- return StringRef::npos;
-
- // Successful regex match.
- assert(!MatchInfo.empty() && "Didn't get any match");
- StringRef FullMatch = MatchInfo[0];
-
- MatchLen = FullMatch.size();
- return FullMatch.data()-Buffer.data();
- }
-};
-
class Pattern {
- /// Chunks - The pattern chunks to match. If the bool is false, it is a fixed
- /// string match, if it is true, it is a regex match.
- SmallVector<PatternChunk, 4> Chunks;
-
+ /// FixedStr - If non-empty, this pattern is a fixed string match with the
+ /// specified fixed string.
StringRef FixedStr;
+
+ /// RegExStr - If non-empty, this is a regex pattern.
+ std::string RegExStr;
public:
Pattern() { }
@@ -87,6 +61,9 @@
/// returns the position that is matched or npos if there is no match. If
/// there is a match, the size of the matched string is returned in MatchLen.
size_t Match(StringRef Buffer, size_t &MatchLen) const;
+
+private:
+ void AddFixedStringToRegEx(StringRef FixedStr);
};
bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
@@ -109,17 +86,15 @@
return false;
}
- // Otherwise, there is at least one regex piece.
-
- // Scan the pattern to break it into regex and non-regex pieces.
+ // Otherwise, there is at least one regex piece. Build up the regex pattern
+ // by escaping scary characters in fixed strings, building up one big regex.
while (!PatternStr.empty()) {
// Handle fixed string matches.
if (PatternStr.size() < 2 ||
PatternStr[0] != '{' || PatternStr[1] != '{') {
// Find the end, which is the start of the next regex.
size_t FixedMatchEnd = PatternStr.find("{{");
-
- Chunks.push_back(PatternChunk(PatternStr.substr(0, FixedMatchEnd),false));
+ AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd));
PatternStr = PatternStr.substr(FixedMatchEnd);
continue;
}
@@ -132,7 +107,8 @@
return true;
}
- Regex R(PatternStr.substr(2, End-2));
+ StringRef RegexStr = PatternStr.substr(2, End-2);
+ Regex R(RegexStr);
std::string Error;
if (!R.isValid(Error)) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2),
@@ -140,13 +116,41 @@
return true;
}
- Chunks.push_back(PatternChunk(PatternStr.substr(2, End-2), true));
+ RegExStr += RegexStr.str();
PatternStr = PatternStr.substr(End+2);
}
return false;
}
+void Pattern::AddFixedStringToRegEx(StringRef FixedStr) {
+ // Add the characters from FixedStr to the regex, escaping as needed. This
+ // avoids "leaning toothpicks" in common patterns.
+ for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) {
+ switch (FixedStr[i]) {
+ // These are the special characters matched in "p_ere_exp".
+ case '(':
+ case ')':
+ case '^':
+ case '$':
+ case '|':
+ case '*':
+ case '+':
+ case '?':
+ case '.':
+ case '[':
+ case '\\':
+ case '{':
+ RegExStr += '\\';
+ // FALL THROUGH.
+ default:
+ RegExStr += FixedStr[i];
+ break;
+ }
+ }
+}
+
+
/// Match - Match the pattern string against the input buffer Buffer. This
/// returns the position that is matched or npos if there is no match. If
/// there is a match, the size of the matched string is returned in MatchLen.
@@ -157,58 +161,17 @@
return Buffer.find(FixedStr);
}
- size_t FirstMatch = StringRef::npos;
- MatchLen = 0;
-
- while (!Buffer.empty()) {
- StringRef MatchAttempt = Buffer;
-
- unsigned ChunkNo = 0, e = Chunks.size();
- for (; ChunkNo != e; ++ChunkNo) {
- size_t ThisMatch, ThisLength = StringRef::npos;
- ThisMatch = Chunks[ChunkNo].Match(MatchAttempt, ThisLength);
-
- // Otherwise, what we do depends on if this is the first match or not. If
- // this is the first match, it doesn't match to match at the start of
- // MatchAttempt.
- if (ChunkNo == 0) {
- // If the first match fails then this pattern will never match in
- // Buffer.
- if (ThisMatch == StringRef::npos)
- return ThisMatch;
-
- FirstMatch = ThisMatch;
- MatchAttempt = MatchAttempt.substr(FirstMatch);
- ThisMatch = 0;
- }
-
- // If this chunk didn't match, then the entire pattern didn't match from
- // FirstMatch, try later in the buffer.
- if (ThisMatch == StringRef::npos)
- break;
-
- // Ok, if the match didn't match at the beginning of MatchAttempt, then we
- // have something like "ABC{{DEF}} and something was in-between. Reject
- // the match.
- if (ThisMatch != 0)
- break;
-
- // Otherwise, match the string and move to the next chunk.
- MatchLen += ThisLength;
- MatchAttempt = MatchAttempt.substr(ThisLength);
- }
-
- // If the whole thing matched, we win.
- if (ChunkNo == e)
- return FirstMatch;
-
- // Otherwise, try matching again after FirstMatch to see if this pattern
- // matches later in the buffer.
- Buffer = Buffer.substr(FirstMatch+1);
- }
+ // Regex match.
+ SmallVector<StringRef, 4> MatchInfo;
+ if (!Regex(RegExStr, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo))
+ return StringRef::npos;
+
+ // Successful regex match.
+ assert(!MatchInfo.empty() && "Didn't get any match");
+ StringRef FullMatch = MatchInfo[0];
- // If we ran out of stuff to scan, then we didn't match.
- return StringRef::npos;
+ MatchLen = FullMatch.size();
+ return FullMatch.data()-Buffer.data();
}
More information about the llvm-commits mailing list