[Mlir-commits] [mlir] 4a219bf - Fix a bug in the .mlir lexer, where a \0 character in a file is treated as a colon (due to an accidental fall through) instead of whitespace.
Chris Lattner
llvmlistbot at llvm.org
Mon Mar 23 17:35:24 PDT 2020
Author: Chris Lattner
Date: 2020-03-23T17:35:17-07:00
New Revision: 4a219bf7ff6fbe5cbd3b0c2120c8fe471e3f60fa
URL: https://github.com/llvm/llvm-project/commit/4a219bf7ff6fbe5cbd3b0c2120c8fe471e3f60fa
DIFF: https://github.com/llvm/llvm-project/commit/4a219bf7ff6fbe5cbd3b0c2120c8fe471e3f60fa.diff
LOG: Fix a bug in the .mlir lexer, where a \0 character in a file is treated as a colon (due to an accidental fall through) instead of whitespace.
Summary:
While here, simplify the lexer a bit by eliminating the unneeded 'operator'
classification of certain sigils; they can just be treated as 'punctuation'.
Reviewers: rriddle!
Subscribers: mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D76647
Added:
Modified:
mlir/lib/Parser/Lexer.cpp
mlir/lib/Parser/Token.cpp
mlir/lib/Parser/Token.h
mlir/lib/Parser/TokenKinds.def
mlir/test/IR/parser.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp
index 697af7392fb2..9a3418eaf832 100644
--- a/mlir/lib/Parser/Lexer.cpp
+++ b/mlir/lib/Parser/Lexer.cpp
@@ -83,8 +83,8 @@ Token Lexer::lexToken() {
// marker that llvm::MemoryBuffer guarantees will be there.
if (curPtr - 1 == curBuffer.end())
return formToken(Token::eof, tokStart);
+ continue;
- LLVM_FALLTHROUGH;
case ':':
return formToken(Token::colon, tokStart);
case ',':
diff --git a/mlir/lib/Parser/Token.cpp b/mlir/lib/Parser/Token.cpp
index 8fe16b05fde6..b619af08c433 100644
--- a/mlir/lib/Parser/Token.cpp
+++ b/mlir/lib/Parser/Token.cpp
@@ -145,9 +145,6 @@ StringRef Token::getTokenSpelling(Kind kind) {
#define TOK_PUNCTUATION(NAME, SPELLING) \
case NAME: \
return SPELLING;
-#define TOK_OPERATOR(NAME, SPELLING) \
- case NAME: \
- return SPELLING;
#define TOK_KEYWORD(SPELLING) \
case kw_##SPELLING: \
return #SPELLING;
diff --git a/mlir/lib/Parser/Token.h b/mlir/lib/Parser/Token.h
index e6fa6c70853f..7952aca4546b 100644
--- a/mlir/lib/Parser/Token.h
+++ b/mlir/lib/Parser/Token.h
@@ -23,7 +23,6 @@ class Token {
#define TOK_IDENTIFIER(NAME) NAME,
#define TOK_LITERAL(NAME) NAME,
#define TOK_PUNCTUATION(NAME, SPELLING) NAME,
-#define TOK_OPERATOR(NAME, SPELLING) NAME,
#define TOK_KEYWORD(SPELLING) kw_##SPELLING,
#include "TokenKinds.def"
};
@@ -50,7 +49,8 @@ class Token {
bool isNot(Kind k) const { return kind != k; }
/// Return true if this token isn't one of the specified kinds.
- template <typename... T> bool isNot(Kind k1, Kind k2, T... others) const {
+ template <typename... T>
+ bool isNot(Kind k1, Kind k2, T... others) const {
return !isAny(k1, k2, others...);
}
diff --git a/mlir/lib/Parser/TokenKinds.def b/mlir/lib/Parser/TokenKinds.def
index 47c43f6522fb..0ec0c0ebf7bf 100644
--- a/mlir/lib/Parser/TokenKinds.def
+++ b/mlir/lib/Parser/TokenKinds.def
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#if !defined(TOK_MARKER) && !defined(TOK_IDENTIFIER) && !defined(TOK_LITERAL)&&\
- !defined(TOK_PUNCTUATION) && !defined(TOK_OPERATOR) && !defined(TOK_KEYWORD)
-# error Must define one of the TOK_ macros.
+#if !defined(TOK_MARKER) && !defined(TOK_IDENTIFIER) && \
+ !defined(TOK_LITERAL) && !defined(TOK_PUNCTUATION) && \
+ !defined(TOK_KEYWORD)
+#error Must define one of the TOK_ macros.
#endif
#ifndef TOK_MARKER
@@ -28,14 +29,10 @@
#ifndef TOK_PUNCTUATION
#define TOK_PUNCTUATION(NAME, SPELLING)
#endif
-#ifndef TOK_OPERATOR
-#define TOK_OPERATOR(NAME, SPELLING)
-#endif
#ifndef TOK_KEYWORD
#define TOK_KEYWORD(SPELLING)
#endif
-
// Markers
TOK_MARKER(eof)
TOK_MARKER(error)
@@ -49,34 +46,30 @@ TOK_IDENTIFIER(caret_identifier) // ^foo
TOK_IDENTIFIER(exclamation_identifier) // !foo
// Literals
-TOK_LITERAL(floatliteral) // 2.0
-TOK_LITERAL(integer) // 42
-TOK_LITERAL(string) // "foo"
-TOK_LITERAL(inttype) // i4, si8, ui16
+TOK_LITERAL(floatliteral) // 2.0
+TOK_LITERAL(integer) // 42
+TOK_LITERAL(string) // "foo"
+TOK_LITERAL(inttype) // i4, si8, ui16
// Punctuation.
-TOK_PUNCTUATION(arrow, "->")
-TOK_PUNCTUATION(at, "@")
-TOK_PUNCTUATION(colon, ":")
-TOK_PUNCTUATION(comma, ",")
-TOK_PUNCTUATION(question, "?")
-TOK_PUNCTUATION(l_paren, "(")
-TOK_PUNCTUATION(r_paren, ")")
-TOK_PUNCTUATION(l_brace, "{")
-TOK_PUNCTUATION(r_brace, "}")
-TOK_PUNCTUATION(l_square, "[")
-TOK_PUNCTUATION(r_square, "]")
-TOK_PUNCTUATION(less, "<")
-TOK_PUNCTUATION(greater, ">")
-TOK_PUNCTUATION(equal, "=")
-TOK_PUNCTUATION(ellipsis, "...")
-// TODO: More punctuation.
-
-// Operators.
-TOK_OPERATOR(plus, "+")
-TOK_OPERATOR(minus, "-")
-TOK_OPERATOR(star, "*")
-// TODO: More operator tokens
+TOK_PUNCTUATION(arrow, "->")
+TOK_PUNCTUATION(at, "@")
+TOK_PUNCTUATION(colon, ":")
+TOK_PUNCTUATION(comma, ",")
+TOK_PUNCTUATION(ellipsis, "...")
+TOK_PUNCTUATION(equal, "=")
+TOK_PUNCTUATION(greater, ">")
+TOK_PUNCTUATION(l_brace, "{")
+TOK_PUNCTUATION(l_paren, "(")
+TOK_PUNCTUATION(l_square, "[")
+TOK_PUNCTUATION(less, "<")
+TOK_PUNCTUATION(minus, "-")
+TOK_PUNCTUATION(plus, "+")
+TOK_PUNCTUATION(question, "?")
+TOK_PUNCTUATION(r_brace, "}")
+TOK_PUNCTUATION(r_paren, ")")
+TOK_PUNCTUATION(r_square, "]")
+TOK_PUNCTUATION(star, "*")
// Keywords. These turn "foo" into Token::kw_foo enums.
@@ -122,5 +115,4 @@ TOK_KEYWORD(vector)
#undef TOK_IDENTIFIER
#undef TOK_LITERAL
#undef TOK_PUNCTUATION
-#undef TOK_OPERATOR
#undef TOK_KEYWORD
diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir
index 253c1cc3e745..45ee1e1d89cd 100644
--- a/mlir/test/IR/parser.mlir
+++ b/mlir/test/IR/parser.mlir
@@ -1225,3 +1225,9 @@ func @pretty_names() {
return
}
+// CHECK-LABEL: func @zero_whitespace() {
+// CHECK-NEXT: return
+func @zero_whitespace() {
+ // This is a \0 byte.
+ return
+}
More information about the Mlir-commits mailing list