[Mlir-commits] [mlir] ad3b358 - [MLIR Parser] Improve QoI for "expected token" errors

Tue May 10 07:44:24 PDT 2022

Author: Chris Lattner
Date: 2022-05-10T15:44:17+01:00
New Revision: ad3b358180e8e1d723b790cf5f68264e486c0b1a

URL: https://github.com/llvm/llvm-project/commit/ad3b358180e8e1d723b790cf5f68264e486c0b1a
DIFF: https://github.com/llvm/llvm-project/commit/ad3b358180e8e1d723b790cf5f68264e486c0b1a.diff

LOG: [MLIR Parser] Improve QoI for "expected token" errors

A typical problem with missing a token is that the missing
token is at the end of a line.  The problem with this is that
the error message gets reported on the start of the following
line (which is where the next / invalid token is) which can
be confusing.

Handle this by noticing this case and backing up to the end of
the previous line.

Differential Revision: https://reviews.llvm.org/D125295

Added: 
    

Modified: 
    mlir/lib/Parser/Parser.cpp
    mlir/lib/Parser/Parser.h
    mlir/test/Dialect/ControlFlow/invalid.mlir
    mlir/test/Dialect/LLVMIR/global.mlir
    mlir/test/Dialect/Linalg/invalid.mlir
    mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir
    mlir/test/Dialect/SPIRV/IR/logical-ops.mlir
    mlir/test/Dialect/SPIRV/IR/memory-ops.mlir
    mlir/test/Dialect/SPIRV/IR/misc-ops.mlir
    mlir/test/IR/invalid-locations.mlir
    mlir/test/IR/invalid.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp
index db891f39d289a..cc1d08699ecce 100644

--- a/mlir/lib/Parser/Parser.cpp
+++ b/mlir/lib/Parser/Parser.cpp
@@ -134,7 +134,7 @@ Parser::parseCommaSeparatedListUntil(Token::Kind rightToken,
   // Handle the empty case.
   if (getToken().is(rightToken)) {
     if (!allowEmptyList)
-      return emitError("expected list element");
+      return emitWrongTokenError("expected list element");
     consumeToken(rightToken);
     return success();
   }
@@ -147,6 +147,15 @@ Parser::parseCommaSeparatedListUntil(Token::Kind rightToken,
   return success();
 }
 
+InFlightDiagnostic Parser::emitError(const Twine &message) {
+  auto loc = state.curToken.getLoc();
+  if (state.curToken.isNot(Token::eof))
+    return emitError(loc, message);
+
+  // If the error is to be emitted at EOF, move it back one character.
+  return emitError(SMLoc::getFromPointer(loc.getPointer() - 1), message);
+}
+
 InFlightDiagnostic Parser::emitError(SMLoc loc, const Twine &message) {
   auto diag = mlir::emitError(getEncodedSourceLocation(loc), message);
 
@@ -157,13 +166,55 @@ InFlightDiagnostic Parser::emitError(SMLoc loc, const Twine &message) {
   return diag;
 }
 
+/// Emit an error about a "wrong token".  If the current token is at the
+/// start of a source line, this will apply heuristics to back up and report
+/// the error at the end of the previous line, which is where the expected
+/// token is supposed to be.
+InFlightDiagnostic Parser::emitWrongTokenError(const Twine &message) {
+  auto loc = state.curToken.getLoc();
+
+  // If the error is to be emitted at EOF, move it back one character.
+  if (state.curToken.is(Token::eof))
+    loc = SMLoc::getFromPointer(loc.getPointer() - 1);
+
+  // Determine if the token is at the start of the current line.
+  const char *bufferStart = state.lex.getBufferBegin();
+  const char *curPtr = loc.getPointer();
+
+  // Back up over entirely blank lines.
+  while (1) {
+    // Back up until we see a \n, but don't look past the buffer start.
+    curPtr = StringRef(bufferStart, curPtr - bufferStart).rtrim(" \t").end();
+
+    // For tokens with no preceding source line, just emit at the original
+    // location.
+    if (curPtr == bufferStart || curPtr[-1] != '\n')
+      return emitError(loc, message);
+
+    // Check to see if the preceding line has a comment on it.  We assume that a
+    // `//` is the start of a comment, which is mostly correct.
+    // TODO: This will do the wrong thing for // in a string literal.
+    --curPtr;
+    auto prevLine = StringRef(bufferStart, curPtr - bufferStart);
+    size_t newLineIndex = prevLine.rfind('\n');
+    if (newLineIndex != StringRef::npos)
+      prevLine = prevLine.drop_front(newLineIndex);
+    size_t commentStart = prevLine.find("//");
+    if (commentStart != StringRef::npos)
+      curPtr = prevLine.begin() + commentStart;
+
+    // Otherwise, we can move backwards at least this line.
+    loc = SMLoc::getFromPointer(curPtr);
+  }
+}
+
 /// Consume the specified token if present and return success.  On failure,
 /// output a diagnostic and return failure.
 ParseResult Parser::parseToken(Token::Kind expectedToken,
                                const Twine &message) {
   if (consumeIf(expectedToken))
     return success();
-  return emitError(message);
+  return emitWrongTokenError(message);
 }
 
 /// Parse an optional integer value from the stream.
@@ -872,23 +923,23 @@ ParseResult OperationParser::parseOperation() {
     // Parse the group of result ids.
     auto parseNextResult = [&]() -> ParseResult {
       // Parse the next result id.
-      if (!getToken().is(Token::percent_identifier))
-        return emitError("expected valid ssa identifier");
-
       Token nameTok = getToken();
-      consumeToken(Token::percent_identifier);
+      if (parseToken(Token::percent_identifier,
+                     "expected valid ssa identifier"))
+        return failure();
 
       // If the next token is a ':', we parse the expected result count.
       size_t expectedSubResults = 1;
       if (consumeIf(Token::colon)) {
         // Check that the next token is an integer.
         if (!getToken().is(Token::integer))
-          return emitError("expected integer number of results");
+          return emitWrongTokenError("expected integer number of results");
 
         // Check that number of results is > 0.
         auto val = getToken().getUInt64IntegerValue();
         if (!val.hasValue() || val.getValue() < 1)
-          return emitError("expected named operation to have atleast 1 result");
+          return emitError(
+              "expected named operation to have at least 1 result");
         consumeToken(Token::integer);
         expectedSubResults = *val;
       }
@@ -912,7 +963,7 @@ ParseResult OperationParser::parseOperation() {
   else if (nameTok.is(Token::string))
     op = parseGenericOperation();
   else
-    return emitError("expected operation name in quotes");
+    return emitWrongTokenError("expected operation name in quotes");
 
   // If parsing of the basic operation failed, then this whole thing fails.
   if (!op)
@@ -967,7 +1018,7 @@ ParseResult OperationParser::parseOperation() {
 ParseResult OperationParser::parseSuccessor(Block *&dest) {
   // Verify branch is identifier and get the matching block.
   if (!getToken().is(Token::caret_identifier))
-    return emitError("expected block name");
+    return emitWrongTokenError("expected block name");
   dest = getBlockNamed(getTokenSpelling(), getToken().getLoc());
   consumeToken();
   return success();
@@ -1296,8 +1347,6 @@ class CustomOpAsmParser : public AsmParserImpl<OpAsmParser> {
                                Delimiter delimiter = Delimiter::None,
                                bool allowResultNumber = true,
                                int requiredOperandCount = -1) override {
-    auto startLoc = parser.getToken().getLoc();
-
     // The no-delimiter case has some special handling for better diagnostics.
     if (delimiter == Delimiter::None) {
       // parseCommaSeparatedList doesn't handle the missing case for "none",
@@ -1309,10 +1358,9 @@ class CustomOpAsmParser : public AsmParserImpl<OpAsmParser> {
           return success();
 
         // Otherwise, try to produce a nice error message.
-        if (parser.getToken().is(Token::l_paren) ||
-            parser.getToken().is(Token::l_square))
-          return emitError(startLoc, "unexpected delimiter");
-        return emitError(startLoc, "invalid operand");
+        if (parser.getToken().isAny(Token::l_paren, Token::l_square))
+          return parser.emitError("unexpected delimiter");
+        return parser.emitWrongTokenError("expected operand");
       }
     }
 
@@ -1320,6 +1368,7 @@ class CustomOpAsmParser : public AsmParserImpl<OpAsmParser> {
       return parseOperand(result.emplace_back(), allowResultNumber);
     };
 
+    auto startLoc = parser.getToken().getLoc();
     if (parseCommaSeparatedList(delimiter, parseOneOperand, " in operand list"))
       return failure();
 

diff  --git a/mlir/lib/Parser/Parser.h b/mlir/lib/Parser/Parser.h
index 3abee1fb64744..c8f1c058dc252 100644
--- a/mlir/lib/Parser/Parser.h
+++ b/mlir/lib/Parser/Parser.h
@@ -68,17 +68,15 @@ class Parser {
   //===--------------------------------------------------------------------===//
 
   /// Emit an error and return failure.
-  InFlightDiagnostic emitError(const Twine &message = {}) {
-    // If the error is to be emitted at EOF, move it back one character.
-    if (state.curToken.is(Token::eof)) {
-      return emitError(
-          SMLoc::getFromPointer(state.curToken.getLoc().getPointer() - 1),
-          message);
-    }
-    return emitError(state.curToken.getLoc(), message);
-  }
+  InFlightDiagnostic emitError(const Twine &message = {});
   InFlightDiagnostic emitError(SMLoc loc, const Twine &message = {});
 
+  /// Emit an error about a "wrong token".  If the current token is at the
+  /// start of a source line, this will apply heuristics to back up and report
+  /// the error at the end of the previous line, which is where the expected
+  /// token is supposed to be.
+  InFlightDiagnostic emitWrongTokenError(const Twine &message = {});
+
   /// Encode the specified source location information into an attribute for
   /// attachment to the IR.
   Location getEncodedSourceLocation(SMLoc loc) {

diff  --git a/mlir/test/Dialect/ControlFlow/invalid.mlir b/mlir/test/Dialect/ControlFlow/invalid.mlir
index fd95e97e208cf..b51d8095c9974 100644
--- a/mlir/test/Dialect/ControlFlow/invalid.mlir
+++ b/mlir/test/Dialect/ControlFlow/invalid.mlir
@@ -38,8 +38,8 @@ func.func @switch_wrong_type_case_value(%flag : i32, %caseOperand : i32) {
 func.func @switch_missing_comma(%flag : i32, %caseOperand : i32) {
   cf.switch %flag : i32, [
     default: ^bb1(%caseOperand : i32),
-    45: ^bb2(%caseOperand : i32)
     // expected-error at +1 {{expected ']'}}
+    45: ^bb2(%caseOperand : i32)
     43: ^bb3(%caseOperand : i32)
   ]
 

diff  --git a/mlir/test/Dialect/LLVMIR/global.mlir b/mlir/test/Dialect/LLVMIR/global.mlir
index 7f582df959936..b53f6d4cd2835 100644
--- a/mlir/test/Dialect/LLVMIR/global.mlir
+++ b/mlir/test/Dialect/LLVMIR/global.mlir
@@ -144,7 +144,7 @@ llvm.mlir.global internal @more_than_one_type(0) : i64, i32
 llvm.mlir.global internal @foo(0: i32) : i32
 
 func.func @bar() {
-  // expected-error @+2{{expected ':'}}
+  // expected-error @+1{{expected ':'}}
   llvm.mlir.addressof @foo
 }
 

diff  --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index f58deab260343..6881dce30a9fd 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -60,7 +60,7 @@ func.func @index_dim_negative(%arg0: memref<f32>) {
 // -----
 
 func.func @generic_no_region(%arg0: memref<f32>) {
-  // expected-error @+5 {{expected '{' to begin a region}}
+  // expected-error @+4 {{expected '{' to begin a region}}
   linalg.generic {
     indexing_maps =  [ affine_map<() -> (0)> ],
     iterator_types = []

diff  --git a/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir b/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir
index dbdc152c3ca90..4a2c9fd9dbcb7 100644
--- a/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir
@@ -24,7 +24,7 @@ func.func @branch_argument() -> () {
 // -----
 
 func.func @missing_accessor() -> () {
-  // expected-error @+2 {{expected block name}}
+  // expected-error @+1 {{expected block name}}
   spv.Branch
 }
 

diff  --git a/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir b/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir
index acc5efc9b039e..a4d5eb3cd868e 100644
--- a/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir
@@ -90,7 +90,7 @@ func.func @logicalBinary2(%arg0 : vector<4xi1>, %arg1 : vector<4xi1>)
 
 func.func @logicalBinary(%arg0 : i1, %arg1 : i1)
 {
-  // expected-error @+2 {{expected ':'}}
+  // expected-error @+1 {{expected ':'}}
   %0 = spv.LogicalAnd %arg0, %arg1
   return
 }
@@ -139,7 +139,7 @@ func.func @logicalUnary2(%arg0 : vector<4xi1>)
 
 func.func @logicalUnary(%arg0 : i1)
 {
-  // expected-error @+2 {{expected ':'}}
+  // expected-error @+1 {{expected ':'}}
   %0 = spv.LogicalNot %arg0
   return
 }
@@ -230,7 +230,7 @@ func.func @select_op_vec_condn_vec(%arg0: vector<3xi1>) -> () {
 func.func @select_op(%arg0: i1) -> () {
   %0 = spv.Constant 2 : i32
   %1 = spv.Constant 3 : i32
-  // expected-error @+2 {{expected ','}}
+  // expected-error @+1 {{expected ','}}
   %2 = spv.Select %arg0, %0, %1 : i1
   return
 }

diff  --git a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir
index 5bc438c450aa6..0ff8235e52fdc 100644
--- a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir
@@ -251,7 +251,7 @@ func.func @simple_load_missing_operand() -> () {
 
 func.func @simple_load_missing_rettype() -> () {
   %0 = spv.Variable : !spv.ptr<f32, Function>
-  // expected-error @+2 {{expected ':'}}
+  // expected-error @+1 {{expected ':'}}
   %1 = spv.Load "Function" %0
   return
 }
@@ -396,7 +396,7 @@ func.func @simple_store_missing_ptr_type(%arg0 : f32) -> () {
 
 func.func @simple_store_missing_operand(%arg0 : f32) -> () {
   %0 = spv.Variable : !spv.ptr<f32, Function>
-  // expected-error @+1 {{custom op 'spv.Store' invalid operand}} : f32
+  // expected-error @+1 {{expected operand}}
   spv.Store  "Function" , %arg0 : f32
   return
 }

diff  --git a/mlir/test/Dialect/SPIRV/IR/misc-ops.mlir b/mlir/test/Dialect/SPIRV/IR/misc-ops.mlir
index ffd02a5739fe5..bdf779dfb20ee 100644
--- a/mlir/test/Dialect/SPIRV/IR/misc-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/misc-ops.mlir
@@ -23,7 +23,7 @@ func.func @undef() -> () {
 // -----
 
 func.func @undef() -> () {
-  // expected-error @+2{{expected ':'}}
+  // expected-error @+1{{expected ':'}}
   %0 = spv.Undef
   spv.Return
 }

diff  --git a/mlir/test/IR/invalid-locations.mlir b/mlir/test/IR/invalid-locations.mlir
index 59cf0fe25537c..6993e15597f64 100644
--- a/mlir/test/IR/invalid-locations.mlir
+++ b/mlir/test/IR/invalid-locations.mlir
@@ -11,7 +11,7 @@ func.func @location_missing_l_paren() {
 
 func.func @location_missing_r_paren() {
 ^bb:
-  return loc(unknown // expected-error at +1 {{expected ')' in location}}
+  return loc(unknown // expected-error {{expected ')' in location}}
 }
 
 // -----
@@ -60,7 +60,7 @@ func.func @location_callsite_missing_caller() {
 
 func.func @location_callsite_missing_r_paren() {
 ^bb:
-  return loc(callsite( unknown at unknown  // expected-error at +1 {{expected ')' in callsite location}}
+  return loc(callsite( unknown at unknown  // expected-error {{expected ')' in callsite location}}
 }
 
 // -----

diff  --git a/mlir/test/IR/invalid.mlir b/mlir/test/IR/invalid.mlir
index 2c39df5095d35..6f15a6e3832f4 100644
--- a/mlir/test/IR/invalid.mlir
+++ b/mlir/test/IR/invalid.mlir
@@ -158,7 +158,7 @@ func.func @block_arg_no_type() {
 
 func.func @block_arg_no_close_paren() {
 ^bb42:
-  cf.br ^bb2( // expected-error at +1 {{expected ':'}}
+  cf.br ^bb2( // expected-error {{expected ':'}}
   return
 }
 
@@ -245,7 +245,7 @@ func.func @malformed_for_to() {
 
 func.func @incomplete_for() {
   affine.for %i = 1 to 10 step 2
-}        // expected-error {{expected '{' to begin a region}}
+}        // expected-error @-1 {{expected '{' to begin a region}}
 
 // -----
 
@@ -1099,7 +1099,7 @@ func.func @multi_result_missing_count() {
 // -----
 
 func.func @multi_result_zero_count() {
-  // expected-error at +1 {{expected named operation to have atleast 1 result}}
+  // expected-error at +1 {{expected named operation to have at least 1 result}}
   %0:0 = "foo" () : () -> (i32, i32)
   return
 }
@@ -1651,6 +1651,27 @@ func.func @foo() {} // expected-error {{expected non-empty function body}}
 
 // -----
 
-// expected-error at +2 {{expected ']'}}
+// expected-error at +1 {{expected ']'}}
 "f"() { b = [@m:
 
+// -----
+
+// This makes sure we emit an error at the end of the correct line, the : is
+// expected at the end of foo, not on the return line.
+func.func @error_at_end_of_line() {
+  // expected-error at +1 {{expected ':' followed by operation type}}
+  %0 = "foo"() 
+  return
+}
+
+// -----
+
+// This makes sure we emit an error at the end of the correct line, the : is
+// expected at the end of foo, not on the return line.
+func.func @error_at_end_of_line() {
+  %0 = "foo"() 
+  // expected-error at -1 {{expected ':' followed by operation type}}
+
+  // This is a comment and so is the thing above.
+  return
+}