[Mlir-commits] [mlir] 6ef80e1 - [mlir][AsmParser] Fix comment detection in string literals (#174125)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Thu Jan 15 14:26:16 PST 2026
Author: Nick Kreeger
Date: 2026-01-15T17:26:12-05:00
New Revision: 6ef80e147c63ad84cd616cb4d664b1caf0dbc4bf
URL: https://github.com/llvm/llvm-project/commit/6ef80e147c63ad84cd616cb4d664b1caf0dbc4bf
DIFF: https://github.com/llvm/llvm-project/commit/6ef80e147c63ad84cd616cb4d664b1caf0dbc4bf.diff
LOG: [mlir][AsmParser] Fix comment detection in string literals (#174125)
The error positioning logic in emitWrongTokenError() would incorrectly
identify '//' inside string literals as comment starts when backing up
to position diagnostics. For example, a URL like "http://example.com"
would cause the error to point at the '://' instead of the end of the
line.
Add findCommentStart() helper that properly skips '//' occurrences
inside string literals by tracking quote state and handling escape
sequences.
Added:
mlir/test/IR/parser-string-literal-comment.mlir
mlir/test/IR/parser-string-literal-url.mlir
Modified:
mlir/lib/AsmParser/Parser.cpp
Removed:
################################################################################
diff --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp
index 0efe30727a2ea..22928df03fbc7 100644
--- a/mlir/lib/AsmParser/Parser.cpp
+++ b/mlir/lib/AsmParser/Parser.cpp
@@ -198,6 +198,45 @@ InFlightDiagnostic Parser::emitError(const Twine &message) {
return emitError(SMLoc::getFromPointer(loc.getPointer() - 1), message);
}
+/// Find the start of a line comment (`//`) in the given string, ignoring
+/// occurrences inside string literals. Returns StringRef::npos if no comment
+/// is found.
+static size_t findCommentStart(StringRef line) {
+ // Fast path: no comment in line at all.
+ size_t slashPos = line.find("//");
+ if (slashPos == StringRef::npos)
+ return StringRef::npos;
+
+ // Fast path: comment at start of line, or no quote before the '//'.
+ if (slashPos == 0)
+ return 0;
+ size_t quotePos = line.find('"');
+ if (quotePos == StringRef::npos || quotePos > slashPos)
+ return slashPos;
+
+ // A quote appears before '//'. Parse carefully to handle string literals.
+ bool inString = false;
+ for (size_t i = 0, e = line.size(); i < e; ++i) {
+ char c = line[i];
+ if (inString) {
+ // Skip escaped characters inside strings.
+ if (c == '\\') {
+ ++i;
+ continue;
+ }
+ if (c == '"')
+ inString = false;
+ } else {
+ if (c == '"') {
+ inString = true;
+ } else if (c == '/' && i + 1 < e && line[i + 1] == '/') {
+ return i;
+ }
+ }
+ }
+ return StringRef::npos;
+}
+
InFlightDiagnostic Parser::emitError(SMLoc loc, const Twine &message) {
auto diag = mlir::emitError(getEncodedSourceLocation(loc), message);
@@ -247,16 +286,15 @@ InFlightDiagnostic Parser::emitWrongTokenError(const Twine &message) {
// Drop the \n so we emit the diagnostic at the end of the line.
startOfBuffer = startOfBuffer.drop_back();
- // Check to see if the preceding line has a comment on it. We assume that a
- // `//` is the start of a comment, which is mostly correct.
- // TODO: This will do the wrong thing for // in a string literal.
+ // Check to see if the preceding line has a comment on it.
auto prevLine = startOfBuffer;
size_t newLineIndex = prevLine.find_last_of("\n\r");
if (newLineIndex != StringRef::npos)
prevLine = prevLine.drop_front(newLineIndex);
- // If we find a // in the current line, then emit the diagnostic before it.
- size_t commentStart = prevLine.find("//");
+ // If we find a // in the current line (outside of string literals), then
+ // emit the diagnostic before it.
+ size_t commentStart = findCommentStart(prevLine);
if (commentStart != StringRef::npos)
startOfBuffer = startOfBuffer.drop_back(prevLine.size() - commentStart);
}
diff --git a/mlir/test/IR/parser-string-literal-comment.mlir b/mlir/test/IR/parser-string-literal-comment.mlir
new file mode 100644
index 0000000000000..9d3a5983bc3ea
--- /dev/null
+++ b/mlir/test/IR/parser-string-literal-comment.mlir
@@ -0,0 +1,10 @@
+// RUN: mlir-opt -allow-unregistered-dialect --verify-diagnostics %s
+
+// Test that '//' in a string literal doesn't confuse comment detection when
+// backing up to position an error. The error should point to the end of the
+// line, not at the '//' inside the string.
+
+func.func @string_with_slashes() {
+ // expected-error at +1 {{expected operation name in quotes}}
+ "test.op"() {url = "http://example.com"} : () -> ()
+
diff --git a/mlir/test/IR/parser-string-literal-url.mlir b/mlir/test/IR/parser-string-literal-url.mlir
new file mode 100644
index 0000000000000..c78d92c6355e7
--- /dev/null
+++ b/mlir/test/IR/parser-string-literal-url.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s
+
+// Test that URLs with '//' in string literals parse correctly.
+
+// CHECK-LABEL: func.func @url_in_string
+// CHECK: "test.op"() {url = "http://example.com/path"} : () -> ()
+// CHECK: "test.op"() {url = "https://example.com//double//slashes"} : () -> ()
+// CHECK: "test.op"() {proto = "file:///local/path"} : () -> ()
+func.func @url_in_string() {
+ "test.op"() {url = "http://example.com/path"} : () -> ()
+ "test.op"() {url = "https://example.com//double//slashes"} : () -> ()
+ "test.op"() {proto = "file:///local/path"} : () -> ()
+ return
+}
More information about the Mlir-commits
mailing list