[Mlir-commits] [mlir] [mlir][AsmParser] Fix comment detection in string literals (PR #174125)
Nick Kreeger
llvmlistbot at llvm.org
Thu Jan 15 05:58:12 PST 2026
https://github.com/nkreeger updated https://github.com/llvm/llvm-project/pull/174125
>From 4c6ff4dbb8a5e5e69b4ee21f9a8ee09584837e2f Mon Sep 17 00:00:00 2001
From: Nick Kreeger <nick.kreeger at gmail.com>
Date: Wed, 31 Dec 2025 21:50:09 -0600
Subject: [PATCH 1/2] [mlir][AsmParser] Fix comment detection in string
literals
The error positioning logic in emitWrongTokenError() would incorrectly identify '//' inside string literals as comment starts when backing up to position diagnostics. For example, a URL like "http://example.com" would cause the error to point at the '://' instead of the end of the line.
Add findCommentStart() helper that properly skips '//' occurrences inside string literals by tracking quote state and handling escape sequences.
---
mlir/lib/AsmParser/Parser.cpp | 46 +++++++++++++++++--
.../IR/parser-string-literal-comment.mlir | 10 ++++
mlir/test/IR/parser-string-literal-url.mlir | 14 ++++++
3 files changed, 65 insertions(+), 5 deletions(-)
create mode 100644 mlir/test/IR/parser-string-literal-comment.mlir
create mode 100644 mlir/test/IR/parser-string-literal-url.mlir
diff --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp
index 74936e32bd9d9..88d2d01368d0d 100644
--- a/mlir/lib/AsmParser/Parser.cpp
+++ b/mlir/lib/AsmParser/Parser.cpp
@@ -198,6 +198,43 @@ InFlightDiagnostic Parser::emitError(const Twine &message) {
return emitError(SMLoc::getFromPointer(loc.getPointer() - 1), message);
}
+/// Find the start of a line comment (`//`) in the given string, ignoring
+/// occurrences inside string literals. Returns StringRef::npos if no comment
+/// is found.
+static size_t findCommentStart(StringRef line) {
+ // Fast path: no comment in line at all.
+ size_t slashPos = line.find("//");
+ if (slashPos == StringRef::npos)
+ return StringRef::npos;
+
+ // Fast path: no quote before the '//', so it's a real comment.
+ size_t quotePos = line.find('"');
+ if (quotePos == StringRef::npos || quotePos > slashPos)
+ return slashPos;
+
+ // A quote appears before '//'. Parse carefully to handle string literals.
+ bool inString = false;
+ for (size_t i = 0, e = line.size(); i < e; ++i) {
+ char c = line[i];
+ if (inString) {
+ // Skip escaped characters inside strings.
+ if (c == '\\' && i + 1 < e) {
+ ++i;
+ continue;
+ }
+ if (c == '"')
+ inString = false;
+ } else {
+ if (c == '"') {
+ inString = true;
+ } else if (c == '/' && i + 1 < e && line[i + 1] == '/') {
+ return i;
+ }
+ }
+ }
+ return StringRef::npos;
+}
+
InFlightDiagnostic Parser::emitError(SMLoc loc, const Twine &message) {
auto diag = mlir::emitError(getEncodedSourceLocation(loc), message);
@@ -247,16 +284,15 @@ InFlightDiagnostic Parser::emitWrongTokenError(const Twine &message) {
// Drop the \n so we emit the diagnostic at the end of the line.
startOfBuffer = startOfBuffer.drop_back();
- // Check to see if the preceding line has a comment on it. We assume that a
- // `//` is the start of a comment, which is mostly correct.
- // TODO: This will do the wrong thing for // in a string literal.
+ // Check to see if the preceding line has a comment on it.
auto prevLine = startOfBuffer;
size_t newLineIndex = prevLine.find_last_of("\n\r");
if (newLineIndex != StringRef::npos)
prevLine = prevLine.drop_front(newLineIndex);
- // If we find a // in the current line, then emit the diagnostic before it.
- size_t commentStart = prevLine.find("//");
+ // If we find a // in the current line (outside of string literals), then
+ // emit the diagnostic before it.
+ size_t commentStart = findCommentStart(prevLine);
if (commentStart != StringRef::npos)
startOfBuffer = startOfBuffer.drop_back(prevLine.size() - commentStart);
}
diff --git a/mlir/test/IR/parser-string-literal-comment.mlir b/mlir/test/IR/parser-string-literal-comment.mlir
new file mode 100644
index 0000000000000..d47eb3bd59f14
--- /dev/null
+++ b/mlir/test/IR/parser-string-literal-comment.mlir
@@ -0,0 +1,10 @@
+// RUN: not mlir-opt -allow-unregistered-dialect %s 2>&1 | FileCheck %s
+
+// Test that '//' in a string literal doesn't confuse comment detection when
+// backing up to position an error. The error should point to the end of the
+// line (column 54), not at the '//' inside the string (column 28).
+
+// CHECK: {{.*}}:[[# @LINE + 2]]:54: error: expected operation name in quotes
+func.func @string_with_slashes() {
+ "test.op"() {url = "http://example.com"} : () -> ()
+
diff --git a/mlir/test/IR/parser-string-literal-url.mlir b/mlir/test/IR/parser-string-literal-url.mlir
new file mode 100644
index 0000000000000..c78d92c6355e7
--- /dev/null
+++ b/mlir/test/IR/parser-string-literal-url.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s
+
+// Test that URLs with '//' in string literals parse correctly.
+
+// CHECK-LABEL: func.func @url_in_string
+// CHECK: "test.op"() {url = "http://example.com/path"} : () -> ()
+// CHECK: "test.op"() {url = "https://example.com//double//slashes"} : () -> ()
+// CHECK: "test.op"() {proto = "file:///local/path"} : () -> ()
+func.func @url_in_string() {
+ "test.op"() {url = "http://example.com/path"} : () -> ()
+ "test.op"() {url = "https://example.com//double//slashes"} : () -> ()
+ "test.op"() {proto = "file:///local/path"} : () -> ()
+ return
+}
>From 4fabc73be7461a79984bfcd7d70a88154226782e Mon Sep 17 00:00:00 2001
From: Nick Kreeger <nick.kreeger at microsoft.com>
Date: Thu, 15 Jan 2026 13:57:54 +0000
Subject: [PATCH 2/2] Review cleanup.
---
mlir/lib/AsmParser/Parser.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp
index 88d2d01368d0d..3f2cb8df2e9c5 100644
--- a/mlir/lib/AsmParser/Parser.cpp
+++ b/mlir/lib/AsmParser/Parser.cpp
@@ -207,7 +207,9 @@ static size_t findCommentStart(StringRef line) {
if (slashPos == StringRef::npos)
return StringRef::npos;
- // Fast path: no quote before the '//', so it's a real comment.
+ // Fast path: comment at start of line, or no quote before the '//'.
+ if (slashPos == 0)
+ return 0;
size_t quotePos = line.find('"');
if (quotePos == StringRef::npos || quotePos > slashPos)
return slashPos;
@@ -218,7 +220,7 @@ static size_t findCommentStart(StringRef line) {
char c = line[i];
if (inString) {
// Skip escaped characters inside strings.
- if (c == '\\' && i + 1 < e) {
+ if (c == '\\') {
++i;
continue;
}
More information about the Mlir-commits
mailing list