[Mlir-commits] [mlir] [mlir][AsmParser] Fix comment detection in string literals (PR #174125)

Nick Kreeger llvmlistbot at llvm.org
Thu Jan 15 07:51:31 PST 2026


https://github.com/nkreeger updated https://github.com/llvm/llvm-project/pull/174125

>From 4c6ff4dbb8a5e5e69b4ee21f9a8ee09584837e2f Mon Sep 17 00:00:00 2001
From: Nick Kreeger <nick.kreeger at gmail.com>
Date: Wed, 31 Dec 2025 21:50:09 -0600
Subject: [PATCH 1/3] [mlir][AsmParser] Fix comment detection in string
 literals

The error positioning logic in emitWrongTokenError() would incorrectly identify '//' inside string literals as comment starts when backing up to position diagnostics. For example, a URL like "http://example.com" would cause the error to point at the '://' instead of the end of the line.

Add findCommentStart() helper that properly skips '//' occurrences inside string literals by tracking quote state and handling escape sequences.
---
 mlir/lib/AsmParser/Parser.cpp                 | 46 +++++++++++++++++--
 .../IR/parser-string-literal-comment.mlir     | 10 ++++
 mlir/test/IR/parser-string-literal-url.mlir   | 14 ++++++
 3 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 mlir/test/IR/parser-string-literal-comment.mlir
 create mode 100644 mlir/test/IR/parser-string-literal-url.mlir

diff --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp
index 74936e32bd9d9..88d2d01368d0d 100644
--- a/mlir/lib/AsmParser/Parser.cpp
+++ b/mlir/lib/AsmParser/Parser.cpp
@@ -198,6 +198,43 @@ InFlightDiagnostic Parser::emitError(const Twine &message) {
   return emitError(SMLoc::getFromPointer(loc.getPointer() - 1), message);
 }
 
+/// Find the start of a line comment (`//`) in the given string, ignoring
+/// occurrences inside string literals. Returns StringRef::npos if no comment
+/// is found.
+static size_t findCommentStart(StringRef line) {
+  // Fast path: no comment in line at all.
+  size_t slashPos = line.find("//");
+  if (slashPos == StringRef::npos)
+    return StringRef::npos;
+
+  // Fast path: no quote before the '//', so it's a real comment.
+  size_t quotePos = line.find('"');
+  if (quotePos == StringRef::npos || quotePos > slashPos)
+    return slashPos;
+
+  // A quote appears before '//'. Parse carefully to handle string literals.
+  bool inString = false;
+  for (size_t i = 0, e = line.size(); i < e; ++i) {
+    char c = line[i];
+    if (inString) {
+      // Skip escaped characters inside strings.
+      if (c == '\\' && i + 1 < e) {
+        ++i;
+        continue;
+      }
+      if (c == '"')
+        inString = false;
+    } else {
+      if (c == '"') {
+        inString = true;
+      } else if (c == '/' && i + 1 < e && line[i + 1] == '/') {
+        return i;
+      }
+    }
+  }
+  return StringRef::npos;
+}
+
 InFlightDiagnostic Parser::emitError(SMLoc loc, const Twine &message) {
   auto diag = mlir::emitError(getEncodedSourceLocation(loc), message);
 
@@ -247,16 +284,15 @@ InFlightDiagnostic Parser::emitWrongTokenError(const Twine &message) {
     // Drop the \n so we emit the diagnostic at the end of the line.
     startOfBuffer = startOfBuffer.drop_back();
 
-    // Check to see if the preceding line has a comment on it.  We assume that a
-    // `//` is the start of a comment, which is mostly correct.
-    // TODO: This will do the wrong thing for // in a string literal.
+    // Check to see if the preceding line has a comment on it.
     auto prevLine = startOfBuffer;
     size_t newLineIndex = prevLine.find_last_of("\n\r");
     if (newLineIndex != StringRef::npos)
       prevLine = prevLine.drop_front(newLineIndex);
 
-    // If we find a // in the current line, then emit the diagnostic before it.
-    size_t commentStart = prevLine.find("//");
+    // If we find a // in the current line (outside of string literals), then
+    // emit the diagnostic before it.
+    size_t commentStart = findCommentStart(prevLine);
     if (commentStart != StringRef::npos)
       startOfBuffer = startOfBuffer.drop_back(prevLine.size() - commentStart);
   }
diff --git a/mlir/test/IR/parser-string-literal-comment.mlir b/mlir/test/IR/parser-string-literal-comment.mlir
new file mode 100644
index 0000000000000..d47eb3bd59f14
--- /dev/null
+++ b/mlir/test/IR/parser-string-literal-comment.mlir
@@ -0,0 +1,10 @@
+// RUN: not mlir-opt -allow-unregistered-dialect %s 2>&1 | FileCheck %s
+
+// Test that '//' in a string literal doesn't confuse comment detection when
+// backing up to position an error. The error should point to the end of the
+// line (column 54), not at the '//' inside the string (column 28).
+
+// CHECK: {{.*}}:[[# @LINE + 2]]:54: error: expected operation name in quotes
+func.func @string_with_slashes() {
+  "test.op"() {url = "http://example.com"} : () -> ()
+
diff --git a/mlir/test/IR/parser-string-literal-url.mlir b/mlir/test/IR/parser-string-literal-url.mlir
new file mode 100644
index 0000000000000..c78d92c6355e7
--- /dev/null
+++ b/mlir/test/IR/parser-string-literal-url.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s
+
+// Test that URLs with '//' in string literals parse correctly.
+
+// CHECK-LABEL: func.func @url_in_string
+// CHECK: "test.op"() {url = "http://example.com/path"} : () -> ()
+// CHECK: "test.op"() {url = "https://example.com//double//slashes"} : () -> ()
+// CHECK: "test.op"() {proto = "file:///local/path"} : () -> ()
+func.func @url_in_string() {
+  "test.op"() {url = "http://example.com/path"} : () -> ()
+  "test.op"() {url = "https://example.com//double//slashes"} : () -> ()
+  "test.op"() {proto = "file:///local/path"} : () -> ()
+  return
+}

>From 4fabc73be7461a79984bfcd7d70a88154226782e Mon Sep 17 00:00:00 2001
From: Nick Kreeger <nick.kreeger at microsoft.com>
Date: Thu, 15 Jan 2026 13:57:54 +0000
Subject: [PATCH 2/3] Review cleanup.

---
 mlir/lib/AsmParser/Parser.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp
index 88d2d01368d0d..3f2cb8df2e9c5 100644
--- a/mlir/lib/AsmParser/Parser.cpp
+++ b/mlir/lib/AsmParser/Parser.cpp
@@ -207,7 +207,9 @@ static size_t findCommentStart(StringRef line) {
   if (slashPos == StringRef::npos)
     return StringRef::npos;
 
-  // Fast path: no quote before the '//', so it's a real comment.
+  // Fast path: comment at start of line, or no quote before the '//'.
+  if (slashPos == 0)
+    return 0;
   size_t quotePos = line.find('"');
   if (quotePos == StringRef::npos || quotePos > slashPos)
     return slashPos;
@@ -218,7 +220,7 @@ static size_t findCommentStart(StringRef line) {
     char c = line[i];
     if (inString) {
       // Skip escaped characters inside strings.
-      if (c == '\\' && i + 1 < e) {
+      if (c == '\\') {
         ++i;
         continue;
       }

>From aca4a5032231d6e5983992b15bc64845af0fa146 Mon Sep 17 00:00:00 2001
From: Nick Kreeger <nick.kreeger at microsoft.com>
Date: Thu, 15 Jan 2026 15:51:19 +0000
Subject: [PATCH 3/3] use --verify-diagnostics

---
 mlir/test/IR/parser-string-literal-comment.mlir | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/test/IR/parser-string-literal-comment.mlir b/mlir/test/IR/parser-string-literal-comment.mlir
index d47eb3bd59f14..9d3a5983bc3ea 100644
--- a/mlir/test/IR/parser-string-literal-comment.mlir
+++ b/mlir/test/IR/parser-string-literal-comment.mlir
@@ -1,10 +1,10 @@
-// RUN: not mlir-opt -allow-unregistered-dialect %s 2>&1 | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect --verify-diagnostics %s
 
 // Test that '//' in a string literal doesn't confuse comment detection when
 // backing up to position an error. The error should point to the end of the
-// line (column 54), not at the '//' inside the string (column 28).
+// line, not at the '//' inside the string.
 
-// CHECK: {{.*}}:[[# @LINE + 2]]:54: error: expected operation name in quotes
 func.func @string_with_slashes() {
+  // expected-error at +1 {{expected operation name in quotes}}
   "test.op"() {url = "http://example.com"} : () -> ()
 



More information about the Mlir-commits mailing list