[Mlir-commits] [mlir] 6ef80e1 - [mlir][AsmParser] Fix comment detection in string literals (#174125)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Jan 15 14:26:16 PST 2026


Author: Nick Kreeger
Date: 2026-01-15T17:26:12-05:00
New Revision: 6ef80e147c63ad84cd616cb4d664b1caf0dbc4bf

URL: https://github.com/llvm/llvm-project/commit/6ef80e147c63ad84cd616cb4d664b1caf0dbc4bf
DIFF: https://github.com/llvm/llvm-project/commit/6ef80e147c63ad84cd616cb4d664b1caf0dbc4bf.diff

LOG: [mlir][AsmParser] Fix comment detection in string literals (#174125)

The error positioning logic in emitWrongTokenError() would incorrectly
identify '//' inside string literals as comment starts when backing up
to position diagnostics. For example, a URL like "http://example.com"
would cause the error to point at the '://' instead of the end of the
line.

Add findCommentStart() helper that properly skips '//' occurrences
inside string literals by tracking quote state and handling escape
sequences.

Added: 
    mlir/test/IR/parser-string-literal-comment.mlir
    mlir/test/IR/parser-string-literal-url.mlir

Modified: 
    mlir/lib/AsmParser/Parser.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp
index 0efe30727a2ea..22928df03fbc7 100644
--- a/mlir/lib/AsmParser/Parser.cpp
+++ b/mlir/lib/AsmParser/Parser.cpp
@@ -198,6 +198,45 @@ InFlightDiagnostic Parser::emitError(const Twine &message) {
   return emitError(SMLoc::getFromPointer(loc.getPointer() - 1), message);
 }
 
+/// Find the start of a line comment (`//`) in the given string, ignoring
+/// occurrences inside string literals. Returns StringRef::npos if no comment
+/// is found.
+static size_t findCommentStart(StringRef line) {
+  // Fast path: no comment in line at all.
+  size_t slashPos = line.find("//");
+  if (slashPos == StringRef::npos)
+    return StringRef::npos;
+
+  // Fast path: comment at start of line, or no quote before the '//'.
+  if (slashPos == 0)
+    return 0;
+  size_t quotePos = line.find('"');
+  if (quotePos == StringRef::npos || quotePos > slashPos)
+    return slashPos;
+
+  // A quote appears before '//'. Parse carefully to handle string literals.
+  bool inString = false;
+  for (size_t i = 0, e = line.size(); i < e; ++i) {
+    char c = line[i];
+    if (inString) {
+      // Skip escaped characters inside strings.
+      if (c == '\\') {
+        ++i;
+        continue;
+      }
+      if (c == '"')
+        inString = false;
+    } else {
+      if (c == '"') {
+        inString = true;
+      } else if (c == '/' && i + 1 < e && line[i + 1] == '/') {
+        return i;
+      }
+    }
+  }
+  return StringRef::npos;
+}
+
 InFlightDiagnostic Parser::emitError(SMLoc loc, const Twine &message) {
   auto diag = mlir::emitError(getEncodedSourceLocation(loc), message);
 
@@ -247,16 +286,15 @@ InFlightDiagnostic Parser::emitWrongTokenError(const Twine &message) {
     // Drop the \n so we emit the diagnostic at the end of the line.
     startOfBuffer = startOfBuffer.drop_back();
 
-    // Check to see if the preceding line has a comment on it.  We assume that a
-    // `//` is the start of a comment, which is mostly correct.
-    // TODO: This will do the wrong thing for // in a string literal.
+    // Check to see if the preceding line has a comment on it.
     auto prevLine = startOfBuffer;
     size_t newLineIndex = prevLine.find_last_of("\n\r");
     if (newLineIndex != StringRef::npos)
       prevLine = prevLine.drop_front(newLineIndex);
 
-    // If we find a // in the current line, then emit the diagnostic before it.
-    size_t commentStart = prevLine.find("//");
+    // If we find a // in the current line (outside of string literals), then
+    // emit the diagnostic before it.
+    size_t commentStart = findCommentStart(prevLine);
     if (commentStart != StringRef::npos)
       startOfBuffer = startOfBuffer.drop_back(prevLine.size() - commentStart);
   }

diff  --git a/mlir/test/IR/parser-string-literal-comment.mlir b/mlir/test/IR/parser-string-literal-comment.mlir
new file mode 100644
index 0000000000000..9d3a5983bc3ea
--- /dev/null
+++ b/mlir/test/IR/parser-string-literal-comment.mlir
@@ -0,0 +1,10 @@
+// RUN: mlir-opt -allow-unregistered-dialect --verify-diagnostics %s
+
+// Test that '//' in a string literal doesn't confuse comment detection when
+// backing up to position an error. The error should point to the end of the
+// line, not at the '//' inside the string.
+
+func.func @string_with_slashes() {
+  // expected-error at +1 {{expected operation name in quotes}}
+  "test.op"() {url = "http://example.com"} : () -> ()
+

diff  --git a/mlir/test/IR/parser-string-literal-url.mlir b/mlir/test/IR/parser-string-literal-url.mlir
new file mode 100644
index 0000000000000..c78d92c6355e7
--- /dev/null
+++ b/mlir/test/IR/parser-string-literal-url.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s
+
+// Test that URLs with '//' in string literals parse correctly.
+
+// CHECK-LABEL: func.func @url_in_string
+// CHECK: "test.op"() {url = "http://example.com/path"} : () -> ()
+// CHECK: "test.op"() {url = "https://example.com//double//slashes"} : () -> ()
+// CHECK: "test.op"() {proto = "file:///local/path"} : () -> ()
+func.func @url_in_string() {
+  "test.op"() {url = "http://example.com/path"} : () -> ()
+  "test.op"() {url = "https://example.com//double//slashes"} : () -> ()
+  "test.op"() {proto = "file:///local/path"} : () -> ()
+  return
+}


        


More information about the Mlir-commits mailing list