[llvm] [LLVM][AsmParser] Add support for C style comments (PR #111554)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 1 09:08:42 PDT 2024


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/111554

>From 43e3a01063173e2e7050fadad8c3339a64f8c447 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 3 Oct 2024 15:44:17 -0700
Subject: [PATCH] [LLVM][AsmParser] Add support for C style comments

Add support for C style comments in LLVM assembly.
---
 llvm/docs/LangRef.rst                         |  3 +-
 llvm/include/llvm/AsmParser/LLLexer.h         |  2 +
 llvm/lib/AsmParser/LLLexer.cpp                | 42 +++++++++++++++++--
 llvm/test/Assembler/c-style-comment.ll        | 22 ++++++++++
 .../Assembler/invalid-c-style-comment0.ll     |  6 +++
 .../Assembler/invalid-c-style-comment1.ll     |  8 ++++
 .../Assembler/invalid-c-style-comment2.ll     |  7 ++++
 7 files changed, 85 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Assembler/c-style-comment.ll
 create mode 100644 llvm/test/Assembler/invalid-c-style-comment0.ll
 create mode 100644 llvm/test/Assembler/invalid-c-style-comment1.ll
 create mode 100644 llvm/test/Assembler/invalid-c-style-comment2.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 177924dca4d178..e3c1f8697d53a6 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -122,13 +122,14 @@ And the hard way:
 .. code-block:: llvm
 
     %0 = add i32 %X, %X           ; yields i32:%0
-    %1 = add i32 %0, %0           ; yields i32:%1
+    %1 = add i32 %0, %0           /* yields i32:%1 */
     %result = add i32 %1, %1
 
 This last way of multiplying ``%X`` by 8 illustrates several important
 lexical features of LLVM:
 
 #. Comments are delimited with a '``;``' and go until the end of line.
+   Alternatively, comments can start with ``/*`` and terminate with ``*/``.
 #. Unnamed temporaries are created when the result of a computation is
    not assigned to a named value.
 #. By default, unnamed temporaries are numbered sequentially (using a
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index a9f51fb925f5d5..8e0c5638eef37d 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -94,7 +94,9 @@ namespace llvm {
     lltok::Kind LexToken();
 
     int getNextChar();
+    int peekNextChar() const;
     void SkipLineComment();
+    bool SkipCComment();
     lltok::Kind ReadString(lltok::Kind kind);
     bool ReadVarName();
 
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 56abd03d623541..43a33a677b2bd4 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -175,17 +175,25 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
 }
 
 int LLLexer::getNextChar() {
-  char CurChar = *CurPtr++;
+  int NextChar = peekNextChar();
+  // Keeping CurPtr unchanged at EOF, so that another call to `getNextChar`
+  // returns EOF again.
+  if (NextChar != EOF)
+    ++CurPtr;
+  return NextChar;
+}
+
+int LLLexer::peekNextChar() const {
+  char CurChar = *CurPtr;
   switch (CurChar) {
   default: return (unsigned char)CurChar;
   case 0:
     // A nul character in the stream is either the end of the current buffer or
     // a random nul in the file.  Disambiguate that here.
-    if (CurPtr-1 != CurBuf.end())
+    if (CurPtr != CurBuf.end())
       return 0;  // Just whitespace.
 
     // Otherwise, return end of file.
-    --CurPtr;  // Another call to lex will return EOF again.
     return EOF;
   }
 }
@@ -200,7 +208,6 @@ lltok::Kind LLLexer::LexToken() {
       // Handle letters: [a-zA-Z_]
       if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
         return LexIdentifier();
-
       return lltok::Error;
     case EOF: return lltok::Eof;
     case 0:
@@ -251,6 +258,12 @@ lltok::Kind LLLexer::LexToken() {
     case ',': return lltok::comma;
     case '*': return lltok::star;
     case '|': return lltok::bar;
+    case '/':
+      if (peekNextChar() != '*')
+        return lltok::Error;
+      if (SkipCComment())
+        return lltok::Error;
+      continue;
     }
   }
 }
@@ -262,6 +275,27 @@ void LLLexer::SkipLineComment() {
   }
 }
 
+/// SkipCComment - Skip the remainder of a C-style block comment. Returns true
+/// if end of file is hit before the terminator, false once it is consumed.
+bool LLLexer::SkipCComment() {
+  getNextChar(); // Skip the star; LexToken peeked it before calling us.
+
+  while (true) { // Scan until the terminator or end of file.
+    int CurChar = getNextChar();
+    switch (CurChar) { // No default: every other character is skipped.
+    case EOF:
+      LexError("unterminated comment");
+      return true;
+    case '*':
+      // A star only closes the comment when a slash follows immediately.
+      if (peekNextChar() == '/') {
+        getNextChar(); // Eat the '/'.
+        return false;
+      }
+    }
+  }
+}
+
 /// Lex all tokens that start with an @ character.
 ///   GlobalVar   @\"[^\"]*\"
 ///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
diff --git a/llvm/test/Assembler/c-style-comment.ll b/llvm/test/Assembler/c-style-comment.ll
new file mode 100644
index 00000000000000..9cc17ad90d0bc2
--- /dev/null
+++ b/llvm/test/Assembler/c-style-comment.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+/* Simple C style comment */
+
+; CHECK: @B = external global i32
+@B = external global i32
+
+/* multiline C style comment at "top-level"
+ * This is the second line
+ * and this is third
+ */
+
+
+; CHECK: @foo
+define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind {
+entry: /* inline comment */
+  %cmp = fcmp olt <4 x float> %a, /* to be ignored */ %b
+  ret <4 x i1> %cmp /* ignore */
+}
+
+/* End of the assembly file */
+
diff --git a/llvm/test/Assembler/invalid-c-style-comment0.ll b/llvm/test/Assembler/invalid-c-style-comment0.ll
new file mode 100644
index 00000000000000..f042cdd151af92
--- /dev/null
+++ b/llvm/test/Assembler/invalid-c-style-comment0.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck %s -DFILE=%s
+
+@B = external global i32
+
+; CHECK: [[FILE]]:[[@LINE+1]]:1: error: unterminated comment
+/* End of the assembly file
diff --git a/llvm/test/Assembler/invalid-c-style-comment1.ll b/llvm/test/Assembler/invalid-c-style-comment1.ll
new file mode 100644
index 00000000000000..7f2b966238cf49
--- /dev/null
+++ b/llvm/test/Assembler/invalid-c-style-comment1.ll
@@ -0,0 +1,8 @@
+; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck %s -DFILE=%s
+
+@B = external global i32
+
+/*   /* Nested comments not supported */
+
+; CHECK: [[FILE]]:[[@LINE+1]]:1: error: redefinition of global '@B'
+@B = external global i32
diff --git a/llvm/test/Assembler/invalid-c-style-comment2.ll b/llvm/test/Assembler/invalid-c-style-comment2.ll
new file mode 100644
index 00000000000000..2a759d84682e6e
--- /dev/null
+++ b/llvm/test/Assembler/invalid-c-style-comment2.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck %s -DFILE=%s
+
+@B = external global i32
+
+; CHECK: [[FILE]]:[[@LINE+1]]:2: error: expected top-level entity
+*/
+



More information about the llvm-commits mailing list