[PATCH] MC: AsmLexer: handle multi-character CommentStrings correctly

Janne Grunau j at jannau.net
Sun Jul 20 01:35:54 PDT 2014


Adds an isAtStartOfComment(const char *Ptr) overload and uses it so that multi-character CommentStrings (such as "//") are handled correctly.

http://reviews.llvm.org/D4597

Files:
  include/llvm/MC/MCParser/AsmLexer.h
  lib/MC/MCParser/AsmLexer.cpp
  test/MC/AArch64/single-slash.s

Index: include/llvm/MC/MCParser/AsmLexer.h
===================================================================
--- include/llvm/MC/MCParser/AsmLexer.h
+++ include/llvm/MC/MCParser/AsmLexer.h
@@ -49,7 +49,11 @@
 
   const AsmToken peekTok(bool ShouldSkipSpace = true) override;
 
+  /// isAtStartOfComment - Check if \p Char starts a line comment
+  /// \return true if \p Char matches the CommentString. Always false for
+  ///         targets with a multi-character CommentString.
   bool isAtStartOfComment(char Char);
+  bool isAtStartOfComment(const char *Ptr);
   bool isAtStatementSeparator(const char *Ptr);
 
   const MCAsmInfo &getMAI() const { return MAI; }
Index: lib/MC/MCParser/AsmLexer.cpp
===================================================================
--- lib/MC/MCParser/AsmLexer.cpp
+++ lib/MC/MCParser/AsmLexer.cpp
@@ -417,7 +417,7 @@
 StringRef AsmLexer::LexUntilEndOfStatement() {
   TokStart = CurPtr;
 
-  while (!isAtStartOfComment(*CurPtr) &&    // Start of line comment.
+  while (!isAtStartOfComment(CurPtr) &&     // Start of line comment.
          !isAtStatementSeparator(CurPtr) && // End of statement marker.
          *CurPtr != '\n' && *CurPtr != '\r' &&
          (*CurPtr != 0 || CurPtr != CurBuf.end())) {
@@ -459,10 +459,21 @@
 }
 
 bool AsmLexer::isAtStartOfComment(char Char) {
-  // FIXME: This won't work for multi-character comment indicators like "//".
+  if (strlen(MAI.getCommentString()) > 1)
+    return false;
   return Char == *MAI.getCommentString();
 }
 
+bool AsmLexer::isAtStartOfComment(const char *Ptr) {
+  const char *CommentString = MAI.getCommentString();
+  if (strlen(CommentString) > 1 &&
+      // FIXME: special case for the bogus comment string in X86MCAsmInfoDarwin
+      CommentString[0] == '#' && CommentString[1] == '#') {
+    return CommentString[0] == Ptr[0];
+  }
+  return strncmp(Ptr, CommentString, strlen(CommentString)) == 0;
+}
+
 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
   return strncmp(Ptr, MAI.getSeparatorString(),
                  strlen(MAI.getSeparatorString())) == 0;
@@ -473,7 +484,7 @@
   // This always consumes at least one character.
   int CurChar = getNextChar();
 
-  if (isAtStartOfComment(CurChar)) {
+  if (isAtStartOfComment(TokStart)) {
     // If this comment starts with a '#', then return the Hash token and let
     // the assembler parser see if it can be parsed as a cpp line filename
     // comment. We do this only if we are at the start of a line.
Index: test/MC/AArch64/single-slash.s
===================================================================
--- /dev/null
+++ test/MC/AArch64/single-slash.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s | FileCheck %s
+
+// Test that a single slash is not mistaken for the start of a comment.
+
+//CHECK: movz    x0, #0x10
+    movz x0, #(32 / 2)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D4597.11692.patch
Type: text/x-patch
Size: 2849 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140720/26f06d25/attachment.bin>


More information about the llvm-commits mailing list