[PATCH] D23017: [MC] Fix handling of end-of-line preprocessor comments

Nirav Dave via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 1 08:36:24 PDT 2016


niravd created this revision.
niravd added reviewers: rnk, majnemer.
niravd added a subscriber: llvm-commits.

When parsing assembly where the line comment syntax is not hash, the
lexer cannot distinguish between a hash that starts a preprocessor line
comment and one that is part of an assembly statement, so the two must be
distinguished during parsing. Previously, this was incompletely handled by
not checking for EndOfStatement at the end of statements and interpreting
hash-prefixed statements as comments.

Change EndOfStatement parsing to check for hash comments, and reintroduce
hash statement parsing to catch the previously handled cases.

https://reviews.llvm.org/D23017

Files:
  lib/MC/MCParser/AsmParser.cpp
  test/MC/ARM/preserve-comments-arm.s

Index: test/MC/ARM/preserve-comments-arm.s
===================================================================
--- /dev/null
+++ test/MC/ARM/preserve-comments-arm.s
@@ -0,0 +1,10 @@
+	#RUN: llvm-mc -preserve-comments -n -triple arm-eabi < %s > %t
+	#RUN: sed 's/[\t]#/	@/g' %s > %t2
+	#RUN: diff %t %t2
+	.text
+
+	mov	r0, r0
+foo:	#Comment here
+	mov	r0, r0	@ EOL comment
+	.ident	""
+
Index: lib/MC/MCParser/AsmParser.cpp
===================================================================
--- lib/MC/MCParser/AsmParser.cpp
+++ lib/MC/MCParser/AsmParser.cpp
@@ -262,9 +262,23 @@
     return false;
   }
 
+  bool parseEOL(const Twine &ErrMsg) {
+    if (getTok().getKind() == AsmToken::Hash) {
+      StringRef CommentStr = parseStringToEndOfStatement();
+      Lexer.Lex();
+      Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
+    }
+    if (getTok().getKind() != AsmToken::EndOfStatement)
+      return TokError(ErrMsg);
+    Lex();
+    return false;
+  }
+
   /// parseToken - If current token has the specified kind, eat it and
   /// return success.  Otherwise, emit the specified error and return failure.
   bool parseToken(AsmToken::TokenKind T, const Twine &ErrMsg) {
+    if (T == AsmToken::EndOfStatement)
+      return parseEOL(ErrMsg);
     if (getTok().getKind() != T)
       return TokError(ErrMsg);
     Lex();
@@ -1409,6 +1423,16 @@
     Lex();
     return false;
   }
+  if (Lexer.is(AsmToken::Hash)) {
+    // Seeing a hash here means that it was an end-of-line comment in
+    // an asm syntax where hash is not the comment character, and the
+    // previous statement parser did not check for the end of statement.
+    // Relex as EndOfStatement.
+    StringRef CommentStr = parseStringToEndOfStatement();
+    Lexer.Lex();
+    Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
+    return false;
+  }
   // Statements always start with an identifier.
   AsmToken ID = getTok();
   SMLoc IDLoc = ID.getLoc();
@@ -1542,6 +1566,16 @@
     if (!Sym->isUndefined() || Sym->isVariable())
       return Error(IDLoc, "invalid symbol redefinition");
 
+    // The end of a label should be treated as end of line for lexing
+    // purposes, but that information is not available to the Lexer, which
+    // does not understand labels. This may cause us to see a Hash token
+    // here instead of a preprocessor line comment.
+    if (getTok().is(AsmToken::Hash)) {
+      StringRef CommentStr = parseStringToEndOfStatement();
+      Lexer.Lex();
+      Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
+    }
+
     // Consume any end of statement token, if present, to avoid spurious
     // AddBlankLine calls().
     if (getTok().is(AsmToken::EndOfStatement)) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D23017.66327.patch
Type: text/x-patch
Size: 2693 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160801/31385ac7/attachment.bin>


More information about the llvm-commits mailing list