[PATCH] MC: AsmLexer: handle multi-character CommentStrings correctly
Janne Grunau
j at jannau.net
Mon Aug 4 12:57:45 PDT 2014
added explicit tests with single '#' comments for x86 darwin. A couple of existing tests will catch incorrect parsing of single '#' comments too though.
http://reviews.llvm.org/D4597
Files:
include/llvm/MC/MCParser/AsmLexer.h
lib/MC/MCParser/AsmLexer.cpp
test/MC/AArch64/single-slash.s
test/MC/AsmParser/comments-x86-darwin.s
Index: include/llvm/MC/MCParser/AsmLexer.h
===================================================================
--- include/llvm/MC/MCParser/AsmLexer.h
+++ include/llvm/MC/MCParser/AsmLexer.h
@@ -49,7 +49,7 @@
const AsmToken peekTok(bool ShouldSkipSpace = true) override;
- bool isAtStartOfComment(char Char);
+ bool isAtStartOfComment(const char *Ptr);
bool isAtStatementSeparator(const char *Ptr);
const MCAsmInfo &getMAI() const { return MAI; }
Index: lib/MC/MCParser/AsmLexer.cpp
===================================================================
--- lib/MC/MCParser/AsmLexer.cpp
+++ lib/MC/MCParser/AsmLexer.cpp
@@ -417,7 +417,7 @@
StringRef AsmLexer::LexUntilEndOfStatement() {
TokStart = CurPtr;
- while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
+ while (!isAtStartOfComment(CurPtr) && // Start of line comment.
!isAtStatementSeparator(CurPtr) && // End of statement marker.
*CurPtr != '\n' && *CurPtr != '\r' &&
(*CurPtr != 0 || CurPtr != CurBuf.end())) {
@@ -458,9 +458,16 @@
return Token;
}
-bool AsmLexer::isAtStartOfComment(char Char) {
- // FIXME: This won't work for multi-character comment indicators like "//".
- return Char == *MAI.getCommentString();
+bool AsmLexer::isAtStartOfComment(const char *Ptr) {
+ const char *CommentString = MAI.getCommentString();
+
+ if (CommentString[1] == '\0')
+ return CommentString[0] == Ptr[0];
+
+ if (CommentString[1] == '#')
+ // FIXME: special case for the bogus comment string in X86MCAsmInfoDarwin
+ return CommentString[0] == Ptr[0];
+ return strncmp(Ptr, CommentString, strlen(CommentString)) == 0;
}
bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
@@ -473,7 +480,7 @@
// This always consumes at least one character.
int CurChar = getNextChar();
- if (isAtStartOfComment(CurChar)) {
+ if (isAtStartOfComment(TokStart)) {
// If this comment starts with a '#', then return the Hash token and let
// the assembler parser see if it can be parsed as a cpp line filename
// comment. We do this only if we are at the start of a line.
Index: test/MC/AArch64/single-slash.s
===================================================================
--- /dev/null
+++ test/MC/AArch64/single-slash.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s | FileCheck %s
+
+// Test that a single slash is not mistaken as the start of comment.
+
+//CHECK: movz x0, #0x10
+ movz x0, #(32 / 2)
Index: test/MC/AsmParser/comments-x86-darwin.s
===================================================================
--- /dev/null
+++ test/MC/AsmParser/comments-x86-darwin.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin %s 2>&1 | FileCheck %s
+# ensure that single '#' comments are worink as expected on x86 darwin
+.align 3 # test single hash after align
+// CHECK: .align 3
+foo: # single hash should be ignored as comment
+// CHECK-LABEL: foo:
+ movl %esp, %ebp # same after an instruction
+// CHECK: movl %esp, %ebp
+# movl %esp, %ebp ## start of the line
+// CHECK-NOT: movl %esp, %ebp
+ # movl %esp, %ebp ## not quite start of the line
+// CHECK-NOT: movl %esp, %ebp
+bar:
+// CHECK-LABEL: bar:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D4597.12175.patch
Type: text/x-patch
Size: 3244 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140804/84eedad2/attachment.bin>
More information about the llvm-commits
mailing list