[llvm] r196607 - Integrated assembler incorrectly lexes ARM-style comments
David Peixotto
dpeixott at codeaurora.org
Tue Jan 14 14:28:25 PST 2014
On 1/14/2014 12:59 PM, Joerg Sonnenberger wrote:
> On Tue, Jan 14, 2014 at 11:04:31AM -0800, David Peixotto wrote:
>> diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
>> index 53b380f..d88c2a1 100644
>> --- a/include/llvm/MC/MCParser/MCAsmLexer.h
>> +++ b/include/llvm/MC/MCParser/MCAsmLexer.h
>> @@ -118,6 +118,7 @@ class MCAsmLexer {
>> protected: // Can only create subclasses.
>> const char *TokStart;
>> bool SkipSpace;
>> + bool AllowAtInIdentifier; // Cached here to avoid repeated MAI query.
>>
>> MCAsmLexer();
>>
>
> This comment is no longer correct now, so just drop it please.
Done. Attached an updated patch.
-------------- next part --------------
>From 60ca0dde9fc92d727cfbf768c00e734ce833bb66 Mon Sep 17 00:00:00 2001
From: David Peixotto <dpeixott at codeaurora.org>
Date: Fri, 10 Jan 2014 18:07:06 -0800
Subject: [PATCH] Fix parsing of .symver directive on ARM
ARM assembly syntax uses @ for a comment, execpt for the second
parameter of the .symver directive which requires @ as part of the
symbol name. This commit fixes the parsing of this directive by
adding a special case for ARM for this one argumnet.
To make the change we had to move the AllowAtInIdentifier variable
to the MCAsmLexer interface (from AsmLexer) and expose a setter for
the value. The ELFAsmParser then toggles this value when parsing
the second argument to the .symver directive for a target that
uses @ as a comment symbol
---
include/llvm/MC/MCParser/AsmLexer.h | 1 -
include/llvm/MC/MCParser/MCAsmLexer.h | 4 +
lib/MC/MCParser/ELFAsmParser.cpp | 7 ++
test/MC/ARM/arm-elf-symver.s | 152 +++++++++++++++++++++++++++++++++
test/MC/ARM/comment.s | 23 +++++
5 files changed, 186 insertions(+), 1 deletions(-)
create mode 100644 test/MC/ARM/arm-elf-symver.s
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index a97b450..1b3ab57 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -30,7 +30,6 @@ class AsmLexer : public MCAsmLexer {
const char *CurPtr;
const MemoryBuffer *CurBuf;
bool isAtStartOfLine;
- bool AllowAtInIdentifier; // Cached here to avoid repeated MAI query.
void operator=(const AsmLexer&) LLVM_DELETED_FUNCTION;
AsmLexer(const AsmLexer&) LLVM_DELETED_FUNCTION;
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 53b380f..8edf3a4 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -118,6 +118,7 @@ class MCAsmLexer {
protected: // Can only create subclasses.
const char *TokStart;
bool SkipSpace;
+ bool AllowAtInIdentifier;
MCAsmLexer();
@@ -170,6 +171,9 @@ public:
/// setSkipSpace - Set whether spaces should be ignored by the lexer
void setSkipSpace(bool val) { SkipSpace = val; }
+
+ bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
+ void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
};
} // End llvm namespace
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 8807975..ca5532d 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -590,7 +590,14 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::Comma))
return TokError("expected a comma");
+ // ARM assembly uses @ for a comment...
+ // except when parsing the second parameter of the .symver directive.
+ // Force the next symbol to allow @ in the identifier, which is
+ // required for this directive and then reset it to its initial state.
+ const bool AllowAtInIdentifier = getLexer().getAllowAtInIdentifier();
+ getLexer().setAllowAtInIdentifier(true);
Lex();
+ getLexer().setAllowAtInIdentifier(AllowAtInIdentifier);
StringRef AliasName;
if (getParser().parseIdentifier(AliasName))
diff --git a/test/MC/ARM/arm-elf-symver.s b/test/MC/ARM/arm-elf-symver.s
new file mode 100644
index 0000000..0d141b7
--- /dev/null
+++ b/test/MC/ARM/arm-elf-symver.s
@@ -0,0 +1,152 @@
+@ RUN: llvm-mc -filetype=obj -triple arm-none-linux-gnueabi %s -o - | llvm-readobj -r -t | FileCheck %s
+@ RUN: llvm-mc -filetype=obj -triple thumb-none-linux-gnueabi %s -o - | llvm-readobj -r -t | FileCheck %s
+
+defined1:
+defined2:
+defined3:
+ .symver defined1, bar1 at zed
+ .symver undefined1, bar2 at zed
+
+ .symver defined2, bar3@@zed
+
+ .symver defined3, bar5@@@zed
+ .symver undefined3, bar6@@@zed
+
+ .long defined1
+ .long undefined1
+ .long defined2
+ .long defined3
+ .long undefined3
+
+ .global global1
+ .symver global1, g1@@zed
+global1:
+
+@ CHECK: Relocations [
+@ CHECK-NEXT: Section (2) .rel.text {
+@ CHECK-NEXT: 0x0 R_ARM_ABS32 defined1 0x0
+@ CHECK-NEXT: 0x4 R_ARM_ABS32 bar2 at zed 0x0
+@ CHECK-NEXT: 0x8 R_ARM_ABS32 defined2 0x0
+@ CHECK-NEXT: 0xC R_ARM_ABS32 defined3 0x0
+@ CHECK-NEXT: 0x10 R_ARM_ABS32 bar6 at zed 0x0
+@ CHECK-NEXT: }
+@ CHECK-NEXT: ]
+
+@ CHECK: Symbol {
+@ CHECK: Name: bar1 at zed (28)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local (0x0)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .text (0x1)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: bar3@@zed (46)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local (0x0)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .text (0x1)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: bar5@@zed (56)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local (0x0)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .text (0x1)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: defined1 (1)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local (0x0)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .text (0x1)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: defined2 (10)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local (0x0)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .text (0x1)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: defined3 (19)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local (0x0)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .text (0x1)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: .text (0)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local (0x0)
+@ CHECK-NEXT: Type: Section (0x3)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .text (0x1)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: .data (0)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local (0x0)
+@ CHECK-NEXT: Type: Section (0x3)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .data (0x3)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: .bss (0)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local (0x0)
+@ CHECK-NEXT: Type: Section (0x3)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .bss (0x4)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: g1@@zed (88)
+@ CHECK-NEXT: Value: 0x14
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Global (0x1)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .text (0x1)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: global1 (80)
+@ CHECK-NEXT: Value: 0x14
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Global (0x1)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .text (0x1)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: bar2 at zed (37)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Global (0x1)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: (0x0)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: Symbol {
+@ CHECK-NEXT: Name: bar6 at zed (66)
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Global (0x1)
+@ CHECK-NEXT: Type: None (0x0)
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: (0x0)
+@ CHECK-NEXT: }
+@ CHECK-NEXT: ]
diff --git a/test/MC/ARM/comment.s b/test/MC/ARM/comment.s
index e95f313..c24bc1a 100644
--- a/test/MC/ARM/comment.s
+++ b/test/MC/ARM/comment.s
@@ -9,6 +9,18 @@ foo:
.long baz at got
add r0, r0 at ignore this extra junk
+@ the symver directive should allow @ in the second symbol name
+defined1:
+defined2:
+defined3:
+bar:
+ .symver defined1, bar1 at zed
+ .symver defined2, bar3@@zed
+ .symver defined3, bar5@@@zed
+
+far:
+ .long baz at got
+
@CHECK-LABEL: foo:
@CHECK: bl boo
@CHECK-NOT: @
@@ -21,4 +33,15 @@ foo:
@CHECK: add r0, r0
@CHECK-NOT: @
+ at CHECK-LABEL: bar:
+ at CHECK: bar1 at zed = defined1
+ at CHECK: bar3@@zed = defined2
+ at CHECK: bar5@@@zed = defined3
+
+@ Make sure we did not mess up the parser state and it still lexes
+@ comments correctly by excluding the @ in normal symbols
+ at CHECK-LABEL: far:
+ at CHECK: .long baz
+ at CHECK-NOT: @
+
@ERROR-NOT: error:
--
1.7.8.3
More information about the llvm-commits
mailing list