[llvm] r196607 - Integrated assembler incorrectly lexes ARM-style comments

David Peixotto dpeixott at codeaurora.org
Tue Jan 14 14:28:25 PST 2014


On 1/14/2014 12:59 PM, Joerg Sonnenberger wrote:
> On Tue, Jan 14, 2014 at 11:04:31AM -0800, David Peixotto wrote:
>> diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
>> index 53b380f..d88c2a1 100644
>> --- a/include/llvm/MC/MCParser/MCAsmLexer.h
>> +++ b/include/llvm/MC/MCParser/MCAsmLexer.h
>> @@ -118,6 +118,7 @@ class MCAsmLexer {
>>   protected: // Can only create subclasses.
>>     const char *TokStart;
>>     bool SkipSpace;
>> +  bool AllowAtInIdentifier; // Cached here to avoid repeated MAI query.
>>
>>     MCAsmLexer();
>>
>
> This comment is no longer correct now, so just drop it please.

Done. Attached an updated patch.
-------------- next part --------------
>From 60ca0dde9fc92d727cfbf768c00e734ce833bb66 Mon Sep 17 00:00:00 2001
From: David Peixotto <dpeixott at codeaurora.org>
Date: Fri, 10 Jan 2014 18:07:06 -0800
Subject: [PATCH] Fix parsing of .symver directive on ARM

ARM assembly syntax uses @ for a comment, except for the second
parameter of the .symver directive which requires @ as part of the
symbol name. This commit fixes the parsing of this directive by
adding a special case for ARM for this one argument.

To make the change we had to move the AllowAtInIdentifier variable
to the MCAsmLexer interface (from AsmLexer) and expose a setter for
the value.  The ELFAsmParser then toggles this value when parsing
the second argument to the .symver directive for a target that
uses @ as a comment symbol.
---
 include/llvm/MC/MCParser/AsmLexer.h   |    1 -
 include/llvm/MC/MCParser/MCAsmLexer.h |    4 +
 lib/MC/MCParser/ELFAsmParser.cpp      |    7 ++
 test/MC/ARM/arm-elf-symver.s          |  152 +++++++++++++++++++++++++++++++++
 test/MC/ARM/comment.s                 |   23 +++++
 5 files changed, 186 insertions(+), 1 deletions(-)
 create mode 100644 test/MC/ARM/arm-elf-symver.s

diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index a97b450..1b3ab57 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -30,7 +30,6 @@ class AsmLexer : public MCAsmLexer {
   const char *CurPtr;
   const MemoryBuffer *CurBuf;
   bool isAtStartOfLine;
-  bool AllowAtInIdentifier; // Cached here to avoid repeated MAI query.
 
   void operator=(const AsmLexer&) LLVM_DELETED_FUNCTION;
   AsmLexer(const AsmLexer&) LLVM_DELETED_FUNCTION;
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 53b380f..8edf3a4 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -118,6 +118,7 @@ class MCAsmLexer {
 protected: // Can only create subclasses.
   const char *TokStart;
   bool SkipSpace;
+  bool AllowAtInIdentifier;
 
   MCAsmLexer();
 
@@ -170,6 +171,9 @@ public:
 
   /// setSkipSpace - Set whether spaces should be ignored by the lexer
   void setSkipSpace(bool val) { SkipSpace = val; }
+
+  bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
+  void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
 };
 
 } // End llvm namespace
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 8807975..ca5532d 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -590,7 +590,14 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
   if (getLexer().isNot(AsmToken::Comma))
     return TokError("expected a comma");
 
+  // ARM assembly uses @ for a comment...
+  // except when parsing the second parameter of the .symver directive.
+  // Force the next symbol to allow @ in the identifier, which is
+  // required for this directive and then reset it to its initial state.
+  const bool AllowAtInIdentifier = getLexer().getAllowAtInIdentifier();
+  getLexer().setAllowAtInIdentifier(true);
   Lex();
+  getLexer().setAllowAtInIdentifier(AllowAtInIdentifier);
 
   StringRef AliasName;
   if (getParser().parseIdentifier(AliasName))
diff --git a/test/MC/ARM/arm-elf-symver.s b/test/MC/ARM/arm-elf-symver.s
new file mode 100644
index 0000000..0d141b7
--- /dev/null
+++ b/test/MC/ARM/arm-elf-symver.s
@@ -0,0 +1,152 @@
+@ RUN: llvm-mc -filetype=obj -triple arm-none-linux-gnueabi %s -o - | llvm-readobj -r -t | FileCheck %s
+@ RUN: llvm-mc -filetype=obj -triple thumb-none-linux-gnueabi %s -o - | llvm-readobj -r -t | FileCheck %s
+
+defined1:
+defined2:
+defined3:
+        .symver defined1, bar1@zed
+        .symver undefined1, bar2@zed
+
+        .symver defined2, bar3@@zed
+
+        .symver defined3, bar5@@@zed
+        .symver undefined3, bar6@@@zed
+
+        .long defined1
+        .long undefined1
+        .long defined2
+        .long defined3
+        .long undefined3
+
+        .global global1
+        .symver global1, g1@@zed
+global1:
+
+@ CHECK: Relocations [
+@ CHECK-NEXT:   Section (2) .rel.text {
+@ CHECK-NEXT:     0x0 R_ARM_ABS32 defined1 0x0
+@ CHECK-NEXT:     0x4 R_ARM_ABS32 bar2@zed 0x0
+@ CHECK-NEXT:     0x8 R_ARM_ABS32 defined2 0x0
+@ CHECK-NEXT:     0xC R_ARM_ABS32 defined3 0x0
+@ CHECK-NEXT:     0x10 R_ARM_ABS32 bar6@zed 0x0
+@ CHECK-NEXT:   }
+@ CHECK-NEXT: ]
+
+@ CHECK:  Symbol {
+@ CHECK:    Name: bar1@zed (28)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Local (0x0)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .text (0x1)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: bar3@@zed (46)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Local (0x0)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .text (0x1)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: bar5@@zed (56)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Local (0x0)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .text (0x1)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: defined1 (1)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Local (0x0)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .text (0x1)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: defined2 (10)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Local (0x0)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .text (0x1)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: defined3 (19)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Local (0x0)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .text (0x1)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: .text (0)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Local (0x0)
+@ CHECK-NEXT:     Type: Section (0x3)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .text (0x1)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: .data (0)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Local (0x0)
+@ CHECK-NEXT:     Type: Section (0x3)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .data (0x3)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: .bss (0)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Local (0x0)
+@ CHECK-NEXT:     Type: Section (0x3)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .bss (0x4)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: g1@@zed (88)
+@ CHECK-NEXT:     Value: 0x14
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Global (0x1)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .text (0x1)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: global1 (80)
+@ CHECK-NEXT:     Value: 0x14
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Global (0x1)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section: .text (0x1)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: bar2@zed (37)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Global (0x1)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section:  (0x0)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT:   Symbol {
+@ CHECK-NEXT:     Name: bar6@zed (66)
+@ CHECK-NEXT:     Value: 0x0
+@ CHECK-NEXT:     Size: 0
+@ CHECK-NEXT:     Binding: Global (0x1)
+@ CHECK-NEXT:     Type: None (0x0)
+@ CHECK-NEXT:     Other: 0
+@ CHECK-NEXT:     Section:  (0x0)
+@ CHECK-NEXT:   }
+@ CHECK-NEXT: ]
diff --git a/test/MC/ARM/comment.s b/test/MC/ARM/comment.s
index e95f313..c24bc1a 100644
--- a/test/MC/ARM/comment.s
+++ b/test/MC/ARM/comment.s
@@ -9,6 +9,18 @@ foo:
  .long baz@got
  add r0, r0@ignore this extra junk
 
+@ the symver directive should allow @ in the second symbol name
+defined1:
+defined2:
+defined3:
+bar:
+  .symver defined1, bar1@zed
+  .symver defined2, bar3@@zed
+  .symver defined3, bar5@@@zed
+
+far:
+  .long baz@got
+
 @CHECK-LABEL: foo:
 @CHECK: bl boo
 @CHECK-NOT: @
@@ -21,4 +33,15 @@ foo:
 @CHECK: add r0, r0
 @CHECK-NOT: @
 
+@CHECK-LABEL: bar:
+@CHECK: bar1@zed = defined1
+@CHECK: bar3@@zed = defined2
+@CHECK: bar5@@@zed = defined3
+
+@ Make sure we did not mess up the parser state and it still lexes
+@ comments correctly by excluding the @ in normal symbols
+@CHECK-LABEL: far:
+@CHECK:  .long baz
+@CHECK-NOT: @
+
 @ERROR-NOT: error:
-- 
1.7.8.3



More information about the llvm-commits mailing list