[PATCH] MC asm parser: allow @'s in symbol names in MS assembly

Hans Wennborg hans at chromium.org
Fri Oct 18 12:12:01 PDT 2013


  Add another test.

Hi rnk, rafael, majnemer,

http://llvm-reviews.chandlerc.com/D1978

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D1978?vs=5035&id=5036#toc

Files:
  include/llvm/MC/MCAsmInfo.h
  lib/MC/MCAsmInfo.cpp
  lib/MC/MCParser/AsmLexer.cpp
  lib/MC/MCParser/AsmParser.cpp
  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
  test/MC/COFF/tricky-names.ll
  test/MC/ELF/bad-relocation.s

Index: include/llvm/MC/MCAsmInfo.h
===================================================================
--- include/llvm/MC/MCAsmInfo.h
+++ include/llvm/MC/MCAsmInfo.h
@@ -156,6 +156,10 @@
     /// symbol names.  This defaults to true.
     bool AllowPeriodsInName;
 
+    /// \brief This is true if the assembler allows @ characters in symbol
+    /// names. Defaults to false.
+    bool AllowAtInName;
+
     /// AllowUTF8 - This is true if the assembler accepts UTF-8 input.
     // FIXME: Make this a more general encoding setting?
     bool AllowUTF8;
@@ -485,6 +489,9 @@
     bool doesAllowPeriodsInName() const {
       return AllowPeriodsInName;
     }
+    bool doesAllowAtInName() const {
+      return AllowAtInName;
+    }
     bool doesAllowUTF8() const {
       return AllowUTF8;
     }
Index: lib/MC/MCAsmInfo.cpp
===================================================================
--- lib/MC/MCAsmInfo.cpp
+++ lib/MC/MCAsmInfo.cpp
@@ -53,6 +53,7 @@
   AllowQuotesInName = false;
   AllowNameToStartWithDigit = false;
   AllowPeriodsInName = true;
+  AllowAtInName = false;
   AllowUTF8 = true;
   UseDataRegionDirectives = false;
   ZeroDirective = "\t.zero\t";
Index: lib/MC/MCParser/AsmLexer.cpp
===================================================================
--- lib/MC/MCParser/AsmLexer.cpp
+++ lib/MC/MCParser/AsmLexer.cpp
@@ -138,9 +138,9 @@
   return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
 }
 
-/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
 static bool IsIdentifierChar(char c) {
-  return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+  return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?';
 }
 AsmToken AsmLexer::LexIdentifier() {
   // Check for floating point literals.
Index: lib/MC/MCParser/AsmParser.cpp
===================================================================
--- lib/MC/MCParser/AsmParser.cpp
+++ lib/MC/MCParser/AsmParser.cpp
@@ -769,6 +769,7 @@
     Res = MCUnaryExpr::CreateLNot(Res, getContext());
     return false;
   case AsmToken::Dollar:
+  case AsmToken::At:
   case AsmToken::String:
   case AsmToken::Identifier: {
     StringRef Identifier;
@@ -792,19 +793,25 @@
     EndLoc = SMLoc::getFromPointer(Identifier.end());
 
     // This is a symbol reference.
+    StringRef SymbolName = Identifier;
+    MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
     std::pair<StringRef, StringRef> Split = Identifier.split('@');
-    MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
 
     // Lookup the symbol variant if used.
-    MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
     if (Split.first.size() != Identifier.size()) {
       Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
-      if (Variant == MCSymbolRefExpr::VK_Invalid) {
+      if (Variant != MCSymbolRefExpr::VK_Invalid) {
+        SymbolName = Split.first;
+      } else if (MAI.doesAllowAtInName()) {
+        Variant = MCSymbolRefExpr::VK_None;
+      } else {
         Variant = MCSymbolRefExpr::VK_None;
         return TokError("invalid variant '" + Split.second + "'");
       }
     }
 
+    MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
     // If this is an absolute variable reference, substitute it now to preserve
     // semantics in the face of reassignment.
     if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
@@ -2105,25 +2112,25 @@
 ///   ::= string
 bool AsmParser::parseIdentifier(StringRef &Res) {
   // The assembler has relaxed rules for accepting identifiers, in particular we
-  // allow things like '.globl $foo', which would normally be separate
-  // tokens. At this level, we have already lexed so we cannot (currently)
+  // allow things like '.globl $foo' and '.def @feat.00', which would normally be
+  // separate tokens. At this level, we have already lexed so we cannot (currently)
   // handle this as a context dependent token, instead we detect adjacent tokens
   // and return the combined identifier.
-  if (Lexer.is(AsmToken::Dollar)) {
-    SMLoc DollarLoc = getLexer().getLoc();
+  if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
+    SMLoc PrefixLoc = getLexer().getLoc();
 
-    // Consume the dollar sign, and check for a following identifier.
+    // Consume the prefix character, and check for a following identifier.
     Lex();
     if (Lexer.isNot(AsmToken::Identifier))
       return true;
 
-    // We have a '$' followed by an identifier, make sure they are adjacent.
-    if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+    // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
+    if (PrefixLoc.getPointer() + 1 != getTok().getLoc().getPointer())
       return true;
 
     // Construct the joined identifier and consume the token.
     Res =
-        StringRef(DollarLoc.getPointer(), getTok().getIdentifier().size() + 1);
+        StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
     Lex();
     return false;
   }
Index: lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
===================================================================
--- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -135,6 +135,8 @@
   AssemblerDialect = AsmWriterFlavor;
 
   TextAlignFillValue = 0x90;
+
+  AllowAtInName = true;
 }
 
 void X86MCAsmInfoGNUCOFF::anchor() { }
Index: test/MC/COFF/tricky-names.ll
===================================================================
--- /dev/null
+++ test/MC/COFF/tricky-names.ll
@@ -0,0 +1,38 @@
+; Check how tricky symbols are printed in the asm output.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | FileCheck %s --check-prefix=ASM
+
+; Check that we can roundtrip these names through our assembler.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | llvm-mc -triple i686-pc-win32 -filetype=obj | llvm-readobj -t | FileCheck %s --check-prefix=READOBJ
+
+
+@"\01??__E_Generic_object@?$_Error_objects at H@std@@YAXXZ" = global i32 0
+@"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4" = global i32 0
+@"\01 at foo.bar" = global i32 0
+
+define weak i32 @"\01??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51"() section ".text" {
+  %a = load i32* @"\01??__E_Generic_object@?$_Error_objects at H@std@@YAXXZ"
+  %b = load i32* @"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4"
+  %c = load i32* @"\01 at foo.bar"
+  %x = add i32 %a, %b
+  %y = add i32 %x, %c
+  ret i32 %y
+}
+
+; Check that these symbols are not quoted. They occur in output that gets passed to GAS.
+; ASM: .globl __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4
+; ASM-NOT: .globl "__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4"
+; ASM: .globl @foo.bar
+; ASM-NOT: .globl "@foo.bar"
+
+; READOBJ: Symbol
+; READOBJ: Name: .text$??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51
+; READOBJ: Section: .text$??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51
+; READOBJ: Symbol
+; READOBJ: Name: ??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51
+; READOBJ: Section: .text$??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51
+; READOBJ: Symbol
+; READOBJ: Name: ??__E_Generic_object@?$_Error_objects at H@std@@YAXXZ
+; READOBJ: Symbol
+; READOBJ: Name: __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4
+; READOBJ: Symbol
+; READOBJ: Name: @foo.bar
Index: test/MC/ELF/bad-relocation.s
===================================================================
--- /dev/null
+++ test/MC/ELF/bad-relocation.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o /dev/null 2>&1 | FileCheck  %s
+
+// CHECK: error: invalid variant 'BADRELOC'
+
+        .text
+foo:
+	leal	.Lfoo at BADRELOC(%ebx), %eax
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1978.2.patch
Type: text/x-patch
Size: 8055 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131018/a8645014/attachment.bin>


More information about the llvm-commits mailing list