[PATCH] MC asm parser: allow @'s in symbol names in MS assembly
Hans Wennborg
hans at chromium.org
Fri Oct 18 12:12:01 PDT 2013
Add another test.
Hi rnk, rafael, majnemer,
http://llvm-reviews.chandlerc.com/D1978
CHANGE SINCE LAST DIFF
http://llvm-reviews.chandlerc.com/D1978?vs=5035&id=5036#toc
Files:
include/llvm/MC/MCAsmInfo.h
lib/MC/MCAsmInfo.cpp
lib/MC/MCParser/AsmLexer.cpp
lib/MC/MCParser/AsmParser.cpp
lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
test/MC/COFF/tricky-names.ll
test/MC/ELF/bad-relocation.s
Index: include/llvm/MC/MCAsmInfo.h
===================================================================
--- include/llvm/MC/MCAsmInfo.h
+++ include/llvm/MC/MCAsmInfo.h
@@ -156,6 +156,10 @@
/// symbol names. This defaults to true.
bool AllowPeriodsInName;
+ /// \brief This is true if the assembler allows @ characters in symbol
+ /// names. Defaults to false.
+ bool AllowAtInName;
+
/// AllowUTF8 - This is true if the assembler accepts UTF-8 input.
// FIXME: Make this a more general encoding setting?
bool AllowUTF8;
@@ -485,6 +489,9 @@
bool doesAllowPeriodsInName() const {
return AllowPeriodsInName;
}
+ bool doesAllowAtInName() const {
+ return AllowAtInName;
+ }
bool doesAllowUTF8() const {
return AllowUTF8;
}
Index: lib/MC/MCAsmInfo.cpp
===================================================================
--- lib/MC/MCAsmInfo.cpp
+++ lib/MC/MCAsmInfo.cpp
@@ -53,6 +53,7 @@
AllowQuotesInName = false;
AllowNameToStartWithDigit = false;
AllowPeriodsInName = true;
+ AllowAtInName = false;
AllowUTF8 = true;
UseDataRegionDirectives = false;
ZeroDirective = "\t.zero\t";
Index: lib/MC/MCParser/AsmLexer.cpp
===================================================================
--- lib/MC/MCParser/AsmLexer.cpp
+++ lib/MC/MCParser/AsmLexer.cpp
@@ -138,9 +138,9 @@
return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
}
-/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
static bool IsIdentifierChar(char c) {
- return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+ return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?';
}
AsmToken AsmLexer::LexIdentifier() {
// Check for floating point literals.
Index: lib/MC/MCParser/AsmParser.cpp
===================================================================
--- lib/MC/MCParser/AsmParser.cpp
+++ lib/MC/MCParser/AsmParser.cpp
@@ -769,6 +769,7 @@
Res = MCUnaryExpr::CreateLNot(Res, getContext());
return false;
case AsmToken::Dollar:
+ case AsmToken::At:
case AsmToken::String:
case AsmToken::Identifier: {
StringRef Identifier;
@@ -792,19 +793,25 @@
EndLoc = SMLoc::getFromPointer(Identifier.end());
// This is a symbol reference.
+ StringRef SymbolName = Identifier;
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
std::pair<StringRef, StringRef> Split = Identifier.split('@');
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
// Lookup the symbol variant if used.
- MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
if (Split.first.size() != Identifier.size()) {
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
- if (Variant == MCSymbolRefExpr::VK_Invalid) {
+ if (Variant != MCSymbolRefExpr::VK_Invalid) {
+ SymbolName = Split.first;
+ } else if (MAI.doesAllowAtInName()) {
+ Variant = MCSymbolRefExpr::VK_None;
+ } else {
Variant = MCSymbolRefExpr::VK_None;
return TokError("invalid variant '" + Split.second + "'");
}
}
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
@@ -2105,25 +2112,25 @@
/// ::= string
bool AsmParser::parseIdentifier(StringRef &Res) {
// The assembler has relaxed rules for accepting identifiers, in particular we
- // allow things like '.globl $foo', which would normally be separate
- // tokens. At this level, we have already lexed so we cannot (currently)
+ // allow things like '.globl $foo' and '.def @feat.00', which would normally be
+ // separate tokens. At this level, we have already lexed so we cannot (currently)
// handle this as a context dependent token, instead we detect adjacent tokens
// and return the combined identifier.
- if (Lexer.is(AsmToken::Dollar)) {
- SMLoc DollarLoc = getLexer().getLoc();
+ if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
+ SMLoc PrefixLoc = getLexer().getLoc();
- // Consume the dollar sign, and check for a following identifier.
+ // Consume the prefix character, and check for a following identifier.
Lex();
if (Lexer.isNot(AsmToken::Identifier))
return true;
- // We have a '$' followed by an identifier, make sure they are adjacent.
- if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+ // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
+ if (PrefixLoc.getPointer() + 1 != getTok().getLoc().getPointer())
return true;
// Construct the joined identifier and consume the token.
Res =
- StringRef(DollarLoc.getPointer(), getTok().getIdentifier().size() + 1);
+ StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
Lex();
return false;
}
Index: lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
===================================================================
--- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -135,6 +135,8 @@
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
+
+ AllowAtInName = true;
}
void X86MCAsmInfoGNUCOFF::anchor() { }
Index: test/MC/COFF/tricky-names.ll
===================================================================
--- /dev/null
+++ test/MC/COFF/tricky-names.ll
@@ -0,0 +1,38 @@
+; Check how tricky symbols are printed in the asm output.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | FileCheck %s --check-prefix=ASM
+
+; Check that we can roundtrip these names through our assembler.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | llvm-mc -triple i686-pc-win32 -filetype=obj | llvm-readobj -t | FileCheck %s --check-prefix=READOBJ
+
+
+@"\01??__E_Generic_object@?$_Error_objects at H@std@@YAXXZ" = global i32 0
+@"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4" = global i32 0
+@"\01 at foo.bar" = global i32 0
+
+define weak i32 @"\01??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51"() section ".text" {
+ %a = load i32* @"\01??__E_Generic_object@?$_Error_objects at H@std@@YAXXZ"
+ %b = load i32* @"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4"
+ %c = load i32* @"\01 at foo.bar"
+ %x = add i32 %a, %b
+ %y = add i32 %x, %c
+ ret i32 %y
+}
+
+; Check that these symbols are not quoted. They occur in output that gets passed to GAS.
+; ASM: .globl __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4
+; ASM-NOT: .globl "__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4"
+; ASM: .globl @foo.bar
+; ASM-NOT: .globl "@foo.bar"
+
+; READOBJ: Symbol
+; READOBJ: Name: .text$??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51
+; READOBJ: Section: .text$??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51
+; READOBJ: Symbol
+; READOBJ: Name: ??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51
+; READOBJ: Section: .text$??_B?$num_put at _WV?$back_insert_iterator at V?$basic_string at _WU?$char_traits at _W@std@@V?$allocator at _W@2@@std@@@std@@@std@@51
+; READOBJ: Symbol
+; READOBJ: Name: ??__E_Generic_object@?$_Error_objects at H@std@@YAXXZ
+; READOBJ: Symbol
+; READOBJ: Name: __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS at 4
+; READOBJ: Symbol
+; READOBJ: Name: @foo.bar
Index: test/MC/ELF/bad-relocation.s
===================================================================
--- /dev/null
+++ test/MC/ELF/bad-relocation.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o /dev/null 2>&1 | FileCheck %s
+
+// CHECK: error: invalid variant 'BADRELOC'
+
+ .text
+foo:
+ leal .Lfoo at BADRELOC(%ebx), %eax
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1978.2.patch
Type: text/x-patch
Size: 8055 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131018/a8645014/attachment.bin>
More information about the llvm-commits
mailing list