[PATCH] COFF MC: better handling of tricky symbol and section names

Hans Wennborg hans at chromium.org
Tue Oct 15 16:14:54 PDT 2013


Hi rnk,

Because of mangling, we produce symbol and section names with lots of funny characters, notably @ and ?.

MC will currently choke on trying to parse the code it emits, and this patch tries to fix that:

- Make @ trigger quoting of symbol names
- Quote section names that contain funny characters
- Just parse section names like other identifiers (to allow for quotes)
- Don't split identifiers on the @ character if the identifier is a string.

This allows us to parse the asm that we emit for non-trivial programs.

http://llvm-reviews.chandlerc.com/D1945

Files:
  lib/MC/MCParser/AsmParser.cpp
  lib/MC/MCParser/COFFAsmParser.cpp
  lib/MC/MCSectionCOFF.cpp
  lib/MC/MCSymbol.cpp
  test/CodeGen/X86/coff-feat00.ll
  test/CodeGen/X86/fastcall-correct-mangling.ll
  test/CodeGen/X86/stdcall.ll
  test/MC/COFF/quoted-names.ll

Index: lib/MC/MCParser/AsmParser.cpp
===================================================================
--- lib/MC/MCParser/AsmParser.cpp
+++ lib/MC/MCParser/AsmParser.cpp
@@ -793,6 +793,10 @@
 
     // This is a symbol reference.
     std::pair<StringRef, StringRef> Split = Identifier.split('@');
+    if (FirstTokenKind == AsmToken::String) {
+      // The identifier was quoted (a string), so don't split it.
+      Split = std::make_pair(Identifier, "");
+    }
     MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
 
     // Lookup the symbol variant if used.
Index: lib/MC/MCParser/COFFAsmParser.cpp
===================================================================
--- lib/MC/MCParser/COFFAsmParser.cpp
+++ lib/MC/MCParser/COFFAsmParser.cpp
@@ -295,12 +295,7 @@
 }
 
 bool COFFAsmParser::ParseSectionName(StringRef &SectionName) {
-  if (!getLexer().is(AsmToken::Identifier))
-    return true;
-
-  SectionName = getTok().getIdentifier();
-  Lex();
-  return false;
+  return getParser().parseIdentifier(SectionName);
 }
 
 // .section name [, "flags"]
Index: lib/MC/MCSectionCOFF.cpp
===================================================================
--- lib/MC/MCSectionCOFF.cpp
+++ lib/MC/MCSectionCOFF.cpp
@@ -39,6 +39,24 @@
   Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
 }
 
+static bool isAcceptableSectionNameChar(char C) {
+  if ((C < 'a' || C > 'z') &&
+      (C < 'A' || C > 'Z') &&
+      (C < '0' || C > '9') &&
+      C != '_' && C != '$' && C != '.')
+    return false;
+  return true;
+}
+
+/// sectionNameNeedsQuoting - Return true if the section name \p Name needs
+/// quotes to be syntactically correct.
+static bool sectionNameNeedsQuoting(StringRef Name) {
+  for (unsigned i = 0, e = Name.size(); i != e; ++i)
+    if (!isAcceptableSectionNameChar(Name[i]))
+      return true;
+  return false;
+}
+
 void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
                                          raw_ostream &OS,
                                          const MCExpr *Subsection) const {
@@ -49,7 +67,10 @@
     return;
   }
 
-  OS << "\t.section\t" << getSectionName() << ",\"";
+  if (sectionNameNeedsQuoting(getSectionName()))
+    OS << "\t.section\t" << '"' << getSectionName() << '"' << ",\"";
+  else
+    OS << "\t.section\t" << getSectionName() << ",\"";
   if (getKind().isText())
     OS << 'x';
   if (getKind().isWriteable())
Index: lib/MC/MCSymbol.cpp
===================================================================
--- lib/MC/MCSymbol.cpp
+++ lib/MC/MCSymbol.cpp
@@ -21,7 +21,7 @@
   if ((C < 'a' || C > 'z') &&
       (C < 'A' || C > 'Z') &&
       (C < '0' || C > '9') &&
-      C != '_' && C != '$' && C != '.' && C != '@')
+      C != '_' && C != '$' && C != '.')
     return false;
   return true;
 }
Index: test/CodeGen/X86/coff-feat00.ll
===================================================================
--- test/CodeGen/X86/coff-feat00.ll
+++ test/CodeGen/X86/coff-feat00.ll
@@ -4,4 +4,4 @@
   ret i32 0
 }
 
-; CHECK: @feat.00 = 1
+; CHECK: "@feat.00" = 1
Index: test/CodeGen/X86/fastcall-correct-mangling.ll
===================================================================
--- test/CodeGen/X86/fastcall-correct-mangling.ll
+++ test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -3,7 +3,7 @@
 ; Check that a fastcall function gets correct mangling
 
 define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) {
-; CHECK: @func@20:
+; CHECK: "@func@20":
         ret void
 }
 
Index: test/CodeGen/X86/stdcall.ll
===================================================================
--- test/CodeGen/X86/stdcall.ll
+++ test/CodeGen/X86/stdcall.ll
@@ -5,7 +5,7 @@
 
 define internal x86_stdcallcc void @MyFunc() nounwind {
 entry:
-; CHECK: MyFunc@0:
+; CHECK: "_MyFunc@0":
 ; CHECK: ret
   ret void
 }
@@ -20,5 +20,5 @@
 
 @B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4
 ; CHECK: _B:
-; CHECK: .long _MyFunc@0
+; CHECK: .long "_MyFunc@0"
 
Index: test/MC/COFF/quoted-names.ll
===================================================================
--- /dev/null
+++ test/MC/COFF/quoted-names.ll
@@ -0,0 +1,20 @@
+; Check that certain symbol and section names are quoted in the asm output.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | FileCheck %s
+
+; Check that the symbol and section names can round-trip through the assembler.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | llvm-mc -triple i686-pc-win32 -filetype=obj | llvm-readobj -s -section-symbols | FileCheck %s --check-prefix=READOBJ
+
+@"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" = global i32 0
+
+define weak i32 @"\01??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"() section ".text" {
+  %res = load i32* @"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ"
+  ret i32 %res
+}
+
+; CHECK: .section ".text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51","xr"
+; CHECK: .globl "??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"
+; CHECK: "??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"
+
+; READOBJ: Symbol
+; READOBJ: Name: ??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
+; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1945.1.patch
Type: text/x-patch
Size: 5594 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131015/ef05cd0b/attachment.bin>


More information about the llvm-commits mailing list