[llvm] 8893d40 - MC: Support quoted symbol names

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri May 9 19:00:22 PDT 2025


Author: Fangrui Song
Date: 2025-05-09T19:00:17-07:00
New Revision: 8893d407a68ece98d6911e3f186305dbc43ee90e

URL: https://github.com/llvm/llvm-project/commit/8893d407a68ece98d6911e3f186305dbc43ee90e
DIFF: https://github.com/llvm/llvm-project/commit/8893d407a68ece98d6911e3f186305dbc43ee90e.diff

LOG: MC: Support quoted symbol names

GAS has supported double-quoted (") symbol names since 2015.
Within a quoted name, both \ and " need to be escaped.
https://sourceware.org/pipermail/binutils/2015-August/090003.html

We don't unescape \\ or \" in assembly strings, which leads to a difference
between `clang -c --save-temps` and `clang -c` for the following C code:

```
int x asm("a\"\\b");
```

Fix #138390

MC/COFF/safeseh.s looks incorrect. \01 in `.safeseh "\01foo"` is not a
correct escape sequence. Change it to \\.

Pull Request: https://github.com/llvm/llvm-project/pull/138817

Added: 
    

Modified: 
    bolt/test/runtime/X86/fdata-escape-chars.ll
    llvm/lib/MC/MCContext.cpp
    llvm/lib/MC/MCSymbol.cpp
    llvm/test/MC/AsmParser/quoted.s
    llvm/test/MC/COFF/safeseh.s
    llvm/test/MC/ELF/symbol-names.s

Removed: 
    


################################################################################
diff  --git a/bolt/test/runtime/X86/fdata-escape-chars.ll b/bolt/test/runtime/X86/fdata-escape-chars.ll
index 4ea781ad184be..1dce03ed54ea0 100644
--- a/bolt/test/runtime/X86/fdata-escape-chars.ll
+++ b/bolt/test/runtime/X86/fdata-escape-chars.ll
@@ -82,7 +82,7 @@ define internal void @static_symb_backslash_b() #0 {
 ; INSTR_CHECK: Binary Function "main"
 ; INSTR_CHECK: Exec Count  : 1
 ; INSTR_CHECK: {{([[:xdigit:]]+)}}:   callq   "symb whitespace" # Count: 1
-; INSTR_CHECK: {{([[:xdigit:]]+)}}:   callq   "symb backslash\" # Count: 2
+; INSTR_CHECK: {{([[:xdigit:]]+)}}:   callq   "symb backslash\\" # Count: 2
 ; INSTR_CHECK: Binary Function "static symb backslash\/1(*2)"
 ; INSTR_CHECK: Exec Count  : 1
 ; INSTR_CHECK: {{([[:xdigit:]]+)}}:   callq   "symb whitespace" # Count: 1

diff  --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index f70087e14f702..1a6f3b3c17ea0 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -212,6 +212,27 @@ MCDataFragment *MCContext::allocInitialFragment(MCSection &Sec) {
 MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
   SmallString<128> NameSV;
   StringRef NameRef = Name.toStringRef(NameSV);
+  if (NameRef.contains('\\')) {
+    NameSV = NameRef;
+    size_t S = 0;
+    // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
+    // other characters following \\, which we do not implement due to code
+    // structure.
+    for (size_t I = 0, E = NameSV.size(); I != E; ++I) {
+      char C = NameSV[I];
+      if (C == '\\' && I + 1 != E) {
+        switch (NameSV[I + 1]) {
+        case '"':
+        case '\\':
+          C = NameSV[++I];
+          break;
+        }
+      }
+      NameSV[S++] = C;
+    }
+    NameSV.resize(S);
+    NameRef = NameSV;
+  }
 
   assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
 

diff  --git a/llvm/lib/MC/MCSymbol.cpp b/llvm/lib/MC/MCSymbol.cpp
index 2a709f4aef80c..3ca85b76a35d9 100644
--- a/llvm/lib/MC/MCSymbol.cpp
+++ b/llvm/lib/MC/MCSymbol.cpp
@@ -74,6 +74,8 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
       OS << "\\n";
     else if (C == '"')
       OS << "\\\"";
+    else if (C == '\\')
+      OS << "\\\\";
     else
       OS << C;
   }

diff  --git a/llvm/test/MC/AsmParser/quoted.s b/llvm/test/MC/AsmParser/quoted.s
index 16b0997827603..710f2d0155f1c 100644
--- a/llvm/test/MC/AsmParser/quoted.s
+++ b/llvm/test/MC/AsmParser/quoted.s
@@ -9,6 +9,9 @@
 "a b":
   call "a b"
 
+# CHECK: "a b\\":
+"a b\\":
+
 #--- err.s
  "a\":
 # ERR: 1:2: error: unterminated string constant

diff  --git a/llvm/test/MC/COFF/safeseh.s b/llvm/test/MC/COFF/safeseh.s
index d21628daff5fa..afcdc771ff994 100644
--- a/llvm/test/MC/COFF/safeseh.s
+++ b/llvm/test/MC/COFF/safeseh.s
@@ -2,5 +2,5 @@
 
 // check that we quote the output of .safeseh
 
-.safeseh "\01foo"
-// CHECK: .safeseh "\01foo"
+.safeseh "\\foo"
+// CHECK: .safeseh "\\foo"

diff  --git a/llvm/test/MC/ELF/symbol-names.s b/llvm/test/MC/ELF/symbol-names.s
index f605c723d4d4d..427187c329acf 100644
--- a/llvm/test/MC/ELF/symbol-names.s
+++ b/llvm/test/MC/ELF/symbol-names.s
@@ -1,12 +1,28 @@
-// RUN: llvm-mc -triple i686-pc-linux -filetype=obj %s -o - | llvm-readobj --symbols - | FileCheck %s
+// RUN: llvm-mc -triple=x86_64 -filetype=obj %s | llvm-objdump -tdr - | FileCheck %s
 
 // MC allows ?'s in symbol names as an extension.
 
+// CHECK-LABEL:SYMBOL TABLE:
+// CHECK-NEXT: 0000000000000001 l     F .text  0000000000000000 a"b\{{$}}
+// CHECK-NEXT: 0000000000000006 l       .text  0000000000000000 a\{{$}}
+// CHECK-NEXT: 0000000000000000 g     F .text  0000000000000000 foo?bar
+// CHECK-NEXT: 0000000000000000 *UND*          0000000000000000 a"b\q{{$}}
+// CHECK-EMPTY:
+
 .text
 .globl foo?bar
 .type foo?bar, @function
 foo?bar:
 ret
 
-// CHECK: Symbol
-// CHECK: Name: foo?bar
+// CHECK-LABEL:<a"b\>:
+// CHECK-NEXT:   callq  {{.*}} <a"b\>
+// CHECK-LABEL:<a\>:
+// CHECK-NEXT:   callq  {{.*}}
+// CHECK-NEXT:     R_X86_64_PLT32 a"b\q-0x4
+.type "a\"b\\", @function
+"a\"b\\":
+  call "a\"b\\"
+"a\\":
+/// GAS emits a warning for \q
+  call "a\"b\q"


        


More information about the llvm-commits mailing list