[llvm] MC: Support quoted symbol names (PR #138817)

via llvm-commits llvm-commits at lists.llvm.org
Wed May 7 00:55:47 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mc

Author: Fangrui Song (MaskRay)

<details>
<summary>Changes</summary>

GNU as (gas) has supported double-quoted (`"..."`) symbol names since 2015:
https://sourceware.org/pipermail/binutils/2015-August/090003.html

Closes #138390


---
Full diff: https://github.com/llvm/llvm-project/pull/138817.diff


2 Files Affected:

- (modified) llvm/lib/MC/MCContext.cpp (+21) 
- (modified) llvm/test/MC/ELF/symbol-names.s (+16-3) 


``````````diff
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index f70087e14f702..41caf20b331b2 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -212,6 +212,27 @@ MCDataFragment *MCContext::allocInitialFragment(MCSection &Sec) {
 MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
   SmallString<128> NameSV;
   StringRef NameRef = Name.toStringRef(NameSV);
+  if (NameRef.contains('\\')) {
+    NameSV = NameRef;
+    size_t S = 0;
+    // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
+    // other characters following \\, which we do not implement due to code
+    // structure.
+    for (size_t I = 0, E = NameSV.size(); I < E; ++I) {
+      char C = NameSV[I];
+      if (C == '\\') {
+        switch (NameSV[I + 1]) {
+        case '"':
+        case '\\':
+          C = NameSV[++I];
+          break;
+        }
+      }
+      NameSV[S++] = C;
+    }
+    NameSV.resize(S);
+    NameRef = NameSV;
+  }
 
   assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
 
diff --git a/llvm/test/MC/ELF/symbol-names.s b/llvm/test/MC/ELF/symbol-names.s
index f605c723d4d4d..8c891052ebd9a 100644
--- a/llvm/test/MC/ELF/symbol-names.s
+++ b/llvm/test/MC/ELF/symbol-names.s
@@ -1,12 +1,25 @@
-// RUN: llvm-mc -triple i686-pc-linux -filetype=obj %s -o - | llvm-readobj --symbols - | FileCheck %s
+// RUN: llvm-mc -triple=x86_64 -filetype=obj %s | llvm-objdump -tdr - | FileCheck %s
 
 // MC allows ?'s in symbol names as an extension.
 
+// CHECK-LABEL:SYMBOL TABLE:
+// CHECK-NEXT: 0000000000000001 l     F .text  0000000000000000 a"b\{{$}}
+// CHECK-NEXT: 0000000000000000 g     F .text  0000000000000000 foo?bar
+// CHECK-NEXT: 0000000000000000 *UND*          0000000000000000 a"b\q{{$}}
+// CHECK-EMPTY:
+
 .text
 .globl foo?bar
 .type foo?bar, @function
 foo?bar:
 ret
 
-// CHECK: Symbol
-// CHECK: Name: foo?bar
+// CHECK-LABEL:<a"b\>:
+// CHECK-NEXT:   callq  {{.*}} <a"b\>
+// CHECK-NEXT:   callq  {{.*}}
+// CHECK-NEXT:     R_X86_64_PLT32 a"b\q-0x4
+.type "a\"b\\", @function
+"a\"b\\":
+  call "a\"b\\"
+/// GAS emits a warning for \q
+  call "a\"b\q"

``````````

</details>


https://github.com/llvm/llvm-project/pull/138817


More information about the llvm-commits mailing list