[llvm] 8893d40 - MC: Support quoted symbol names
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 19:00:22 PDT 2025
Author: Fangrui Song
Date: 2025-05-09T19:00:17-07:00
New Revision: 8893d407a68ece98d6911e3f186305dbc43ee90e
URL: https://github.com/llvm/llvm-project/commit/8893d407a68ece98d6911e3f186305dbc43ee90e
DIFF: https://github.com/llvm/llvm-project/commit/8893d407a68ece98d6911e3f186305dbc43ee90e.diff
LOG: MC: Support quoted symbol names
GNU as (gas) has supported double-quoted (") symbol names since 2015.
Both \ and " need to be escaped.
https://sourceware.org/pipermail/binutils/2015-August/090003.html
We don't unescape \\ or \" in assembly strings, so `clang -c --save-temps`
and `clang -c` produce different results for the following C code:
```
int x asm("a\"\\b");
```
Fix #138390
MC/COFF/safeseh.s looks incorrect. \01 in `.safeseh "\01foo"` is not a
correct escape sequence. Change it to \\.
Pull Request: https://github.com/llvm/llvm-project/pull/138817
Added:
Modified:
bolt/test/runtime/X86/fdata-escape-chars.ll
llvm/lib/MC/MCContext.cpp
llvm/lib/MC/MCSymbol.cpp
llvm/test/MC/AsmParser/quoted.s
llvm/test/MC/COFF/safeseh.s
llvm/test/MC/ELF/symbol-names.s
Removed:
################################################################################
diff --git a/bolt/test/runtime/X86/fdata-escape-chars.ll b/bolt/test/runtime/X86/fdata-escape-chars.ll
index 4ea781ad184be..1dce03ed54ea0 100644
--- a/bolt/test/runtime/X86/fdata-escape-chars.ll
+++ b/bolt/test/runtime/X86/fdata-escape-chars.ll
@@ -82,7 +82,7 @@ define internal void @static_symb_backslash_b() #0 {
; INSTR_CHECK: Binary Function "main"
; INSTR_CHECK: Exec Count : 1
; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 1
-; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb backslash\" # Count: 2
+; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb backslash\\" # Count: 2
; INSTR_CHECK: Binary Function "static symb backslash\/1(*2)"
; INSTR_CHECK: Exec Count : 1
; INSTR_CHECK: {{([[:xdigit:]]+)}}: callq "symb whitespace" # Count: 1
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index f70087e14f702..1a6f3b3c17ea0 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -212,6 +212,27 @@ MCDataFragment *MCContext::allocInitialFragment(MCSection &Sec) {
MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
SmallString<128> NameSV;
StringRef NameRef = Name.toStringRef(NameSV);
+ if (NameRef.contains('\\')) {
+ NameSV = NameRef;
+ size_t S = 0;
+ // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
+ // other characters following \\, which we do not implement due to code
+ // structure.
+ for (size_t I = 0, E = NameSV.size(); I != E; ++I) {
+ char C = NameSV[I];
+ if (C == '\\' && I + 1 != E) {
+ switch (NameSV[I + 1]) {
+ case '"':
+ case '\\':
+ C = NameSV[++I];
+ break;
+ }
+ }
+ NameSV[S++] = C;
+ }
+ NameSV.resize(S);
+ NameRef = NameSV;
+ }
assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
diff --git a/llvm/lib/MC/MCSymbol.cpp b/llvm/lib/MC/MCSymbol.cpp
index 2a709f4aef80c..3ca85b76a35d9 100644
--- a/llvm/lib/MC/MCSymbol.cpp
+++ b/llvm/lib/MC/MCSymbol.cpp
@@ -74,6 +74,8 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << "\\n";
else if (C == '"')
OS << "\\\"";
+ else if (C == '\\')
+ OS << "\\\\";
else
OS << C;
}
diff --git a/llvm/test/MC/AsmParser/quoted.s b/llvm/test/MC/AsmParser/quoted.s
index 16b0997827603..710f2d0155f1c 100644
--- a/llvm/test/MC/AsmParser/quoted.s
+++ b/llvm/test/MC/AsmParser/quoted.s
@@ -9,6 +9,9 @@
"a b":
call "a b"
+# CHECK: "a b\\":
+"a b\\":
+
#--- err.s
"a\":
# ERR: 1:2: error: unterminated string constant
diff --git a/llvm/test/MC/COFF/safeseh.s b/llvm/test/MC/COFF/safeseh.s
index d21628daff5fa..afcdc771ff994 100644
--- a/llvm/test/MC/COFF/safeseh.s
+++ b/llvm/test/MC/COFF/safeseh.s
@@ -2,5 +2,5 @@
// check that we quote the output of .safeseh
-.safeseh "\01foo"
-// CHECK: .safeseh "\01foo"
+.safeseh "\\foo"
+// CHECK: .safeseh "\\foo"
diff --git a/llvm/test/MC/ELF/symbol-names.s b/llvm/test/MC/ELF/symbol-names.s
index f605c723d4d4d..427187c329acf 100644
--- a/llvm/test/MC/ELF/symbol-names.s
+++ b/llvm/test/MC/ELF/symbol-names.s
@@ -1,12 +1,28 @@
-// RUN: llvm-mc -triple i686-pc-linux -filetype=obj %s -o - | llvm-readobj --symbols - | FileCheck %s
+// RUN: llvm-mc -triple=x86_64 -filetype=obj %s | llvm-objdump -tdr - | FileCheck %s
// MC allows ?'s in symbol names as an extension.
+// CHECK-LABEL:SYMBOL TABLE:
+// CHECK-NEXT: 0000000000000001 l F .text 0000000000000000 a"b\{{$}}
+// CHECK-NEXT: 0000000000000006 l .text 0000000000000000 a\{{$}}
+// CHECK-NEXT: 0000000000000000 g F .text 0000000000000000 foo?bar
+// CHECK-NEXT: 0000000000000000 *UND* 0000000000000000 a"b\q{{$}}
+// CHECK-EMPTY:
+
.text
.globl foo?bar
.type foo?bar, @function
foo?bar:
ret
-// CHECK: Symbol
-// CHECK: Name: foo?bar
+// CHECK-LABEL:<a"b\>:
+// CHECK-NEXT: callq {{.*}} <a"b\>
+// CHECK-LABEL:<a\>:
+// CHECK-NEXT: callq {{.*}}
+// CHECK-NEXT: R_X86_64_PLT32 a"b\q-0x4
+.type "a\"b\\", @function
+"a\"b\\":
+ call "a\"b\\"
+"a\\":
+/// GAS emits a warning for \q
+ call "a\"b\q"
More information about the llvm-commits
mailing list