[llvm] 0cf6688 - MC: Better handle backslash-escaped symbols (#158780)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 16 09:42:13 PDT 2025


Author: Fangrui Song
Date: 2025-09-16T16:42:09Z
New Revision: 0cf668889823e7dc526b0b3039c22452f61538f2

URL: https://github.com/llvm/llvm-project/commit/0cf668889823e7dc526b0b3039c22452f61538f2
DIFF: https://github.com/llvm/llvm-project/commit/0cf668889823e7dc526b0b3039c22452f61538f2.diff

LOG: MC: Better handle backslash-escaped symbols (#158780)

The MCContext::getOrCreateSymbol change in #138817 was a workaround.
With #158106, we can replace `getOrCreateSymbol` with `parseSymbol`, in
llvm/lib/MC/MCParser to handle backslash-escaped symbols.

Added: 
    llvm/test/CodeGen/X86/symbol-name.ll

Modified: 
    llvm/include/llvm/MC/MCContext.h
    llvm/lib/MC/MCContext.cpp
    llvm/lib/MC/MCParser/AsmParser.cpp
    llvm/lib/MC/MCParser/COFFMasmParser.cpp
    llvm/lib/MC/MCParser/ELFAsmParser.cpp
    llvm/lib/MC/MCParser/MCAsmParser.cpp
    llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
    llvm/lib/MC/MCParser/MasmParser.cpp
    llvm/lib/MC/MCParser/WasmAsmParser.cpp
    llvm/test/MC/ELF/cgprofile.s
    llvm/test/MC/ELF/symbol-names.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index d96609c0fec21..4a528eecfc900 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -484,6 +484,10 @@ class MCContext {
   /// \param Name - The symbol name, which must be unique across all symbols.
   LLVM_ABI MCSymbol *getOrCreateSymbol(const Twine &Name);
 
+  /// Variant of getOrCreateSymbol that handles backslash-escaped symbols.
+  /// For example, parse "a\"b\\" as a"\.
+  LLVM_ABI MCSymbol *parseSymbol(const Twine &Name);
+
   /// Gets a symbol that will be defined to the final stack offset of a local
   /// variable after codegen.
   ///

diff  --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 5d9ddc2f1306c..5df8692d2e7ba 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -203,27 +203,6 @@ MCInst *MCContext::createMCInst() {
 MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
   SmallString<128> NameSV;
   StringRef NameRef = Name.toStringRef(NameSV);
-  if (NameRef.contains('\\')) {
-    NameSV = NameRef;
-    size_t S = 0;
-    // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
-    // other characters following \\, which we do not implement due to code
-    // structure.
-    for (size_t I = 0, E = NameSV.size(); I != E; ++I) {
-      char C = NameSV[I];
-      if (C == '\\' && I + 1 != E) {
-        switch (NameSV[I + 1]) {
-        case '"':
-        case '\\':
-          C = NameSV[++I];
-          break;
-        }
-      }
-      NameSV[S++] = C;
-    }
-    NameSV.resize(S);
-    NameRef = NameSV;
-  }
 
   assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
 
@@ -244,6 +223,34 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
   return Entry.second.Symbol;
 }
 
+MCSymbol *MCContext::parseSymbol(const Twine &Name) {
+  SmallString<128> SV;
+  StringRef NameRef = Name.toStringRef(SV);
+  if (NameRef.contains('\\')) {
+    SV = NameRef;
+    size_t S = 0;
+    // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
+    // other characters following \\, which we do not implement due to code
+    // structure.
+    for (size_t I = 0, E = SV.size(); I != E; ++I) {
+      char C = SV[I];
+      if (C == '\\' && I + 1 != E) {
+        switch (SV[I + 1]) {
+        case '"':
+        case '\\':
+          C = SV[++I];
+          break;
+        }
+      }
+      SV[S++] = C;
+    }
+    SV.resize(S);
+    NameRef = SV;
+  }
+
+  return getOrCreateSymbol(NameRef);
+}
+
 MCSymbol *MCContext::getOrCreateFrameAllocSymbol(const Twine &FuncName,
                                                  unsigned Idx) {
   return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName +

diff  --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 5fa1539790c73..acea3ab23680a 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1213,8 +1213,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
 
     MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
     if (!Sym)
-      Sym = getContext().getOrCreateSymbol(MAI.isHLASM() ? SymbolName.upper()
-                                                         : SymbolName);
+      Sym = getContext().parseSymbol(MAI.isHLASM() ? SymbolName.upper()
+                                                   : SymbolName);
 
     // If this is an absolute variable reference, substitute it now to preserve
     // semantics in the face of reassignment.
@@ -1845,7 +1845,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
                                        RewrittenLabel);
         IDVal = RewrittenLabel;
       }
-      Sym = getContext().getOrCreateSymbol(IDVal);
+      Sym = getContext().parseSymbol(IDVal);
     } else
       Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
     // End of Labels should be treated as end of line for lexing
@@ -4885,7 +4885,7 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
     if (discardLTOSymbol(Name))
       return false;
 
-    MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+    MCSymbol *Sym = getContext().parseSymbol(Name);
 
     // Assembler local symbols don't make any sense here, except for directives
     // that the symbol should be tagged.
@@ -6142,7 +6142,7 @@ bool HLASMAsmParser::parseAsHLASMLabel(ParseStatementInfo &Info,
     return Error(LabelLoc,
                  "Cannot have just a label for an HLASM inline asm statement");
 
-  MCSymbol *Sym = getContext().getOrCreateSymbol(
+  MCSymbol *Sym = getContext().parseSymbol(
       getContext().getAsmInfo()->isHLASM() ? LabelVal.upper() : LabelVal);
 
   // Emit the label.
@@ -6270,7 +6270,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
     Parser.getStreamer().emitValueToOffset(Value, 0, EqualLoc);
     return false;
   } else
-    Sym = Parser.getContext().getOrCreateSymbol(Name);
+    Sym = Parser.getContext().parseSymbol(Name);
 
   Sym->setRedefinable(allow_redef);
 

diff  --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index ef2815b037f2f..04e12e56c4262 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -510,8 +510,8 @@ bool COFFMasmParser::parseDirectiveAlias(StringRef Directive, SMLoc Loc) {
       getParser().parseAngleBracketString(ActualName))
     return Error(getTok().getLoc(), "expected <actualName>");
 
-  MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName);
-  MCSymbol *Actual = getContext().getOrCreateSymbol(ActualName);
+  MCSymbol *Alias = getContext().parseSymbol(AliasName);
+  MCSymbol *Actual = getContext().parseSymbol(ActualName);
 
   getStreamer().emitWeakReference(Alias, Actual);
 

diff  --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 19da9f57a4a6f..6195355626fd5 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -163,7 +163,7 @@ bool ELFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
         continue;
       }
 
-      MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+      MCSymbol *Sym = getContext().parseSymbol(Name);
 
       getStreamer().emitSymbolAttribute(Sym, Attr);
 

diff  --git a/llvm/lib/MC/MCParser/MCAsmParser.cpp b/llvm/lib/MC/MCParser/MCAsmParser.cpp
index 3721541c71e11..c1b7e57184de1 100644
--- a/llvm/lib/MC/MCParser/MCAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -168,7 +168,7 @@ bool MCAsmParser::parseSymbol(MCSymbol *&Res) {
   if (parseIdentifier(Name))
     return true;
 
-  Res = getContext().getOrCreateSymbol(Name);
+  Res = getContext().parseSymbol(Name);
   return false;
 }
 

diff  --git a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
index 7fa05088c9725..299d4b46a8a84 100644
--- a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -50,8 +50,8 @@ bool MCAsmParserExtension::parseDirectiveCGProfile(StringRef, SMLoc) {
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in directive");
 
-  MCSymbol *FromSym = getContext().getOrCreateSymbol(From);
-  MCSymbol *ToSym = getContext().getOrCreateSymbol(To);
+  MCSymbol *FromSym = getContext().parseSymbol(From);
+  MCSymbol *ToSym = getContext().parseSymbol(To);
 
   getStreamer().emitCGProfileEntry(
       MCSymbolRefExpr::create(FromSym, getContext(), FromLoc),

diff  --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index b38c2f7e41634..7f0ea7830b495 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1480,7 +1480,7 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
       auto VarIt = Variables.find(SymbolName.lower());
       if (VarIt != Variables.end())
         SymbolName = VarIt->second.Name;
-      Sym = getContext().getOrCreateSymbol(SymbolName);
+      Sym = getContext().parseSymbol(SymbolName);
     }
 
     // If this is an absolute variable reference, substitute it now to preserve
@@ -1965,7 +1965,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
     if (IDVal == "@@") {
       Sym = Ctx.createDirectionalLocalSymbol(0);
     } else {
-      Sym = getContext().getOrCreateSymbol(IDVal);
+      Sym = getContext().parseSymbol(IDVal);
     }
 
     // End of Labels should be treated as end of line for lexing
@@ -3009,8 +3009,7 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
     return false;
   }
 
-  auto *Sym =
-      static_cast<MCSymbolCOFF *>(getContext().getOrCreateSymbol(Var.Name));
+  auto *Sym = static_cast<MCSymbolCOFF *>(getContext().parseSymbol(Var.Name));
   const MCConstantExpr *PrevValue =
       Sym->isVariable()
           ? dyn_cast_or_null<MCConstantExpr>(Sym->getVariableValue())
@@ -3318,7 +3317,7 @@ bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
                                           StringRef Name, SMLoc NameLoc) {
   if (StructInProgress.empty()) {
     // Initialize named data value.
-    MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+    MCSymbol *Sym = getContext().parseSymbol(Name);
     getStreamer().emitLabel(Sym);
     unsigned Count;
     if (emitIntegralValues(Size, &Count))
@@ -3509,7 +3508,7 @@ bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
                                               SMLoc NameLoc) {
   if (StructInProgress.empty()) {
     // Initialize named data value.
-    MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+    MCSymbol *Sym = getContext().parseSymbol(Name);
     getStreamer().emitLabel(Sym);
     unsigned Count;
     if (emitRealValues(Semantics, &Count))
@@ -4003,7 +4002,7 @@ bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
                                                 SMLoc DirLoc, StringRef Name) {
   if (StructInProgress.empty()) {
     // Initialize named data value.
-    MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+    MCSymbol *Sym = getContext().parseSymbol(Name);
     getStreamer().emitLabel(Sym);
     unsigned Count;
     if (emitStructValues(Structure, &Count))

diff  --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 1befcacb3952f..75e8948f8ce9e 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -240,8 +240,7 @@ class WasmAsmParser : public MCAsmParserExtension {
       return error("Expected label after .type directive, got: ",
                    Lexer->getTok());
     auto *WasmSym = static_cast<MCSymbolWasm *>(
-        getStreamer().getContext().getOrCreateSymbol(
-            Lexer->getTok().getString()));
+        getStreamer().getContext().parseSymbol(Lexer->getTok().getString()));
     Lex();
     if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
           Lexer->is(AsmToken::Identifier)))

diff  --git a/llvm/test/CodeGen/X86/symbol-name.ll b/llvm/test/CodeGen/X86/symbol-name.ll
new file mode 100644
index 0000000000000..dd9be14fb053e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/symbol-name.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -mtriple=x86_64 -relocation-model=pic | FileCheck %s
+
+; CHECK:      .globl  "\\\""
+; CHECK-NEXT: "\\\"":
+@"\\\22" = constant i8 0

diff  --git a/llvm/test/MC/ELF/cgprofile.s b/llvm/test/MC/ELF/cgprofile.s
index f8469ddc68877..28d8b72185556 100644
--- a/llvm/test/MC/ELF/cgprofile.s
+++ b/llvm/test/MC/ELF/cgprofile.s
@@ -5,11 +5,11 @@ a: .word b
 
   .cg_profile a, b, 32
   .cg_profile freq, a, 11
-  .cg_profile late, late2, 20
+  .cg_profile "late\\", late2, 20
   .cg_profile .L.local, b, 42
 
-	.globl late
-late:
+	.globl "late\\"
+"late\\":
 late2: .word 0
 late3:
 .L.local:
@@ -31,7 +31,7 @@ late3:
 # CHECK-NEXT:   0010: 14000000 00000000 2A000000 00000000
 # CHECK-NEXT: )
 
-# CHECK:      Name: .rel.llvm.call-graph-profile (28)
+# CHECK:      Name: .rel.llvm.call-graph-profile
 # CHECK-NEXT: Type: SHT_REL (0x9)
 # CHECK-NEXT: Flags [ (0x40)
 # CHECK-NEXT:   SHF_INFO_LINK
@@ -83,7 +83,7 @@ late3:
 # CHECK-NEXT: Type:
 # CHECK-NEXT: Other:
 # CHECK-NEXT: Section: Undefined
-# CHECK:      Name: late
+# CHECK:      Name: late\ ([[#]])
 # CHECK-NEXT: Value:
 # CHECK-NEXT: Size:
 # CHECK-NEXT: Binding: Global

diff  --git a/llvm/test/MC/ELF/symbol-names.s b/llvm/test/MC/ELF/symbol-names.s
index 427187c329acf..f1593dd2f8099 100644
--- a/llvm/test/MC/ELF/symbol-names.s
+++ b/llvm/test/MC/ELF/symbol-names.s
@@ -5,6 +5,7 @@
 // CHECK-LABEL:SYMBOL TABLE:
 // CHECK-NEXT: 0000000000000001 l     F .text  0000000000000000 a"b\{{$}}
 // CHECK-NEXT: 0000000000000006 l       .text  0000000000000000 a\{{$}}
+// CHECK-NEXT: 000000000000000b l       .text  0000000000000000 a\\{{$}}
 // CHECK-NEXT: 0000000000000000 g     F .text  0000000000000000 foo?bar
 // CHECK-NEXT: 0000000000000000 *UND*          0000000000000000 a"b\q{{$}}
 // CHECK-EMPTY:
@@ -26,3 +27,5 @@ ret
 "a\\":
 /// GAS emits a warning for \q
   call "a\"b\q"
+
+"a\\\\" = .


        


More information about the llvm-commits mailing list