[llvm] 0cf6688 - MC: Better handle backslash-escaped symbols (#158780)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 09:42:13 PDT 2025
Author: Fangrui Song
Date: 2025-09-16T16:42:09Z
New Revision: 0cf668889823e7dc526b0b3039c22452f61538f2
URL: https://github.com/llvm/llvm-project/commit/0cf668889823e7dc526b0b3039c22452f61538f2
DIFF: https://github.com/llvm/llvm-project/commit/0cf668889823e7dc526b0b3039c22452f61538f2.diff
LOG: MC: Better handle backslash-escaped symbols (#158780)
The MCContext::getOrCreateSymbol change in #138817 was a workaround.
With #158106, we can replace `getOrCreateSymbol` with `parseSymbol` in
llvm/lib/MC/MCParser to handle backslash-escaped symbols.
Added:
llvm/test/CodeGen/X86/symbol-name.ll
Modified:
llvm/include/llvm/MC/MCContext.h
llvm/lib/MC/MCContext.cpp
llvm/lib/MC/MCParser/AsmParser.cpp
llvm/lib/MC/MCParser/COFFMasmParser.cpp
llvm/lib/MC/MCParser/ELFAsmParser.cpp
llvm/lib/MC/MCParser/MCAsmParser.cpp
llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
llvm/lib/MC/MCParser/MasmParser.cpp
llvm/lib/MC/MCParser/WasmAsmParser.cpp
llvm/test/MC/ELF/cgprofile.s
llvm/test/MC/ELF/symbol-names.s
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index d96609c0fec21..4a528eecfc900 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -484,6 +484,10 @@ class MCContext {
/// \param Name - The symbol name, which must be unique across all symbols.
LLVM_ABI MCSymbol *getOrCreateSymbol(const Twine &Name);
+ /// Variant of getOrCreateSymbol that handles backslash-escaped symbols.
+ /// For example, parse "a\"b\\" as a"b\.
+ LLVM_ABI MCSymbol *parseSymbol(const Twine &Name);
+
/// Gets a symbol that will be defined to the final stack offset of a local
/// variable after codegen.
///
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 5d9ddc2f1306c..5df8692d2e7ba 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -203,27 +203,6 @@ MCInst *MCContext::createMCInst() {
MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
SmallString<128> NameSV;
StringRef NameRef = Name.toStringRef(NameSV);
- if (NameRef.contains('\\')) {
- NameSV = NameRef;
- size_t S = 0;
- // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
- // other characters following \\, which we do not implement due to code
- // structure.
- for (size_t I = 0, E = NameSV.size(); I != E; ++I) {
- char C = NameSV[I];
- if (C == '\\' && I + 1 != E) {
- switch (NameSV[I + 1]) {
- case '"':
- case '\\':
- C = NameSV[++I];
- break;
- }
- }
- NameSV[S++] = C;
- }
- NameSV.resize(S);
- NameRef = NameSV;
- }
assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
@@ -244,6 +223,34 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
return Entry.second.Symbol;
}
+MCSymbol *MCContext::parseSymbol(const Twine &Name) {
+ SmallString<128> SV;
+ StringRef NameRef = Name.toStringRef(SV);
+ if (NameRef.contains('\\')) {
+ SV = NameRef;
+ size_t S = 0;
+ // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
+ // other characters following \\, which we do not implement due to code
+ // structure.
+ for (size_t I = 0, E = SV.size(); I != E; ++I) {
+ char C = SV[I];
+ if (C == '\\' && I + 1 != E) {
+ switch (SV[I + 1]) {
+ case '"':
+ case '\\':
+ C = SV[++I];
+ break;
+ }
+ }
+ SV[S++] = C;
+ }
+ SV.resize(S);
+ NameRef = SV;
+ }
+
+ return getOrCreateSymbol(NameRef);
+}
+
MCSymbol *MCContext::getOrCreateFrameAllocSymbol(const Twine &FuncName,
unsigned Idx) {
return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName +
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 5fa1539790c73..acea3ab23680a 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1213,8 +1213,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
if (!Sym)
- Sym = getContext().getOrCreateSymbol(MAI.isHLASM() ? SymbolName.upper()
- : SymbolName);
+ Sym = getContext().parseSymbol(MAI.isHLASM() ? SymbolName.upper()
+ : SymbolName);
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
@@ -1845,7 +1845,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
RewrittenLabel);
IDVal = RewrittenLabel;
}
- Sym = getContext().getOrCreateSymbol(IDVal);
+ Sym = getContext().parseSymbol(IDVal);
} else
Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
// End of Labels should be treated as end of line for lexing
@@ -4885,7 +4885,7 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
if (discardLTOSymbol(Name))
return false;
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
// Assembler local symbols don't make any sense here, except for directives
// that the symbol should be tagged.
@@ -6142,7 +6142,7 @@ bool HLASMAsmParser::parseAsHLASMLabel(ParseStatementInfo &Info,
return Error(LabelLoc,
"Cannot have just a label for an HLASM inline asm statement");
- MCSymbol *Sym = getContext().getOrCreateSymbol(
+ MCSymbol *Sym = getContext().parseSymbol(
getContext().getAsmInfo()->isHLASM() ? LabelVal.upper() : LabelVal);
// Emit the label.
@@ -6270,7 +6270,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
Parser.getStreamer().emitValueToOffset(Value, 0, EqualLoc);
return false;
} else
- Sym = Parser.getContext().getOrCreateSymbol(Name);
+ Sym = Parser.getContext().parseSymbol(Name);
Sym->setRedefinable(allow_redef);
diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index ef2815b037f2f..04e12e56c4262 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -510,8 +510,8 @@ bool COFFMasmParser::parseDirectiveAlias(StringRef Directive, SMLoc Loc) {
getParser().parseAngleBracketString(ActualName))
return Error(getTok().getLoc(), "expected <actualName>");
- MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName);
- MCSymbol *Actual = getContext().getOrCreateSymbol(ActualName);
+ MCSymbol *Alias = getContext().parseSymbol(AliasName);
+ MCSymbol *Actual = getContext().parseSymbol(ActualName);
getStreamer().emitWeakReference(Alias, Actual);
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 19da9f57a4a6f..6195355626fd5 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -163,7 +163,7 @@ bool ELFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
continue;
}
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
getStreamer().emitSymbolAttribute(Sym, Attr);
diff --git a/llvm/lib/MC/MCParser/MCAsmParser.cpp b/llvm/lib/MC/MCParser/MCAsmParser.cpp
index 3721541c71e11..c1b7e57184de1 100644
--- a/llvm/lib/MC/MCParser/MCAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -168,7 +168,7 @@ bool MCAsmParser::parseSymbol(MCSymbol *&Res) {
if (parseIdentifier(Name))
return true;
- Res = getContext().getOrCreateSymbol(Name);
+ Res = getContext().parseSymbol(Name);
return false;
}
diff --git a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
index 7fa05088c9725..299d4b46a8a84 100644
--- a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -50,8 +50,8 @@ bool MCAsmParserExtension::parseDirectiveCGProfile(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *FromSym = getContext().getOrCreateSymbol(From);
- MCSymbol *ToSym = getContext().getOrCreateSymbol(To);
+ MCSymbol *FromSym = getContext().parseSymbol(From);
+ MCSymbol *ToSym = getContext().parseSymbol(To);
getStreamer().emitCGProfileEntry(
MCSymbolRefExpr::create(FromSym, getContext(), FromLoc),
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index b38c2f7e41634..7f0ea7830b495 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1480,7 +1480,7 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
auto VarIt = Variables.find(SymbolName.lower());
if (VarIt != Variables.end())
SymbolName = VarIt->second.Name;
- Sym = getContext().getOrCreateSymbol(SymbolName);
+ Sym = getContext().parseSymbol(SymbolName);
}
// If this is an absolute variable reference, substitute it now to preserve
@@ -1965,7 +1965,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
if (IDVal == "@@") {
Sym = Ctx.createDirectionalLocalSymbol(0);
} else {
- Sym = getContext().getOrCreateSymbol(IDVal);
+ Sym = getContext().parseSymbol(IDVal);
}
// End of Labels should be treated as end of line for lexing
@@ -3009,8 +3009,7 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
return false;
}
- auto *Sym =
- static_cast<MCSymbolCOFF *>(getContext().getOrCreateSymbol(Var.Name));
+ auto *Sym = static_cast<MCSymbolCOFF *>(getContext().parseSymbol(Var.Name));
const MCConstantExpr *PrevValue =
Sym->isVariable()
? dyn_cast_or_null<MCConstantExpr>(Sym->getVariableValue())
@@ -3318,7 +3317,7 @@ bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
StringRef Name, SMLoc NameLoc) {
if (StructInProgress.empty()) {
// Initialize named data value.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
getStreamer().emitLabel(Sym);
unsigned Count;
if (emitIntegralValues(Size, &Count))
@@ -3509,7 +3508,7 @@ bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
SMLoc NameLoc) {
if (StructInProgress.empty()) {
// Initialize named data value.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
getStreamer().emitLabel(Sym);
unsigned Count;
if (emitRealValues(Semantics, &Count))
@@ -4003,7 +4002,7 @@ bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
SMLoc DirLoc, StringRef Name) {
if (StructInProgress.empty()) {
// Initialize named data value.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().parseSymbol(Name);
getStreamer().emitLabel(Sym);
unsigned Count;
if (emitStructValues(Structure, &Count))
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 1befcacb3952f..75e8948f8ce9e 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -240,8 +240,7 @@ class WasmAsmParser : public MCAsmParserExtension {
return error("Expected label after .type directive, got: ",
Lexer->getTok());
auto *WasmSym = static_cast<MCSymbolWasm *>(
- getStreamer().getContext().getOrCreateSymbol(
- Lexer->getTok().getString()));
+ getStreamer().getContext().parseSymbol(Lexer->getTok().getString()));
Lex();
if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
Lexer->is(AsmToken::Identifier)))
diff --git a/llvm/test/CodeGen/X86/symbol-name.ll b/llvm/test/CodeGen/X86/symbol-name.ll
new file mode 100644
index 0000000000000..dd9be14fb053e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/symbol-name.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -mtriple=x86_64 -relocation-model=pic | FileCheck %s
+
+; CHECK: .globl "\\\""
+; CHECK-NEXT: "\\\"":
+@"\\\22" = constant i8 0
diff --git a/llvm/test/MC/ELF/cgprofile.s b/llvm/test/MC/ELF/cgprofile.s
index f8469ddc68877..28d8b72185556 100644
--- a/llvm/test/MC/ELF/cgprofile.s
+++ b/llvm/test/MC/ELF/cgprofile.s
@@ -5,11 +5,11 @@ a: .word b
.cg_profile a, b, 32
.cg_profile freq, a, 11
- .cg_profile late, late2, 20
+ .cg_profile "late\\", late2, 20
.cg_profile .L.local, b, 42
- .globl late
-late:
+ .globl "late\\"
+"late\\":
late2: .word 0
late3:
.L.local:
@@ -31,7 +31,7 @@ late3:
# CHECK-NEXT: 0010: 14000000 00000000 2A000000 00000000
# CHECK-NEXT: )
-# CHECK: Name: .rel.llvm.call-graph-profile (28)
+# CHECK: Name: .rel.llvm.call-graph-profile
# CHECK-NEXT: Type: SHT_REL (0x9)
# CHECK-NEXT: Flags [ (0x40)
# CHECK-NEXT: SHF_INFO_LINK
@@ -83,7 +83,7 @@ late3:
# CHECK-NEXT: Type:
# CHECK-NEXT: Other:
# CHECK-NEXT: Section: Undefined
-# CHECK: Name: late
+# CHECK: Name: late\ ([[#]])
# CHECK-NEXT: Value:
# CHECK-NEXT: Size:
# CHECK-NEXT: Binding: Global
diff --git a/llvm/test/MC/ELF/symbol-names.s b/llvm/test/MC/ELF/symbol-names.s
index 427187c329acf..f1593dd2f8099 100644
--- a/llvm/test/MC/ELF/symbol-names.s
+++ b/llvm/test/MC/ELF/symbol-names.s
@@ -5,6 +5,7 @@
// CHECK-LABEL:SYMBOL TABLE:
// CHECK-NEXT: 0000000000000001 l F .text 0000000000000000 a"b\{{$}}
// CHECK-NEXT: 0000000000000006 l .text 0000000000000000 a\{{$}}
+// CHECK-NEXT: 000000000000000b l .text 0000000000000000 a\\{{$}}
// CHECK-NEXT: 0000000000000000 g F .text 0000000000000000 foo?bar
// CHECK-NEXT: 0000000000000000 *UND* 0000000000000000 a"b\q{{$}}
// CHECK-EMPTY:
@@ -26,3 +27,5 @@ ret
"a\\":
/// GAS emits a warning for \q
call "a\"b\q"
+
+"a\\\\" = .
More information about the llvm-commits
mailing list