[llvm] e015626 - MC: Allow .set to reassign non-MCConstantExpr expressions
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Mon May 26 21:58:23 PDT 2025
Author: Fangrui Song
Date: 2025-05-26T21:58:18-07:00
New Revision: e015626f189dc76f8df9fdc25a47638c6a2f3feb
URL: https://github.com/llvm/llvm-project/commit/e015626f189dc76f8df9fdc25a47638c6a2f3feb
DIFF: https://github.com/llvm/llvm-project/commit/e015626f189dc76f8df9fdc25a47638c6a2f3feb.diff
LOG: MC: Allow .set to reassign non-MCConstantExpr expressions
GNU Assembler supports symbol reassignment via .set, .equ, or =.
However, LLVM's integrated assembler only allows reassignment for
MCConstantExpr cases, as it struggles with scenarios like:
```
.data
.set x, 0
.long x // reference the first instance
x = .-.data
.long x // reference the second instance
.set x,.-.data
.long x // reference the third instance
```
Between two assignments binds, we cannot ensure that a reference binds
to the earlier assignment. We use MCSymbol::IsUsed and other conditions
to reject potentially unsafe reassignments, but certain MCConstantExpr
uses could be unsafe as well.
This patch enables reassignment by cloning the symbol upon reassignment
and updating the symbol table. Existing references to the original
symbol remain unchanged, and the original symbol is excluded from the
emitted symbol table.
Added:
Modified:
llvm/include/llvm/MC/MCContext.h
llvm/include/llvm/MC/MCSymbol.h
llvm/lib/MC/ELFObjectWriter.cpp
llvm/lib/MC/MCContext.cpp
llvm/lib/MC/MCExpr.cpp
llvm/lib/MC/MCParser/AsmParser.cpp
llvm/lib/MC/MCParser/MasmParser.cpp
llvm/lib/MC/MCSymbol.cpp
llvm/test/MC/ARM/thumb_set-diagnostics.s
llvm/test/MC/AsmParser/redef.s
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 73c6d57f107f7..a2a53427be986 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -492,6 +492,11 @@ class MCContext {
/// Get the symbol for \p Name, or null.
MCSymbol *lookupSymbol(const Twine &Name) const;
+ /// Clone a symbol for the .set directive, replacing it in the symbol table.
+ /// Existing references to the original symbol remain unchanged, and the
+ /// original symbol is not emitted to the symbol table.
+ MCSymbol *cloneSymbol(MCSymbol &Sym);
+
/// Set value for a symbol.
void setSymbolValue(MCStreamer &Streamer, const Twine &Sym, uint64_t Val);
diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h
index 3885ce0bff37f..77515c82e096a 100644
--- a/llvm/include/llvm/MC/MCSymbol.h
+++ b/llvm/include/llvm/MC/MCSymbol.h
@@ -90,9 +90,6 @@ class MCSymbol {
/// True if this symbol can be redefined.
unsigned IsRedefinable : 1;
- /// IsUsed - True if this symbol has been used.
- mutable unsigned IsUsed : 1;
-
mutable unsigned IsRegistered : 1;
/// True if this symbol is visible outside this translation unit. Note: ELF
@@ -165,9 +162,9 @@ class MCSymbol {
};
MCSymbol(SymbolKind Kind, const MCSymbolTableEntry *Name, bool isTemporary)
- : IsTemporary(isTemporary), IsRedefinable(false), IsUsed(false),
- IsRegistered(false), IsExternal(false), IsPrivateExtern(false),
- IsWeakExternal(false), Kind(Kind), IsUsedInReloc(false), IsResolving(0),
+ : IsTemporary(isTemporary), IsRedefinable(false), IsRegistered(false),
+ IsExternal(false), IsPrivateExtern(false), IsWeakExternal(false),
+ Kind(Kind), IsUsedInReloc(false), IsResolving(0),
SymbolContents(SymContentsUnset), CommonAlignLog2(0), Flags(0) {
Offset = 0;
HasName = !!Name;
@@ -175,6 +172,9 @@ class MCSymbol {
getNameEntryPtr() = Name;
}
+ MCSymbol(const MCSymbol &) = default;
+ MCSymbol &operator=(const MCSymbol &) = delete;
+
// Provide custom new/delete as we will only allocate space for a name
// if we need one.
void *operator new(size_t s, const MCSymbolTableEntry *Name, MCContext &Ctx);
@@ -201,9 +201,6 @@ class MCSymbol {
}
public:
- MCSymbol(const MCSymbol &) = delete;
- MCSymbol &operator=(const MCSymbol &) = delete;
-
/// getName - Get the symbol name.
StringRef getName() const {
if (!HasName)
@@ -224,9 +221,6 @@ class MCSymbol {
/// isTemporary - Check if this is an assembler temporary symbol.
bool isTemporary() const { return IsTemporary; }
- /// isUsed - Check if this is used.
- bool isUsed() const { return IsUsed; }
-
/// Check if this symbol is redefinable.
bool isRedefinable() const { return IsRedefinable; }
/// Mark this symbol as redefinable.
@@ -306,10 +300,9 @@ class MCSymbol {
return SymbolContents == SymContentsVariable;
}
- /// getVariableValue - Get the value for variable symbols.
+ /// Get the expression of the variable symbol.
const MCExpr *getVariableValue(bool SetUsed = true) const {
assert(isVariable() && "Invalid accessor!");
- IsUsed |= SetUsed;
return Value;
}
@@ -404,7 +397,7 @@ class MCSymbol {
return Fragment;
// If the symbol is a non-weak alias, get information about
// the aliasee. (Don't try to resolve weak aliases.)
- Fragment = getVariableValue(false)->findAssociatedFragment();
+ Fragment = getVariableValue()->findAssociatedFragment();
return Fragment;
}
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 592081d81792a..933a64d4ddc5a 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -446,8 +446,7 @@ void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex,
// needs. MCBinaryExpr is not handled.
const MCSymbolELF *Sym = &Symbol;
while (Sym->isVariable()) {
- if (auto *Expr =
- dyn_cast<MCSymbolRefExpr>(Sym->getVariableValue(false))) {
+ if (auto *Expr = dyn_cast<MCSymbolRefExpr>(Sym->getVariableValue())) {
Sym = cast<MCSymbolELF>(&Expr->getSymbol());
if (!Sym->getSize())
continue;
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index dc92acafb25d2..e0e5e7bfc37d7 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -309,6 +309,35 @@ MCSymbol *MCContext::createSymbolImpl(const MCSymbolTableEntry *Name,
MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary);
}
+MCSymbol *MCContext::cloneSymbol(MCSymbol &Sym) {
+ MCSymbol *NewSym = nullptr;
+ auto Name = Sym.getNameEntryPtr();
+ switch (getObjectFileType()) {
+ case MCContext::IsCOFF:
+ NewSym = new (Name, *this) MCSymbolCOFF(cast<MCSymbolCOFF>(Sym));
+ break;
+ case MCContext::IsELF:
+ NewSym = new (Name, *this) MCSymbolELF(cast<MCSymbolELF>(Sym));
+ break;
+ case MCContext::IsMachO:
+ NewSym = new (Name, *this) MCSymbolMachO(cast<MCSymbolMachO>(Sym));
+ break;
+ default:
+ reportFatalUsageError(".set redefinition is not supported");
+ break;
+ }
+ // Set the name and redirect the `Symbols` entry to `NewSym`.
+ NewSym->getNameEntryPtr() = Name;
+ const_cast<MCSymbolTableEntry *>(Name)->second.Symbol = NewSym;
+ // Ensure the next `registerSymbol` call will add the new symbol to `Symbols`.
+ NewSym->setIsRegistered(false);
+
+ // Ensure the original symbol is not emitted to the symbol table.
+ Sym.IsTemporary = true;
+ Sym.setExternal(false);
+ return NewSym;
+}
+
MCSymbol *MCContext::createRenamableSymbol(const Twine &Name,
bool AlwaysAddSuffix,
bool IsTemporary) {
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 6c9fc94a5fdf9..70c9c5cf07bcf 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -479,8 +479,6 @@ static bool canExpand(const MCSymbol &Sym, bool InSet) {
if (Sym.isWeakExternal())
return false;
- Sym.getVariableValue(true);
-
if (InSet)
return true;
return !Sym.isInSection();
@@ -508,7 +506,6 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
Asm->getContext().reportError(
Sym.getVariableValue()->getLoc(),
"cyclic dependency detected for symbol '" + Sym.getName() + "'");
- Sym.IsUsed = false;
Sym.setVariableValue(MCConstantExpr::create(0, Asm->getContext()));
}
return false;
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 8019953f5fee9..4e56931387ed3 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1231,14 +1231,14 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable()) {
- auto V = Sym->getVariableValue(/*SetUsed*/ false);
+ auto V = Sym->getVariableValue();
bool DoInline = isa<MCConstantExpr>(V) && !Variant;
if (auto TV = dyn_cast<MCTargetExpr>(V))
DoInline = TV->inlineAssignedExpr();
if (DoInline) {
if (Variant)
return Error(EndLoc, "unexpected modifier on variable reference");
- Res = Sym->getVariableValue(/*SetUsed*/ false);
+ Res = Sym->getVariableValue();
return false;
}
}
@@ -6332,11 +6332,6 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
SMLoc EqualLoc = Parser.getTok().getLoc();
if (Parser.parseExpression(Value))
return Parser.TokError("missing expression");
-
- // Note: we don't count b as used in "a = b". This is to allow
- // a = b
- // b = c
-
if (Parser.parseEOL())
return true;
@@ -6344,22 +6339,13 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
// used as a symbol, or it is an absolute symbol).
Sym = Parser.getContext().lookupSymbol(Name);
if (Sym) {
- // Diagnose assignment to a label.
- //
- // FIXME: Diagnostics. Note the location of the definition as a label.
- // FIXME: Diagnose assignment to protected identifier (e.g., register name).
if (!Sym->isUnset() && (!allow_redef || !Sym->isRedefinable()))
return Parser.Error(EqualLoc, "redefinition of '" + Name + "'");
- else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
- ; // Allow redefinitions of undefined symbols only used in directives.
- else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
- ; // Allow redefinitions of variables that haven't yet been used.
- else if (!Sym->isVariable())
- return Parser.Error(EqualLoc, "invalid assignment to '" + Name + "'");
- else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
- return Parser.Error(EqualLoc,
- "invalid reassignment of non-absolute variable '" +
- Name + "'");
+ // If the symbol is redefinable, clone it and update the symbol table
+ // to the new symbol. Existing references to the original symbol remain
+ // unchanged.
+ if (Sym->isRedefinable())
+ Sym = Parser.getContext().cloneSymbol(*Sym);
} else if (Name == ".") {
Parser.getStreamer().emitValueToOffset(Value, 0, EqualLoc);
return false;
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index c259de61aab4b..c596006ba9a0c 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1486,12 +1486,12 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable()) {
- auto V = Sym->getVariableValue(/*SetUsed=*/false);
+ auto V = Sym->getVariableValue();
bool DoInline = isa<MCConstantExpr>(V);
if (auto TV = dyn_cast<MCTargetExpr>(V))
DoInline = TV->inlineAssignedExpr();
if (DoInline) {
- Res = Sym->getVariableValue(/*SetUsed=*/false);
+ Res = Sym->getVariableValue();
return false;
}
}
@@ -3012,9 +3012,9 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
const MCConstantExpr *PrevValue =
- Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
- Sym->getVariableValue(/*SetUsed=*/false))
- : nullptr;
+ Sym->isVariable()
+ ? dyn_cast_or_null<MCConstantExpr>(Sym->getVariableValue())
+ : nullptr;
if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
switch (Var.Redefinable) {
case Variable::NOT_REDEFINABLE:
diff --git a/llvm/lib/MC/MCSymbol.cpp b/llvm/lib/MC/MCSymbol.cpp
index 3ca85b76a35d9..100f693a739e4 100644
--- a/llvm/lib/MC/MCSymbol.cpp
+++ b/llvm/lib/MC/MCSymbol.cpp
@@ -45,7 +45,6 @@ void *MCSymbol::operator new(size_t s, const MCSymbolTableEntry *Name,
}
void MCSymbol::setVariableValue(const MCExpr *Value) {
- assert(!IsUsed && "Cannot set a variable that has already been used.");
assert(Value && "Invalid variable value!");
assert((SymbolContents == SymContentsUnset ||
SymbolContents == SymContentsVariable) &&
diff --git a/llvm/test/MC/ARM/thumb_set-diagnostics.s b/llvm/test/MC/ARM/thumb_set-diagnostics.s
index 4c8c927f5dbac..6872ce2b29c39 100644
--- a/llvm/test/MC/ARM/thumb_set-diagnostics.s
+++ b/llvm/test/MC/ARM/thumb_set-diagnostics.s
@@ -1,11 +1,12 @@
-@ RUN: not llvm-mc -triple armv7-eabi -o /dev/null %s 2>&1 | FileCheck %s
-@ RUN: not llvm-mc -filetype=obj -triple=armv7-eabi -o /dev/null %s --defsym LOOP=1 2>&1 | FileCheck %s --check-prefix=ERR2 --implicit-check-not=error:
+@ RUN: rm -rf %t && split-file %s %t --leading-lines && cd %t
+@ RUN: not llvm-mc -triple armv7-eabi -o /dev/null a.s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -filetype=obj -triple=armv7-eabi -o /dev/null redef.s 2>&1 | FileCheck %s --check-prefix=ERR
+@ RUN: not llvm-mc -filetype=obj -triple=armv7-eabi -o /dev/null cycle.s 2>&1 | FileCheck %s --check-prefix=ERR2 --implicit-check-not=error:
+//--- a.s
.syntax unified
.thumb
-
-.ifndef LOOP
.thumb_set
@ CHECK: error: expected identifier after '.thumb_set'
@@ -42,29 +43,20 @@
@ CHECK: .thumb_set trailer_trash, 0x11fe1e55,
@ CHECK: ^
+//--- redef.s
.type alpha,%function
alpha:
nop
-
.type beta,%function
beta:
- bkpt
-
- .thumb_set beta, alpha
-
-@ CHECK: error: redefinition of 'beta'
-@ CHECK: .thumb_set beta, alpha
-@ CHECK: ^
+.thumb_set beta, alpha
+@ ERR: [[#@LINE-1]]:18: error: redefinition of 'beta'
variable_result = alpha + 1
.long variable_result
.thumb_set variable_result, 1
-@ CHECK: error: invalid reassignment of non-absolute variable 'variable_result'
-@ CHECK: .thumb_set variable_result, 1
-@ CHECK: ^
-
-.else
+//--- cycle.s
.type recursive_use,%function
.thumb_set recursive_use, recursive_use + 1
@ ERR2: [[#@LINE-1]]:41: error: cyclic dependency detected for symbol 'recursive_use'
@@ -74,4 +66,3 @@ beta:
.thumb_set recursive_use2, recursive_use2 + 1
@ ERR2: [[#@LINE-1]]:43: error: cyclic dependency detected for symbol 'recursive_use2'
@ ERR2: [[#@LINE-2]]:43: error: expression could not be evaluated
-.endif
diff --git a/llvm/test/MC/AsmParser/redef.s b/llvm/test/MC/AsmParser/redef.s
index 56a13b10169ea..166cedf3725c7 100644
--- a/llvm/test/MC/AsmParser/redef.s
+++ b/llvm/test/MC/AsmParser/redef.s
@@ -1,4 +1,17 @@
-# RUN: not llvm-mc -filetype=obj -triple=x86_64 %s -o /dev/null 2>&1 | FileCheck %s --implicit-check-not=error:
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t
+# RUN: llvm-objdump -ts %t | FileCheck %s
+
+# CHECK: SYMBOL TABLE:
+# CHECK-NEXT: 0000000000000000 l .text 0000000000000000 l
+# CHECK-NEXT: 0000000000000008 l *ABS* 0000000000000000 x
+# CHECK-NEXT: 0000000000000002 l *ABS* 0000000000000000 b
+# CHECK-NEXT: 0000000000000003 l *ABS* 0000000000000000 c
+# CHECK-NEXT: ffffffffffffffff l *ABS* 0000000000000000 a
+# CHECK-NEXT: 0000000000000000 g .text 0000000000000000 l_v
+# CHECK: Contents of section .data:
+# CHECK-NEXT: 0000 00000000 04000000 08000000 .
+# CHECK-NEXT: Contents of section .data1:
+# CHECK-NEXT: 0000 010203ff 0203 ......
l:
@@ -8,11 +21,23 @@ l:
x = .-.data
.long x
.set x,.-.data
-# CHECK: [[#@LINE-1]]:8: error: invalid reassignment of non-absolute variable 'x'
-## TODO This should be allowed
.long x
.globl l_v
.set l_v, l
.globl l_v
.set l_v, l
+
+.section .data1,"aw"
+.equiv b, 2*a
+.set a, 1
+.equiv c, 3*a
+
+.if b > a
+.byte a, b, c
+.endif
+
+.set a, -a
+.if b > a
+.byte a, b, c
+.endif
More information about the llvm-commits
mailing list