[llvm] 33246f7 - [MC] Rework evaluateSymbolicAdd to eliminate MCValue::SymB reliance
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 5 11:16:17 PDT 2025
Author: Fangrui Song
Date: 2025-04-05T11:16:12-07:00
New Revision: 33246f79e87a0e629ae776d1811a1175a3f10065
URL: https://github.com/llvm/llvm-project/commit/33246f79e87a0e629ae776d1811a1175a3f10065
DIFF: https://github.com/llvm/llvm-project/commit/33246f79e87a0e629ae776d1811a1175a3f10065.diff
LOG: [MC] Rework evaluateSymbolicAdd to eliminate MCValue::SymB reliance
Reworked evaluateSymbolicAdd and isSymbolRefDifferenceFullyResolved to
remove their reliance on MCValue::SymB, which previously used the
MCSymbolRefExpr member when folding two symbolic expressions. This
dependency prevented replacing MCValue::SymB with a MCSymbol. By
refactoring, we enable this replacement, which is a more significant
improvement.
Note that this change eliminates the rare "unsupported subtraction of
qualified symbol" diagnostic, resulting in a minor loss of information.
However, the benefit of enabling MCValue::SymB replacement with MCSymbol
outweighs this slight regression.
Added:
Modified:
llvm/include/llvm/MC/MCExpr.h
llvm/include/llvm/MC/MCObjectWriter.h
llvm/include/llvm/MC/MCValue.h
llvm/lib/MC/MCAssembler.cpp
llvm/lib/MC/MCExpr.cpp
llvm/lib/MC/MCObjectWriter.cpp
llvm/test/MC/AArch64/elf-reloc-ptrauth.s
llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s
llvm/test/MC/ELF/bad-expr.s
llvm/test/MC/X86/macho-reloc-errors-x86_64.s
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 782f7ea8957d9..3127a93d32581 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -259,6 +259,9 @@ class MCSymbolRefExpr : public MCExpr {
VariantKind getKind() const {
return (VariantKind)(getSubclassData() & VariantKindMask);
}
+ uint16_t getSpecifier() const {
+ return (getSubclassData() & VariantKindMask);
+ }
bool hasSubsectionsViaSymbols() const {
return (getSubclassData() & HasSubsectionsViaSymbolsBit) != 0;
diff --git a/llvm/include/llvm/MC/MCObjectWriter.h b/llvm/include/llvm/MC/MCObjectWriter.h
index 81ba6ffd5d44e..da919c941e507 100644
--- a/llvm/include/llvm/MC/MCObjectWriter.h
+++ b/llvm/include/llvm/MC/MCObjectWriter.h
@@ -83,8 +83,7 @@ class MCObjectWriter {
/// Clients are not required to answer precisely and may conservatively return
/// false, even when a
diff erence is fully resolved.
bool isSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
- const MCSymbolRefExpr *A,
- const MCSymbolRefExpr *B,
+ const MCSymbol &A, const MCSymbol &B,
bool InSet) const;
virtual bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
diff --git a/llvm/include/llvm/MC/MCValue.h b/llvm/include/llvm/MC/MCValue.h
index 4b789412325e4..d291c4cb5aff0 100644
--- a/llvm/include/llvm/MC/MCValue.h
+++ b/llvm/include/llvm/MC/MCValue.h
@@ -36,15 +36,17 @@ class raw_ostream;
class MCValue {
const MCSymbolRefExpr *SymA = nullptr, *SymB = nullptr;
int64_t Cst = 0;
- uint32_t RefKind = 0;
+ uint32_t Specifier = 0;
public:
+ friend class MCExpr;
MCValue() = default;
int64_t getConstant() const { return Cst; }
const MCSymbolRefExpr *getSymA() const { return SymA; }
const MCSymbolRefExpr *getSymB() const { return SymB; }
- uint32_t getRefKind() const { return RefKind; }
- void setSpecifier(uint32_t S) { RefKind = S; }
+ uint32_t getRefKind() const { return Specifier; }
+ uint32_t getSpecifier() const { return Specifier; }
+ void setSpecifier(uint32_t S) { Specifier = S; }
const MCSymbol *getAddSym() const {
return SymA ? &SymA->getSymbol() : nullptr;
@@ -71,7 +73,7 @@ class MCValue {
R.Cst = Val;
R.SymA = SymA;
R.SymB = SymB;
- R.RefKind = RefKind;
+ R.Specifier = RefKind;
return R;
}
@@ -80,7 +82,7 @@ class MCValue {
R.Cst = Val;
R.SymA = nullptr;
R.SymB = nullptr;
- R.RefKind = 0;
+ R.Specifier = 0;
return R;
}
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index 2c9cd0e5b626e..962b8e006dc0b 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -159,13 +159,6 @@ bool MCAssembler::evaluateFixup(const MCFixup &Fixup, const MCFragment *DF,
Ctx.reportError(Fixup.getLoc(), "expected relocatable expression");
return true;
}
- if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
- if (RefB->getKind() != MCSymbolRefExpr::VK_None) {
- Ctx.reportError(Fixup.getLoc(),
- "unsupported subtraction of qualified symbol");
- return true;
- }
- }
unsigned FixupFlags = getBackend().getFixupKindInfo(Fixup.getKind()).Flags;
if (FixupFlags & MCFixupKindInfo::FKF_IsTarget)
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 773df74291064..c5500ef9cf34d 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -295,19 +295,18 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
}
/// Helper method for \see EvaluateSymbolAdd().
-static void AttemptToFoldSymbolOffsetDifference(
- const MCAssembler *Asm, const SectionAddrMap *Addrs, bool InSet,
- const MCSymbolRefExpr *&A, const MCSymbolRefExpr *&B, int64_t &Addend) {
+static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
+ const SectionAddrMap *Addrs,
+ bool InSet, const MCSymbol *&A,
+ const MCSymbol *&B,
+ int64_t &Addend) {
if (!A || !B)
return;
- const MCSymbol &SA = A->getSymbol();
- const MCSymbol &SB = B->getSymbol();
-
+ const MCSymbol &SA = *A, &SB = *B;
if (SA.isUndefined() || SB.isUndefined())
return;
-
- if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
+ if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, SA, SB, InSet))
return;
auto FinalizeFolding = [&]() {
@@ -345,8 +344,7 @@ static void AttemptToFoldSymbolOffsetDifference(
}
// Eagerly evaluate when layout is finalized.
- Addend += Asm->getSymbolOffset(A->getSymbol()) -
- Asm->getSymbolOffset(B->getSymbol());
+ Addend += Asm->getSymbolOffset(SA) - Asm->getSymbolOffset(SB);
if (Addrs && (&SecA != &SecB))
Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
@@ -420,65 +418,52 @@ static void AttemptToFoldSymbolOffsetDifference(
}
}
-/// Evaluate the result of an add between (conceptually) two MCValues.
-///
-/// This routine conceptually attempts to construct an MCValue:
-/// Result = (Result_A - Result_B + Result_Cst)
-/// from two MCValue's LHS and RHS where
-/// Result = LHS + RHS
-/// and
-/// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
-///
-/// This routine attempts to aggressively fold the operands such that the result
-/// is representable in an MCValue, but may not always succeed.
-///
-/// \returns True on success, false if the result is not representable in an
-/// MCValue.
-
-/// NOTE: It is really important to have both the Asm and Layout arguments.
-/// They might look redundant, but this function can be used before layout
-/// is done (see the object streamer for example) and having the Asm argument
-/// lets us avoid relaxations early.
+// Evaluate the sum of two relocatable expressions.
+//
+// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+//
+// This routine attempts to aggressively fold the operands such that the result
+// is representable in an MCValue, but may not always succeed.
+//
+// LHS_A and RHS_A might have relocation specifiers while LHS_B and RHS_B
+// cannot have specifiers.
+//
+// \returns True on success, false if the result is not representable in an
+// MCValue.
+
+// NOTE: This function can be used before layout is done (see the object
+// streamer for example) and having the Asm argument lets us avoid relaxations
+// early.
static bool evaluateSymbolicAdd(const MCAssembler *Asm,
const SectionAddrMap *Addrs, bool InSet,
- const MCValue &LHS, const MCValue &RHS,
+ const MCValue &LHS,
+ const MCSymbolRefExpr *RhsAdd,
+ const MCSymbolRefExpr *RhsSub, int64_t RHS_Cst,
MCValue &Res) {
- // FIXME: This routine (and other evaluation parts) are *incredibly* sloppy
- // about dealing with modifiers. This will ultimately bite us, one day.
- const MCSymbolRefExpr *LHS_A = LHS.getSymA();
- const MCSymbolRefExpr *LHS_B = LHS.getSymB();
+ const MCSymbol *LHS_A = LHS.getAddSym();
+ const MCSymbol *LHS_B = LHS.getSubSym();
int64_t LHS_Cst = LHS.getConstant();
- const MCSymbolRefExpr *RHS_A = RHS.getSymA();
- const MCSymbolRefExpr *RHS_B = RHS.getSymB();
- int64_t RHS_Cst = RHS.getConstant();
-
- if (LHS.getRefKind() != RHS.getRefKind())
- return false;
+ const MCSymbol *RHS_A = RhsAdd ? &RhsAdd->getSymbol() : nullptr;
+ const MCSymbol *RHS_B = RhsSub ? &RhsSub->getSymbol() : nullptr;
// Fold the result constant immediately.
int64_t Result_Cst = LHS_Cst + RHS_Cst;
// If we have a layout, we can fold resolved
diff erences.
if (Asm) {
- // First, fold out any
diff erences which are fully resolved. By
- // reassociating terms in
+ // While LHS_A-LHS_B and RHS_A-RHS_B from recursive calls have already been
+ // folded, reassociating terms in
// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
- // we have the four possible
diff erences:
- // (LHS_A - LHS_B),
- // (LHS_A - RHS_B),
- // (RHS_A - LHS_B),
- // (RHS_A - RHS_B).
- // Since we are attempting to be as aggressive as possible about folding, we
- // attempt to evaluate each possible alternative.
- AttemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, LHS_A, LHS_B,
- Result_Cst);
- AttemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, LHS_A, RHS_B,
- Result_Cst);
- AttemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, RHS_A, LHS_B,
- Result_Cst);
- AttemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, RHS_A, RHS_B,
- Result_Cst);
+ // might bring more opportunities.
+ if (LHS_A && RHS_B && !LHS.getSymA()->getSpecifier()) {
+ attemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, LHS_A, RHS_B,
+ Result_Cst);
+ }
+ if (RHS_A && LHS_B && !RhsAdd->getSpecifier()) {
+ attemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, RHS_A, LHS_B,
+ Result_Cst);
+ }
}
// We can't represent the addition or subtraction of two symbols.
@@ -487,9 +472,10 @@ static bool evaluateSymbolicAdd(const MCAssembler *Asm,
// At this point, we have at most one additive symbol and one subtractive
// symbol -- find them.
- const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
- const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
-
+ auto *A = LHS_A ? LHS.getSymA() : RHS_A ? RhsAdd : nullptr;
+ auto *B = LHS_B ? LHS.getSymB() : RHS_B ? RhsSub : nullptr;
+ if (B && B->getKind() != MCSymbolRefExpr::VK_None)
+ return false;
Res = MCValue::get(A, B, Result_Cst);
return true;
}
@@ -641,31 +627,40 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
// We only support a few operations on non-constant expressions, handle
// those first.
+ auto Op = ABE->getOpcode();
+ int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) {
- switch (ABE->getOpcode()) {
+ switch (Op) {
default:
return false;
- case MCBinaryExpr::Sub:
- // Negate RHS and add.
- // The cast avoids undefined behavior if the constant is INT64_MIN.
- return evaluateSymbolicAdd(
- Asm, Addrs, InSet, LHSValue,
- MCValue::get(RHSValue.getSymB(), RHSValue.getSymA(),
- -(uint64_t)RHSValue.getConstant(),
- RHSValue.getRefKind()),
- Res);
-
case MCBinaryExpr::Add:
- return evaluateSymbolicAdd(Asm, Addrs, InSet, LHSValue, RHSValue, Res);
+ case MCBinaryExpr::Sub:
+ // TODO: Prevent folding for AArch64 @AUTH operands.
+ if (LHSValue.getSpecifier() || RHSValue.getSpecifier())
+ return false;
+ if (Op == MCBinaryExpr::Sub) {
+ std::swap(RHSValue.SymA, RHSValue.SymB);
+ RHSValue.Cst = -(uint64_t)RHSValue.Cst;
+ }
+ if (RHSValue.isAbsolute()) {
+ LHSValue.Cst += RHSValue.Cst;
+ Res = LHSValue;
+ return true;
+ }
+ if (LHSValue.isAbsolute()) {
+ RHSValue.Cst += LHSValue.Cst;
+ Res = RHSValue;
+ return true;
+ }
+ return evaluateSymbolicAdd(Asm, Addrs, InSet, LHSValue, RHSValue.SymA,
+ RHSValue.SymB, RHSValue.Cst, Res);
}
}
// FIXME: We need target hooks for the evaluation. It may be limited in
// width, and gas defines the result of comparisons
diff erently from
// Apple as.
- int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
int64_t Result = 0;
- auto Op = ABE->getOpcode();
switch (Op) {
case MCBinaryExpr::AShr: Result = LHS >> RHS; break;
case MCBinaryExpr::Add: Result = LHS + RHS; break;
diff --git a/llvm/lib/MC/MCObjectWriter.cpp b/llvm/lib/MC/MCObjectWriter.cpp
index 7183acc3865ef..6d5010dcae0a0 100644
--- a/llvm/lib/MC/MCObjectWriter.cpp
+++ b/llvm/lib/MC/MCObjectWriter.cpp
@@ -27,16 +27,10 @@ void MCObjectWriter::reset() {
CGProfile.clear();
}
-bool MCObjectWriter::isSymbolRefDifferenceFullyResolved(
- const MCAssembler &Asm, const MCSymbolRefExpr *A, const MCSymbolRefExpr *B,
- bool InSet) const {
- // Modified symbol references cannot be resolved.
- if (A->getKind() != MCSymbolRefExpr::VK_None ||
- B->getKind() != MCSymbolRefExpr::VK_None)
- return false;
-
- const MCSymbol &SA = A->getSymbol();
- const MCSymbol &SB = B->getSymbol();
+bool MCObjectWriter::isSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+ const MCSymbol &SA,
+ const MCSymbol &SB,
+ bool InSet) const {
assert(!SA.isUndefined() && !SB.isUndefined());
return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *SB.getFragment(),
InSet, /*IsPCRel=*/false);
diff --git a/llvm/test/MC/AArch64/elf-reloc-ptrauth.s b/llvm/test/MC/AArch64/elf-reloc-ptrauth.s
index bed85bcc5798b..9fe78a4e4e822 100644
--- a/llvm/test/MC/AArch64/elf-reloc-ptrauth.s
+++ b/llvm/test/MC/AArch64/elf-reloc-ptrauth.s
@@ -41,8 +41,6 @@
// RELOC-NEXT: 70 00000000 10000000
// ^^^^ discriminator
// ^^ 0 no addr diversity 0 reserved 00 ia key 0000 reserved
-// RELOC-NEXT: 80 04000000 00000000
-// Folded to constant 4 bytes
diff erence between _g9 and _g8
.section .helper
.local "_g 6"
@@ -91,10 +89,6 @@ _g9:
.quad ("_g 7" + 7)@AUTH(ia,16)
.quad 0
-// ASM: .xword _g9 at AUTH(ia,42)-_g8 at AUTH(ia,42)
-.quad _g9 at AUTH(ia,42) - _g8 at AUTH(ia,42)
-.quad 0
-
.ifdef ASMONLY
// ASM: .xword _g10 at AUTH(ia,42)+1
@@ -190,4 +184,8 @@ _g9:
// ERROBJ: :[[#@LINE+1]]:7: error: expected relocatable expression
.quad _g9 at AUTH(ia,42) - _g8
+// ERROBJ: :[[#@LINE+1]]:7: error: expected relocatable expression
+.quad _g9 at AUTH(ia,42) - _g8 at AUTH(ia,42)
+.quad 0
+
.endif // ERROBJ
diff --git a/llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s b/llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s
index e32db7c125bb4..3e51e487e1288 100644
--- a/llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s
+++ b/llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s
@@ -14,13 +14,9 @@ Lend:
// CHECK-NEXT: ^
add w0, w1, #(Lend - var at TLVPPAGEOFF)
+ // CHECK: [[#@LINE-1]]:3: error: expected relocatable expression
cmp w0, #(Lend - var at TLVPPAGEOFF)
- // CHECK: error: unsupported subtraction of qualified symbol
- // CHECK-NEXT: add w0, w1, #(Lend - var at TLVPPAGEOFF)
- // CHECK-NEXT: ^
- // CHECK: error: unsupported subtraction of qualified symbol
- // CHECK-NEXT: cmp w0, #(Lend - var at TLVPPAGEOFF)
- // CHECK-NEXT: ^
+ // CHECK: [[#@LINE-1]]:3: error: expected relocatable expression
add w0, w1, #(Lstart - Lend)
cmp w0, #(Lstart - Lend)
diff --git a/llvm/test/MC/ELF/bad-expr.s b/llvm/test/MC/ELF/bad-expr.s
index 59809700d4a11..2346f5493b98a 100644
--- a/llvm/test/MC/ELF/bad-expr.s
+++ b/llvm/test/MC/ELF/bad-expr.s
@@ -7,6 +7,6 @@ x:
// CHECK: [[@LINE+1]]:{{[0-9]+}}: error: symbol '__executable_start' can not be undefined in a subtraction expression
.quad x-__executable_start
-// CHECK: [[@LINE+1]]:{{[0-9]+}}: error: unsupported subtraction of qualified symbol
- .long bar - foo at got
+// CHECK: [[@LINE+1]]:7: error: expected relocatable expression
+.long bar - foo at got
foo:
diff --git a/llvm/test/MC/X86/macho-reloc-errors-x86_64.s b/llvm/test/MC/X86/macho-reloc-errors-x86_64.s
index 4498d03338f39..daf84a7dbdc4f 100644
--- a/llvm/test/MC/X86/macho-reloc-errors-x86_64.s
+++ b/llvm/test/MC/X86/macho-reloc-errors-x86_64.s
@@ -10,7 +10,7 @@
mov %rax, thing at TLVP
// CHECK-ERROR: 3:9: error: 32-bit absolute addressing is not supported in 64-bit mode
-// CHECK-ERROR: 4:9: error: unsupported subtraction of qualified symbol
+// CHECK-ERROR: 4:9: error: expected relocatable expression
// CHECK-ERROR: 5:9: error: unsupported pc-relative relocation of
diff erence
// CHECK-ERROR: 6:9: error: unsupported relocation with identical base
// CHECK-ERROR: 7:9: error: unsupported relocation with subtraction expression, symbol 'thing' can not be undefined in a subtraction expression
More information about the llvm-commits
mailing list