[llvm] 33246f7 - [MC] Rework evaluateSymbolicAdd to eliminate MCValue::SymB reliance

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 5 11:16:17 PDT 2025


Author: Fangrui Song
Date: 2025-04-05T11:16:12-07:00
New Revision: 33246f79e87a0e629ae776d1811a1175a3f10065

URL: https://github.com/llvm/llvm-project/commit/33246f79e87a0e629ae776d1811a1175a3f10065
DIFF: https://github.com/llvm/llvm-project/commit/33246f79e87a0e629ae776d1811a1175a3f10065.diff

LOG: [MC] Rework evaluateSymbolicAdd to eliminate MCValue::SymB reliance

Reworked evaluateSymbolicAdd and isSymbolRefDifferenceFullyResolved to
remove their reliance on MCValue::SymB, which previously used the
MCSymbolRefExpr member when folding two symbolic expressions. This
dependency prevented replacing MCValue::SymB with a MCSymbol. By
refactoring, we enable this replacement, which is a more significant
improvement.

Note that this change eliminates the rare "unsupported subtraction of
qualified symbol" diagnostic, resulting in a minor loss of information.
However, the benefit of enabling MCValue::SymB replacement with MCSymbol
outweighs this slight regression.

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCExpr.h
    llvm/include/llvm/MC/MCObjectWriter.h
    llvm/include/llvm/MC/MCValue.h
    llvm/lib/MC/MCAssembler.cpp
    llvm/lib/MC/MCExpr.cpp
    llvm/lib/MC/MCObjectWriter.cpp
    llvm/test/MC/AArch64/elf-reloc-ptrauth.s
    llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s
    llvm/test/MC/ELF/bad-expr.s
    llvm/test/MC/X86/macho-reloc-errors-x86_64.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 782f7ea8957d9..3127a93d32581 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -259,6 +259,9 @@ class MCSymbolRefExpr : public MCExpr {
   VariantKind getKind() const {
     return (VariantKind)(getSubclassData() & VariantKindMask);
   }
+  uint16_t getSpecifier() const {
+    return (getSubclassData() & VariantKindMask);
+  }
 
   bool hasSubsectionsViaSymbols() const {
     return (getSubclassData() & HasSubsectionsViaSymbolsBit) != 0;

diff  --git a/llvm/include/llvm/MC/MCObjectWriter.h b/llvm/include/llvm/MC/MCObjectWriter.h
index 81ba6ffd5d44e..da919c941e507 100644
--- a/llvm/include/llvm/MC/MCObjectWriter.h
+++ b/llvm/include/llvm/MC/MCObjectWriter.h
@@ -83,8 +83,7 @@ class MCObjectWriter {
   /// Clients are not required to answer precisely and may conservatively return
   /// false, even when a 
diff erence is fully resolved.
   bool isSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
-                                          const MCSymbolRefExpr *A,
-                                          const MCSymbolRefExpr *B,
+                                          const MCSymbol &A, const MCSymbol &B,
                                           bool InSet) const;
 
   virtual bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,

diff  --git a/llvm/include/llvm/MC/MCValue.h b/llvm/include/llvm/MC/MCValue.h
index 4b789412325e4..d291c4cb5aff0 100644
--- a/llvm/include/llvm/MC/MCValue.h
+++ b/llvm/include/llvm/MC/MCValue.h
@@ -36,15 +36,17 @@ class raw_ostream;
 class MCValue {
   const MCSymbolRefExpr *SymA = nullptr, *SymB = nullptr;
   int64_t Cst = 0;
-  uint32_t RefKind = 0;
+  uint32_t Specifier = 0;
 
 public:
+  friend class MCExpr;
   MCValue() = default;
   int64_t getConstant() const { return Cst; }
   const MCSymbolRefExpr *getSymA() const { return SymA; }
   const MCSymbolRefExpr *getSymB() const { return SymB; }
-  uint32_t getRefKind() const { return RefKind; }
-  void setSpecifier(uint32_t S) { RefKind = S; }
+  uint32_t getRefKind() const { return Specifier; }
+  uint32_t getSpecifier() const { return Specifier; }
+  void setSpecifier(uint32_t S) { Specifier = S; }
 
   const MCSymbol *getAddSym() const {
     return SymA ? &SymA->getSymbol() : nullptr;
@@ -71,7 +73,7 @@ class MCValue {
     R.Cst = Val;
     R.SymA = SymA;
     R.SymB = SymB;
-    R.RefKind = RefKind;
+    R.Specifier = RefKind;
     return R;
   }
 
@@ -80,7 +82,7 @@ class MCValue {
     R.Cst = Val;
     R.SymA = nullptr;
     R.SymB = nullptr;
-    R.RefKind = 0;
+    R.Specifier = 0;
     return R;
   }
 

diff  --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index 2c9cd0e5b626e..962b8e006dc0b 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -159,13 +159,6 @@ bool MCAssembler::evaluateFixup(const MCFixup &Fixup, const MCFragment *DF,
     Ctx.reportError(Fixup.getLoc(), "expected relocatable expression");
     return true;
   }
-  if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
-    if (RefB->getKind() != MCSymbolRefExpr::VK_None) {
-      Ctx.reportError(Fixup.getLoc(),
-                      "unsupported subtraction of qualified symbol");
-      return true;
-    }
-  }
 
   unsigned FixupFlags = getBackend().getFixupKindInfo(Fixup.getKind()).Flags;
   if (FixupFlags & MCFixupKindInfo::FKF_IsTarget)

diff  --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 773df74291064..c5500ef9cf34d 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -295,19 +295,18 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
 }
 
 /// Helper method for \see EvaluateSymbolAdd().
-static void AttemptToFoldSymbolOffsetDifference(
-    const MCAssembler *Asm, const SectionAddrMap *Addrs, bool InSet,
-    const MCSymbolRefExpr *&A, const MCSymbolRefExpr *&B, int64_t &Addend) {
+static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
+                                                const SectionAddrMap *Addrs,
+                                                bool InSet, const MCSymbol *&A,
+                                                const MCSymbol *&B,
+                                                int64_t &Addend) {
   if (!A || !B)
     return;
 
-  const MCSymbol &SA = A->getSymbol();
-  const MCSymbol &SB = B->getSymbol();
-
+  const MCSymbol &SA = *A, &SB = *B;
   if (SA.isUndefined() || SB.isUndefined())
     return;
-
-  if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
+  if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, SA, SB, InSet))
     return;
 
   auto FinalizeFolding = [&]() {
@@ -345,8 +344,7 @@ static void AttemptToFoldSymbolOffsetDifference(
     }
 
     // Eagerly evaluate when layout is finalized.
-    Addend += Asm->getSymbolOffset(A->getSymbol()) -
-              Asm->getSymbolOffset(B->getSymbol());
+    Addend += Asm->getSymbolOffset(SA) - Asm->getSymbolOffset(SB);
     if (Addrs && (&SecA != &SecB))
       Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
 
@@ -420,65 +418,52 @@ static void AttemptToFoldSymbolOffsetDifference(
   }
 }
 
-/// Evaluate the result of an add between (conceptually) two MCValues.
-///
-/// This routine conceptually attempts to construct an MCValue:
-///   Result = (Result_A - Result_B + Result_Cst)
-/// from two MCValue's LHS and RHS where
-///   Result = LHS + RHS
-/// and
-///   Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
-///
-/// This routine attempts to aggressively fold the operands such that the result
-/// is representable in an MCValue, but may not always succeed.
-///
-/// \returns True on success, false if the result is not representable in an
-/// MCValue.
-
-/// NOTE: It is really important to have both the Asm and Layout arguments.
-/// They might look redundant, but this function can be used before layout
-/// is done (see the object streamer for example) and having the Asm argument
-/// lets us avoid relaxations early.
+// Evaluate the sum of two relocatable expressions.
+//
+//   Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+//
+// This routine attempts to aggressively fold the operands such that the result
+// is representable in an MCValue, but may not always succeed.
+//
+// LHS_A and RHS_A might have relocation specifiers while LHS_B and RHS_B
+// cannot have specifiers.
+//
+// \returns True on success, false if the result is not representable in an
+// MCValue.
+
+// NOTE: This function can be used before layout is done (see the object
+// streamer for example) and having the Asm argument lets us avoid relaxations
+// early.
 static bool evaluateSymbolicAdd(const MCAssembler *Asm,
                                 const SectionAddrMap *Addrs, bool InSet,
-                                const MCValue &LHS, const MCValue &RHS,
+                                const MCValue &LHS,
+                                const MCSymbolRefExpr *RhsAdd,
+                                const MCSymbolRefExpr *RhsSub, int64_t RHS_Cst,
                                 MCValue &Res) {
-  // FIXME: This routine (and other evaluation parts) are *incredibly* sloppy
-  // about dealing with modifiers. This will ultimately bite us, one day.
-  const MCSymbolRefExpr *LHS_A = LHS.getSymA();
-  const MCSymbolRefExpr *LHS_B = LHS.getSymB();
+  const MCSymbol *LHS_A = LHS.getAddSym();
+  const MCSymbol *LHS_B = LHS.getSubSym();
   int64_t LHS_Cst = LHS.getConstant();
 
-  const MCSymbolRefExpr *RHS_A = RHS.getSymA();
-  const MCSymbolRefExpr *RHS_B = RHS.getSymB();
-  int64_t RHS_Cst = RHS.getConstant();
-
-  if (LHS.getRefKind() != RHS.getRefKind())
-    return false;
+  const MCSymbol *RHS_A = RhsAdd ? &RhsAdd->getSymbol() : nullptr;
+  const MCSymbol *RHS_B = RhsSub ? &RhsSub->getSymbol() : nullptr;
 
   // Fold the result constant immediately.
   int64_t Result_Cst = LHS_Cst + RHS_Cst;
 
   // If we have a layout, we can fold resolved 
diff erences.
   if (Asm) {
-    // First, fold out any 
diff erences which are fully resolved. By
-    // reassociating terms in
+    // While LHS_A-LHS_B and RHS_A-RHS_B from recursive calls have already been
+    // folded, reassociating terms in
     //   Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
-    // we have the four possible 
diff erences:
-    //   (LHS_A - LHS_B),
-    //   (LHS_A - RHS_B),
-    //   (RHS_A - LHS_B),
-    //   (RHS_A - RHS_B).
-    // Since we are attempting to be as aggressive as possible about folding, we
-    // attempt to evaluate each possible alternative.
-    AttemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, LHS_A, LHS_B,
-                                        Result_Cst);
-    AttemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, LHS_A, RHS_B,
-                                        Result_Cst);
-    AttemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, RHS_A, LHS_B,
-                                        Result_Cst);
-    AttemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, RHS_A, RHS_B,
-                                        Result_Cst);
+    // might bring more opportunities.
+    if (LHS_A && RHS_B && !LHS.getSymA()->getSpecifier()) {
+      attemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, LHS_A, RHS_B,
+                                          Result_Cst);
+    }
+    if (RHS_A && LHS_B && !RhsAdd->getSpecifier()) {
+      attemptToFoldSymbolOffsetDifference(Asm, Addrs, InSet, RHS_A, LHS_B,
+                                          Result_Cst);
+    }
   }
 
   // We can't represent the addition or subtraction of two symbols.
@@ -487,9 +472,10 @@ static bool evaluateSymbolicAdd(const MCAssembler *Asm,
 
   // At this point, we have at most one additive symbol and one subtractive
   // symbol -- find them.
-  const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
-  const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
-
+  auto *A = LHS_A ? LHS.getSymA() : RHS_A ? RhsAdd : nullptr;
+  auto *B = LHS_B ? LHS.getSymB() : RHS_B ? RhsSub : nullptr;
+  if (B && B->getKind() != MCSymbolRefExpr::VK_None)
+    return false;
   Res = MCValue::get(A, B, Result_Cst);
   return true;
 }
@@ -641,31 +627,40 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
 
     // We only support a few operations on non-constant expressions, handle
     // those first.
+    auto Op = ABE->getOpcode();
+    int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
     if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) {
-      switch (ABE->getOpcode()) {
+      switch (Op) {
       default:
         return false;
-      case MCBinaryExpr::Sub:
-        // Negate RHS and add.
-        // The cast avoids undefined behavior if the constant is INT64_MIN.
-        return evaluateSymbolicAdd(
-            Asm, Addrs, InSet, LHSValue,
-            MCValue::get(RHSValue.getSymB(), RHSValue.getSymA(),
-                         -(uint64_t)RHSValue.getConstant(),
-                         RHSValue.getRefKind()),
-            Res);
-
       case MCBinaryExpr::Add:
-        return evaluateSymbolicAdd(Asm, Addrs, InSet, LHSValue, RHSValue, Res);
+      case MCBinaryExpr::Sub:
+        // TODO: Prevent folding for AArch64 @AUTH operands.
+        if (LHSValue.getSpecifier() || RHSValue.getSpecifier())
+          return false;
+        if (Op == MCBinaryExpr::Sub) {
+          std::swap(RHSValue.SymA, RHSValue.SymB);
+          RHSValue.Cst = -(uint64_t)RHSValue.Cst;
+        }
+        if (RHSValue.isAbsolute()) {
+          LHSValue.Cst += RHSValue.Cst;
+          Res = LHSValue;
+          return true;
+        }
+        if (LHSValue.isAbsolute()) {
+          RHSValue.Cst += LHSValue.Cst;
+          Res = RHSValue;
+          return true;
+        }
+        return evaluateSymbolicAdd(Asm, Addrs, InSet, LHSValue, RHSValue.SymA,
+                                   RHSValue.SymB, RHSValue.Cst, Res);
       }
     }
 
     // FIXME: We need target hooks for the evaluation. It may be limited in
     // width, and gas defines the result of comparisons 
diff erently from
     // Apple as.
-    int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
     int64_t Result = 0;
-    auto Op = ABE->getOpcode();
     switch (Op) {
     case MCBinaryExpr::AShr: Result = LHS >> RHS; break;
     case MCBinaryExpr::Add:  Result = LHS + RHS; break;

diff  --git a/llvm/lib/MC/MCObjectWriter.cpp b/llvm/lib/MC/MCObjectWriter.cpp
index 7183acc3865ef..6d5010dcae0a0 100644
--- a/llvm/lib/MC/MCObjectWriter.cpp
+++ b/llvm/lib/MC/MCObjectWriter.cpp
@@ -27,16 +27,10 @@ void MCObjectWriter::reset() {
   CGProfile.clear();
 }
 
-bool MCObjectWriter::isSymbolRefDifferenceFullyResolved(
-    const MCAssembler &Asm, const MCSymbolRefExpr *A, const MCSymbolRefExpr *B,
-    bool InSet) const {
-  // Modified symbol references cannot be resolved.
-  if (A->getKind() != MCSymbolRefExpr::VK_None ||
-      B->getKind() != MCSymbolRefExpr::VK_None)
-    return false;
-
-  const MCSymbol &SA = A->getSymbol();
-  const MCSymbol &SB = B->getSymbol();
+bool MCObjectWriter::isSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+                                                        const MCSymbol &SA,
+                                                        const MCSymbol &SB,
+                                                        bool InSet) const {
   assert(!SA.isUndefined() && !SB.isUndefined());
   return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *SB.getFragment(),
                                                 InSet, /*IsPCRel=*/false);

diff  --git a/llvm/test/MC/AArch64/elf-reloc-ptrauth.s b/llvm/test/MC/AArch64/elf-reloc-ptrauth.s
index bed85bcc5798b..9fe78a4e4e822 100644
--- a/llvm/test/MC/AArch64/elf-reloc-ptrauth.s
+++ b/llvm/test/MC/AArch64/elf-reloc-ptrauth.s
@@ -41,8 +41,6 @@
 // RELOC-NEXT: 70 00000000 10000000
 //                         ^^^^ discriminator
 //                               ^^ 0 no addr diversity 0 reserved 00 ia key 0000 reserved
-// RELOC-NEXT: 80 04000000 00000000
-// Folded to constant 4 bytes 
diff erence between _g9 and _g8
 
 .section    .helper
 .local "_g 6"
@@ -91,10 +89,6 @@ _g9:
 .quad ("_g 7" + 7)@AUTH(ia,16)
 .quad 0
 
-// ASM:          .xword _g9 at AUTH(ia,42)-_g8 at AUTH(ia,42)
-.quad _g9 at AUTH(ia,42) - _g8 at AUTH(ia,42)
-.quad 0
-
 .ifdef ASMONLY
 
 // ASM:          .xword _g10 at AUTH(ia,42)+1
@@ -190,4 +184,8 @@ _g9:
 // ERROBJ: :[[#@LINE+1]]:7: error: expected relocatable expression
 .quad _g9 at AUTH(ia,42) - _g8
 
+// ERROBJ: :[[#@LINE+1]]:7: error: expected relocatable expression
+.quad _g9 at AUTH(ia,42) - _g8 at AUTH(ia,42)
+.quad 0
+
 .endif // ERROBJ

diff  --git a/llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s b/llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s
index e32db7c125bb4..3e51e487e1288 100644
--- a/llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s
+++ b/llvm/test/MC/AArch64/label-arithmetic-diags-darwin.s
@@ -14,13 +14,9 @@ Lend:
   // CHECK-NEXT: ^
 
   add w0, w1, #(Lend - var at TLVPPAGEOFF)
+  // CHECK: [[#@LINE-1]]:3: error: expected relocatable expression
   cmp w0, #(Lend - var at TLVPPAGEOFF)
-  // CHECK: error: unsupported subtraction of qualified symbol
-  // CHECK-NEXT: add w0, w1, #(Lend - var at TLVPPAGEOFF)
-  // CHECK-NEXT: ^
-  // CHECK: error: unsupported subtraction of qualified symbol
-  // CHECK-NEXT: cmp w0, #(Lend - var at TLVPPAGEOFF)
-  // CHECK-NEXT: ^
+  // CHECK: [[#@LINE-1]]:3: error: expected relocatable expression
 
   add w0, w1, #(Lstart - Lend)
   cmp w0, #(Lstart - Lend)

diff  --git a/llvm/test/MC/ELF/bad-expr.s b/llvm/test/MC/ELF/bad-expr.s
index 59809700d4a11..2346f5493b98a 100644
--- a/llvm/test/MC/ELF/bad-expr.s
+++ b/llvm/test/MC/ELF/bad-expr.s
@@ -7,6 +7,6 @@ x:
 // CHECK: [[@LINE+1]]:{{[0-9]+}}: error: symbol '__executable_start' can not be undefined in a subtraction expression
         .quad   x-__executable_start
 
-// CHECK: [[@LINE+1]]:{{[0-9]+}}: error: unsupported subtraction of qualified symbol
-        .long bar - foo at got
+// CHECK: [[@LINE+1]]:7: error: expected relocatable expression
+.long bar - foo at got
 foo:

diff  --git a/llvm/test/MC/X86/macho-reloc-errors-x86_64.s b/llvm/test/MC/X86/macho-reloc-errors-x86_64.s
index 4498d03338f39..daf84a7dbdc4f 100644
--- a/llvm/test/MC/X86/macho-reloc-errors-x86_64.s
+++ b/llvm/test/MC/X86/macho-reloc-errors-x86_64.s
@@ -10,7 +10,7 @@
         mov %rax, thing at TLVP
 
 // CHECK-ERROR: 3:9: error: 32-bit absolute addressing is not supported in 64-bit mode
-// CHECK-ERROR: 4:9: error: unsupported subtraction of qualified symbol
+// CHECK-ERROR: 4:9: error: expected relocatable expression
 // CHECK-ERROR: 5:9: error: unsupported pc-relative relocation of 
diff erence
 // CHECK-ERROR: 6:9: error: unsupported relocation with identical base
 // CHECK-ERROR: 7:9: error: unsupported relocation with subtraction expression, symbol 'thing' can not be undefined in a subtraction expression


        


More information about the llvm-commits mailing list