[llvm] [MC] Fuse relaxation and layout into a single forward pass (PR #184544)
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 22:11:05 PST 2026
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/184544
>From f30582c3fb183ece8cbe99d2ba7cad9d9b2fe346 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Tue, 3 Mar 2026 08:28:24 -0800
Subject: [PATCH] [MC] Fuse relaxation and layout into a single forward pass
Replace the two-pass inner loop in relaxOnce (relaxFragment +
layoutSection) with a single forward pass that sets each fragment's
offset before processing it.
- Extract relaxAlign from layoutSection's FT_Align handling and call
it from relaxFragment. FT_Align padding is computed inline with the
tracked Offset, so alignment fragments always see fresh upstream
offsets. This structurally eliminates the O(N) convergence pitfall
where stale offsets caused each iteration to fix only one more
alignment fragment.
- The new MCAssembler::Stretch field tracks the cumulative upstream size
delta. In evaluateFixup, for PCRel fixups during relaxation, Stretch
is added to forward-reference target values (LayoutOrder comparison).
This makes displacement = target_old - source_old, identical to the
old two-pass approach, preventing premature relaxation for
span-dependent instructions.
- FT_Fill/FT_Org removed from relaxFragment; the `if (F.Offset != Offset)` check in
the fused loop detects their size changes.
- layoutSection is retained for initial layout and post-finishLayout.
This fixes the FT_BoundaryAlign linear-time convergence issue reported
in #176535.
Key commits that updated relaxFragment/layoutSection:
- 742ecfc13e8a [MC] Relax MCFillFragment and compute fragment offsets eagerly
- 9f66ebe42715 MC: Eliminate redundant fragment relaxation
- df71243fa885 MC: Evaluate .org during fragment relaxation
- b1d58f025e83 MCAssembler: Simplify fragment relaxation
- 58d16db8b5d2 MCAssembler: Simplify relaxation of FT_Fill and FT_Org
---
llvm/include/llvm/MC/MCAssembler.h | 7 +-
llvm/lib/MC/MCAssembler.cpp | 102 ++++++++++++--------
llvm/test/MC/X86/align-branch-convergence.s | 37 +++++++
3 files changed, 103 insertions(+), 43 deletions(-)
create mode 100644 llvm/test/MC/X86/align-branch-convergence.s
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index dbae271a1c198..e66c0bed67a7b 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -60,6 +60,10 @@ class MCAssembler {
bool HasFinalLayout = false;
bool RelaxAll = false;
+ // Cumulative upstream size change during `relaxOnce`. Used to compensate
+ // forward-reference displacements in `evaluateFixup`.
+ int64_t Stretch = 0;
+
SectionListType Sections;
SmallVector<const MCSymbol *, 0> Symbols;
@@ -108,7 +112,8 @@ class MCAssembler {
unsigned relaxOnce(unsigned FirstStable);
/// Perform relaxation on a single fragment.
- bool relaxFragment(MCFragment &F);
+ void relaxFragment(MCFragment &F);
+ void relaxAlign(MCFragment &F);
void relaxInstruction(MCFragment &F);
void relaxLEB(MCFragment &F);
void relaxBoundaryAlign(MCBoundaryAlignFragment &BF);
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index e649ea7fedabe..671fb14908a71 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -173,6 +173,19 @@ bool MCAssembler::evaluateFixup(const MCFragment &F, MCFixup &Fixup,
if (Fixup.isPCRel()) {
Value -= getFragmentOffset(F) + Fixup.getOffset();
+ // During relaxation, F's offset is already updated but forward reference
+ // targets are stale. Add Stretch so that the displacement equals
+ // target_old - source_old, preventing premature relaxation.
+ if (Stretch) {
+ assert(!RecordReloc &&
+ "Stretch should only be applied during relaxation");
+ MCFragment *AF = Add ? Add->getFragment() : nullptr;
+ if (AF && AF->getLayoutOrder() > F.getLayoutOrder())
+ Value += Stretch;
+ MCFragment *SF = Sub ? Sub->getFragment() : nullptr;
+ if (SF && SF->getLayoutOrder() > F.getLayoutOrder())
+ Value -= Stretch;
+ }
if (Add && !Sub && !Add->isUndefined() && !Add->isAbsolute()) {
IsResolved = getWriter().isSymbolRefDifferenceFullyResolvedImpl(
*Add, F, false, true);
@@ -743,6 +756,24 @@ void MCAssembler::Finish() {
assert(PendingErrors.empty());
}
+void MCAssembler::relaxAlign(MCFragment &F) {
+ uint64_t Offset = F.Offset + F.getFixedSize();
+ unsigned Size = offsetToAlignment(Offset, F.getAlignment());
+ bool AlignFixup = false;
+ if (F.hasAlignEmitNops()) {
+ AlignFixup = getBackend().relaxAlign(F, Size);
+ if (!AlignFixup)
+ while (Size % getBackend().getMinimumNopSize())
+ Size += F.getAlignment().value();
+ }
+ if (!AlignFixup && Size > F.getAlignMaxBytesToEmit())
+ Size = 0;
+ F.VarContentStart = F.getFixedSize();
+ F.VarContentEnd = F.VarContentStart + Size;
+ if (F.VarContentEnd > F.getParent()->ContentStorage.size())
+ F.getParent()->ContentStorage.resize(F.VarContentEnd);
+}
+
bool MCAssembler::fixupNeedsRelaxation(const MCFragment &F,
const MCFixup &Fixup) const {
++stats::FixupEvalForRelax;
@@ -938,11 +969,13 @@ void MCAssembler::relaxSFrameFragment(MCFragment &F) {
F.clearVarFixups();
}
-bool MCAssembler::relaxFragment(MCFragment &F) {
- auto Size = computeFragmentSize(F);
+void MCAssembler::relaxFragment(MCFragment &F) {
switch (F.getKind()) {
default:
- return false;
+ return;
+ case MCFragment::FT_Align:
+ relaxAlign(F);
+ break;
case MCFragment::FT_Relaxable:
assert(!getRelaxAll() && "Did not expect a FT_Relaxable in RelaxAll mode");
relaxInstruction(F);
@@ -970,61 +1003,42 @@ bool MCAssembler::relaxFragment(MCFragment &F) {
getContext().getCVContext().encodeDefRange(
*this, static_cast<MCCVDefRangeFragment &>(F));
break;
- case MCFragment::FT_Fill:
- case MCFragment::FT_Org:
- return F.getNext()->Offset - F.Offset != Size;
}
- return computeFragmentSize(F) != Size;
}
void MCAssembler::layoutSection(MCSection &Sec) {
uint64_t Offset = 0;
for (MCFragment &F : Sec) {
F.Offset = Offset;
- if (F.getKind() == MCFragment::FT_Align) {
- Offset += F.getFixedSize();
- unsigned Size = offsetToAlignment(Offset, F.getAlignment());
- // In the nops mode, RISC-V style linker relaxation might adjust the size
- // and add a fixup, even if `Size` is originally 0.
- bool AlignFixup = false;
- if (F.hasAlignEmitNops()) {
- AlignFixup = getBackend().relaxAlign(F, Size);
- // If the backend does not handle the fragment specially, pad with nops,
- // but ensure that the padding is larger than the minimum nop size.
- if (!AlignFixup)
- while (Size % getBackend().getMinimumNopSize())
- Size += F.getAlignment().value();
- }
- if (!AlignFixup && Size > F.getAlignMaxBytesToEmit())
- Size = 0;
- // Update the variable tail size, offset by FixedSize to prevent ubsan
- // pointer-overflow in evaluateFixup. The content is ignored.
- F.VarContentStart = F.getFixedSize();
- F.VarContentEnd = F.VarContentStart + Size;
- if (F.VarContentEnd > F.getParent()->ContentStorage.size())
- F.getParent()->ContentStorage.resize(F.VarContentEnd);
- Offset += Size;
- } else {
- Offset += computeFragmentSize(F);
- }
+ if (F.getKind() == MCFragment::FT_Align)
+ relaxAlign(F);
+ Offset += computeFragmentSize(F);
}
}
+// Fused relaxation and layout: a single forward pass that updates each
+// fragment's offset before processing it, so upstream size changes are
+// immediately visible.
unsigned MCAssembler::relaxOnce(unsigned FirstStable) {
- ++stats::RelaxationSteps;
+ uint64_t MaxIterations = 0;
PendingErrors.clear();
-
unsigned Res = 0;
for (unsigned I = 0; I != FirstStable; ++I) {
- // Assume each iteration finalizes at least one extra fragment. If the
- // layout does not converge after N+1 iterations, bail out.
auto &Sec = *Sections[I];
- auto MaxIter = Sec.curFragList()->Tail->getLayoutOrder() + 1;
+ uint64_t Iters = 0;
for (;;) {
bool Changed = false;
- for (MCFragment &F : Sec)
- if (F.getKind() != MCFragment::FT_Data && relaxFragment(F))
+ uint64_t Offset = 0;
+ for (MCFragment &F : Sec) {
+ if (F.Offset != Offset)
Changed = true;
+ Stretch = Offset - F.Offset;
+ F.Offset = Offset;
+ if (F.getKind() != MCFragment::FT_Data)
+ relaxFragment(F);
+ Offset += computeFragmentSize(F);
+ }
+ ++Iters;
if (!Changed)
break;
@@ -1032,11 +1046,15 @@ unsigned MCAssembler::relaxOnce(unsigned FirstStable) {
// sections. Therefore, we must re-evaluate all sections.
FirstStable = Sections.size();
Res = I;
- if (--MaxIter == 0)
+ // Assume each iteration finalizes at least one extra fragment. If the
+ // layout does not converge after N+1 iterations, bail out.
+ if (Iters > Sec.curFragList()->Tail->getLayoutOrder())
break;
- layoutSection(Sec);
}
+ MaxIterations = std::max(MaxIterations, Iters);
}
+ stats::RelaxationSteps += MaxIterations;
+ Stretch = 0;
// The subsequent relaxOnce call only needs to visit Sections [0,Res) if no
// change occurred.
return Res;
diff --git a/llvm/test/MC/X86/align-branch-convergence.s b/llvm/test/MC/X86/align-branch-convergence.s
new file mode 100644
index 0000000000000..a5e4e521d5646
--- /dev/null
+++ b/llvm/test/MC/X86/align-branch-convergence.s
@@ -0,0 +1,37 @@
+# REQUIRES: asserts
+## Verify that boundary alignment converges in O(1) inner iterations,
+## not O(N) where N is the number of BoundaryAlign fragments.
+## The fused relaxation+layout pass gives each BoundaryAlign fragment fresh
+## upstream offsets, so all padding is computed correctly in a single pass.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64 --stats \
+# RUN: --x86-align-branch-boundary=32 --x86-align-branch=jcc+call %s \
+# RUN: -o %t 2>&1 | FileCheck %s --check-prefix=STATS
+# STATS: 2 assembler - Number of assembler layout and relaxation steps
+
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+# CHECK: 0: testl
+# CHECK: 2: je
+# CHECK: 4: callq
+# CHECK: 1d: nop
+# CHECK-NEXT: 20: callq
+# CHECK: 2f: testl
+# CHECK-NEXT: 31: jne
+# CHECK: 3d: nop
+# CHECK-NEXT: 40: callq
+# CHECK: 4a: retq
+
+ .p2align 5
+func:
+ testl %eax, %eax
+ je .Lend
+ .rept 8
+ callq foo
+ .endr
+ testl %ecx, %ecx
+ jne func
+ .rept 4
+ callq foo
+ .endr
+.Lend:
+ retq
More information about the llvm-commits
mailing list