[llvm] [BOLT][AArch64] Run LDR relaxation (PR #165787)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 30 15:06:28 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: YongKang Zhu (yozhu)
<details>
<summary>Changes</summary>
Replace the current `ADRRelaxationPass` with `AArch64RelaxationPass`,
which, besides the existing ADR relaxation, will also run LDR relaxation
that for now only handles these two forms of LDR instructions:
`ldr Xt, [label]` and `ldr Wt, [label]`.
---
Full diff: https://github.com/llvm/llvm-project/pull/165787.diff
8 Files Affected:
- (modified) bolt/include/bolt/Core/MCPlusBuilder.h (+23)
- (renamed) bolt/include/bolt/Passes/AArch64RelaxationPass.h (+10-10)
- (modified) bolt/include/bolt/Passes/FixRelaxationPass.h (+1-1)
- (renamed) bolt/lib/Passes/AArch64RelaxationPass.cpp (+27-21)
- (modified) bolt/lib/Passes/CMakeLists.txt (+1-1)
- (modified) bolt/lib/Rewrite/BinaryPassManager.cpp (+6-6)
- (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+42)
- (added) bolt/test/AArch64/ldr-relaxation.s (+122)
``````````diff
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index d666c10885ad5..5e349cd69fb43 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -840,6 +840,16 @@ class MCPlusBuilder {
return false;
}
+ virtual bool isLDRWl(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
+ virtual bool isLDRXl(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
virtual bool isMOVW(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
@@ -1789,6 +1799,19 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ /// Take \p LDRInst and return ADRP+LDR instruction sequence - for
+ ///
+ /// ldr x0, [label]
+ ///
+ /// the following sequence will be generated:
+ ///
+ /// adrp x0, PageBase(label)
+ /// ldr x0, [x0, PageOffset(label)]
+ virtual InstructionListType createAdrpLdr(const MCInst &LDRInst,
+ MCContext *Ctx) const {
+ llvm_unreachable("not implemented");
+ }
+
/// Return not 0 if the instruction CurInst, in combination with the recent
/// history of disassembled instructions supplied by [Begin, End), is a linker
/// generated veneer/stub that needs patching. This happens in AArch64 when
diff --git a/bolt/include/bolt/Passes/ADRRelaxationPass.h b/bolt/include/bolt/Passes/AArch64RelaxationPass.h
similarity index 55%
rename from bolt/include/bolt/Passes/ADRRelaxationPass.h
rename to bolt/include/bolt/Passes/AArch64RelaxationPass.h
index b9f92dec7f03b..c61874f48141c 100644
--- a/bolt/include/bolt/Passes/ADRRelaxationPass.h
+++ b/bolt/include/bolt/Passes/AArch64RelaxationPass.h
@@ -6,29 +6,29 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the ADRRelaxationPass class, which replaces AArch64
-// non-local ADR instructions with ADRP + ADD due to small offset range of ADR
-// instruction (+- 1MB) which could be easily overflowed after BOLT
-// optimizations. Such problems are usually connected with errata 843419
-// https://developer.arm.com/documentation/epm048406/2100/
+// This file declares the AArch64RelaxationPass class, which replaces AArch64
+// non-local ADR/LDR instructions with ADRP + ADD/LDR due to small offset
+// range of ADR and LDR instructions (+- 1MB) which could be easily overflowed
+// after BOLT optimizations. Such problems are usually connected with errata
+// 843419: https://developer.arm.com/documentation/epm048406/2100/
// The linker could replace ADRP instruction with ADR in some cases.
//
//===----------------------------------------------------------------------===//
-#ifndef BOLT_PASSES_ADRRELAXATIONPASS_H
-#define BOLT_PASSES_ADRRELAXATIONPASS_H
+#ifndef BOLT_PASSES_ADRLDRRELAXATIONPASS_H
+#define BOLT_PASSES_ADRLDRRELAXATIONPASS_H
#include "bolt/Passes/BinaryPasses.h"
namespace llvm {
namespace bolt {
-class ADRRelaxationPass : public BinaryFunctionPass {
+class AArch64RelaxationPass : public BinaryFunctionPass {
public:
- explicit ADRRelaxationPass(const cl::opt<bool> &PrintPass)
+ explicit AArch64RelaxationPass(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) {}
- const char *getName() const override { return "adr-relaxation"; }
+ const char *getName() const override { return "aarch64-relaxation"; }
/// Pass entry point
Error runOnFunctions(BinaryContext &BC) override;
diff --git a/bolt/include/bolt/Passes/FixRelaxationPass.h b/bolt/include/bolt/Passes/FixRelaxationPass.h
index 50b64480aa62e..cf5a8a1fcb134 100644
--- a/bolt/include/bolt/Passes/FixRelaxationPass.h
+++ b/bolt/include/bolt/Passes/FixRelaxationPass.h
@@ -1,4 +1,4 @@
-//===- bolt/Passes/ADRRelaxationPass.h --------------------------*- C++ -*-===//
+//===- bolt/Passes/FixRelaxationPass.h --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/bolt/lib/Passes/ADRRelaxationPass.cpp b/bolt/lib/Passes/AArch64RelaxationPass.cpp
similarity index 66%
rename from bolt/lib/Passes/ADRRelaxationPass.cpp
rename to bolt/lib/Passes/AArch64RelaxationPass.cpp
index c3954c94a7f92..246cb402bb7a6 100644
--- a/bolt/lib/Passes/ADRRelaxationPass.cpp
+++ b/bolt/lib/Passes/AArch64RelaxationPass.cpp
@@ -1,4 +1,5 @@
-//===- bolt/Passes/ADRRelaxationPass.cpp ----------------------------------===//
+//===- bolt/Passes/AArch64RelaxationPass.cpp
+//----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +7,11 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the ADRRelaxationPass class.
+// This file implements the AArch64RelaxationPass class.
//
//===----------------------------------------------------------------------===//
-#include "bolt/Passes/ADRRelaxationPass.h"
+#include "bolt/Passes/AArch64RelaxationPass.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <iterator>
@@ -20,10 +21,10 @@ using namespace llvm;
namespace opts {
extern cl::OptionCategory BoltCategory;
-static cl::opt<bool>
- AdrPassOpt("adr-relaxation",
- cl::desc("Replace ARM non-local ADR instructions with ADRP"),
- cl::init(true), cl::cat(BoltCategory), cl::ReallyHidden);
+static cl::opt<bool> AArch64PassOpt(
+ "adr-ldr-relaxation",
+ cl::desc("Replace ARM non-local ADR/LDR instructions with ADRP"),
+ cl::init(true), cl::cat(BoltCategory), cl::ReallyHidden);
} // namespace opts
namespace llvm {
@@ -35,7 +36,7 @@ namespace bolt {
// jobs and checking the exit flag after it.
static bool PassFailed = false;
-void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
+void AArch64RelaxationPass::runOnFunction(BinaryFunction &BF) {
if (PassFailed)
return;
@@ -43,10 +44,13 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
for (BinaryBasicBlock &BB : BF) {
for (auto It = BB.begin(); It != BB.end(); ++It) {
MCInst &Inst = *It;
- if (!BC.MIB->isADR(Inst))
+ bool IsADR = BC.MIB->isADR(Inst);
+
+ // TODO: Handle other types of LDR (literal, PC-relative) instructions.
+ if (!IsADR && !BC.MIB->isLDRXl(Inst) && !BC.MIB->isLDRWl(Inst))
continue;
- const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst);
+ const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, IsADR ? 0 : 1);
if (!Symbol)
continue;
@@ -56,25 +60,27 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
continue;
}
- // Don't relax ADR if it points to the same function and is in the main
- // fragment and BF initial size is < 1MB.
+ // Don't relax ADR/LDR if it points to the same function and is in the
+ // main fragment and BF initial size is < 1MB.
const unsigned OneMB = 0x100000;
if (BF.getSize() < OneMB) {
BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol);
if (TargetBF == &BF && !BB.isSplit())
continue;
- // No relaxation needed if ADR references a basic block in the same
+ // No relaxation needed if ADR/LDR references a basic block in the same
// fragment.
if (BinaryBasicBlock *TargetBB = BF.getBasicBlockForLabel(Symbol))
if (BB.getFragmentNum() == TargetBB->getFragmentNum())
continue;
}
- InstructionListType AdrpAdd;
+ InstructionListType AdrpMaterialization;
{
auto L = BC.scopeLock();
- AdrpAdd = BC.MIB->undoAdrpAddRelaxation(Inst, BC.Ctx.get());
+ AdrpMaterialization =
+ IsADR ? BC.MIB->undoAdrpAddRelaxation(Inst, BC.Ctx.get())
+ : BC.MIB->createAdrpLdr(Inst, BC.Ctx.get());
}
if (It != BB.begin() && BC.MIB->isNoop(*std::prev(It))) {
@@ -88,18 +94,18 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
// invalidate this offset, so we have to rely on linker-inserted NOP to
// replace it with ADRP, and abort if it is not present.
auto L = BC.scopeLock();
- BC.errs() << "BOLT-ERROR: cannot relax ADR in non-simple function "
- << BF << '\n';
+ BC.errs() << "BOLT-ERROR: cannot relax " << (IsADR ? "ADR" : "LDR")
+ << " in non-simple function " << BF << '\n';
PassFailed = true;
return;
}
- It = BB.replaceInstruction(It, AdrpAdd);
+ It = BB.replaceInstruction(It, AdrpMaterialization);
}
}
}
-Error ADRRelaxationPass::runOnFunctions(BinaryContext &BC) {
- if (!opts::AdrPassOpt || !BC.HasRelocations)
+Error AArch64RelaxationPass::runOnFunctions(BinaryContext &BC) {
+ if (!opts::AArch64PassOpt || !BC.HasRelocations)
return Error::success();
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
@@ -108,7 +114,7 @@ Error ADRRelaxationPass::runOnFunctions(BinaryContext &BC) {
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr,
- "ADRRelaxationPass");
+ "AArch64RelaxationPass");
if (PassFailed)
return createFatalBOLTError("");
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index d7519518f186f..3197e62faad21 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -1,5 +1,5 @@
add_llvm_library(LLVMBOLTPasses
- ADRRelaxationPass.cpp
+ AArch64RelaxationPass.cpp
Aligner.cpp
AllocCombiner.cpp
AsmDump.cpp
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 782137e807662..1a0f6d75d63e8 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "bolt/Rewrite/BinaryPassManager.h"
-#include "bolt/Passes/ADRRelaxationPass.h"
+#include "bolt/Passes/AArch64RelaxationPass.h"
#include "bolt/Passes/Aligner.h"
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
@@ -129,10 +129,10 @@ static cl::opt<bool> PrintJTFootprintReduction(
cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
cl::cat(BoltOptCategory));
-static cl::opt<bool>
- PrintAdrRelaxation("print-adr-relaxation",
- cl::desc("print functions after ADR Relaxation pass"),
- cl::Hidden, cl::cat(BoltOptCategory));
+static cl::opt<bool> PrintAArch64Relaxation(
+ "print-adr-ldr-relaxation",
+ cl::desc("print functions after ADR/LDR Relaxation pass"), cl::Hidden,
+ cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintLongJmp("print-longjmp",
@@ -517,7 +517,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
if (BC.isAArch64()) {
Manager.registerPass(
- std::make_unique<ADRRelaxationPass>(PrintAdrRelaxation));
+ std::make_unique<AArch64RelaxationPass>(PrintAArch64Relaxation));
// Tighten branches according to offset differences between branch and
// targets. No extra instructions after this pass, otherwise we may have
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 7769162d67eaf..8a496c566b06b 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -142,6 +142,7 @@ static InstructionListType createIncMemory(MCPhysReg RegTo, MCPhysReg RegTmp) {
atomicAdd(Insts.back(), RegTo, RegTmp);
return Insts;
}
+
class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;
@@ -583,6 +584,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Inst.getOpcode() == AArch64::ADDXri;
}
+ bool isLDRWl(const MCInst &Inst) const override {
+ return Inst.getOpcode() == AArch64::LDRWl;
+ }
+
+ bool isLDRXl(const MCInst &Inst) const override {
+ return Inst.getOpcode() == AArch64::LDRXl;
+ }
+
MCPhysReg getADRReg(const MCInst &Inst) const {
assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction");
assert(MCPlus::getNumPrimeOperands(Inst) != 0 &&
@@ -602,6 +611,39 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return materializeAddress(Target, Ctx, Reg, Addend);
}
+ InstructionListType createAdrpLdr(const MCInst &LDRInst,
+ MCContext *Ctx) const override {
+ assert((isLDRXl(LDRInst) || isLDRWl(LDRInst)) &&
+ "LDR (literal, 32 or 64-bit integer load) instruction expected");
+ assert(LDRInst.getOperand(0).isReg() &&
+ "unexpected operand in LDR instruction");
+ const MCPhysReg DataReg = LDRInst.getOperand(0).getReg();
+ const MCPhysReg AddrReg =
+ isLDRXl(LDRInst) ? DataReg
+ : (MCPhysReg)RegInfo->getMatchingSuperReg(
+ DataReg, AArch64::sub_32,
+ &RegInfo->getRegClass(AArch64::GPR64RegClassID));
+ const MCSymbol *Target = getTargetSymbol(LDRInst, 1);
+ assert(Target && "missing target symbol in LDR instruction");
+
+ InstructionListType Insts(2);
+ Insts[0].setOpcode(AArch64::ADRP);
+ Insts[0].clear();
+ Insts[0].addOperand(MCOperand::createReg(AddrReg));
+ Insts[0].addOperand(MCOperand::createImm(0));
+ setOperandToSymbolRef(Insts[0], /* OpNum */ 1, Target, 0, Ctx,
+ ELF::R_AARCH64_NONE);
+ Insts[1].setOpcode(isLDRXl(LDRInst) ? AArch64::LDRXui : AArch64::LDRWui);
+ Insts[1].clear();
+ Insts[1].addOperand(MCOperand::createReg(DataReg));
+ Insts[1].addOperand(MCOperand::createReg(AddrReg));
+ Insts[1].addOperand(MCOperand::createImm(0));
+ Insts[1].addOperand(MCOperand::createImm(0));
+ setOperandToSymbolRef(Insts[1], /* OpNum */ 2, Target, 0, Ctx,
+ ELF::R_AARCH64_ADD_ABS_LO12_NC);
+ return Insts;
+ }
+
bool isTB(const MCInst &Inst) const {
return (Inst.getOpcode() == AArch64::TBNZW ||
Inst.getOpcode() == AArch64::TBNZX ||
diff --git a/bolt/test/AArch64/ldr-relaxation.s b/bolt/test/AArch64/ldr-relaxation.s
new file mode 100644
index 0000000000000..24464df6f4924
--- /dev/null
+++ b/bolt/test/AArch64/ldr-relaxation.s
@@ -0,0 +1,122 @@
+## Check that LDR relaxation will fail since LDR is inside a non-simple
+## function and there is no NOP next to it.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym FAIL=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: not llvm-bolt %t.so -o %t.bolt 2>&1 | FileCheck %s --check-prefix=FAIL
+
+# FAIL: BOLT-ERROR: cannot relax LDR in non-simple function _start
+
+.ifdef FAIL
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ br x2
+ ldr x0, _foo
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+## Check that LDR relaxation is not needed since the reference is not far away.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym NOT_NEEDED=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: llvm-bolt %t.so -o %t.bolt
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NOT_NEEDED
+
+# NOT_NEEDED: <_start>
+# NOT_NEEDED-NEXT: ldr
+
+.ifdef NOT_NEEDED
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ ldr x0, _start
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+## Check that LDR relaxation is done in a simple function, where NOP will
+## be inserted as needed.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym RELAX_SIMPLE=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: llvm-bolt %t.so -o %t.bolt
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELAX
+
+# RELAX: adrp
+# RELAX-NEXT: ldr
+
+.ifdef RELAX_SIMPLE
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ ldr x0, _foo
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+## Check that LDR relaxation is done in a non-simple function, where NOP
+## exists next to LDR.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym RELAX_NON_SIMPLE=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: llvm-bolt %t.so -o %t.bolt
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELAX
+
+.ifdef RELAX_NON_SIMPLE
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ br x2
+ ldr x0, _foo
+ nop
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+## Check LDR relaxation works on loading W (low 32-bit of X) registers.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym RELAX_SIMPLE_WREG=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: llvm-bolt %t.so -o %t.bolt
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELAXW
+
+# RELAXW: adrp x0
+# RELAXW-NEXT: ldr w0
+
+.ifdef RELAX_SIMPLE_WREG
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ ldr w0, _foo
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+ .section .text_cold
+ .global _foo
+ .align 3
+_foo:
+ .long 0x12345678
+.size _foo, .-_foo
``````````
</details>
https://github.com/llvm/llvm-project/pull/165787
More information about the llvm-commits
mailing list