[llvm] [bolt][aarch64] test to reproduce the issue with ldr reg, literal (PR #165723)

Alexey Moksyakov via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 24 23:56:34 PST 2025


https://github.com/yavtuk updated https://github.com/llvm/llvm-project/pull/165723

>From 85cc465dd52cd3f139aa274c69deeb86c6efa564 Mon Sep 17 00:00:00 2001
From: Alexey Moksyakov <yavtuk at yandex.ru>
Date: Thu, 30 Oct 2025 17:23:18 +0300
Subject: [PATCH 1/2] [bolt][aarch64] test to reproduce the issue with ldr reg,
 literal

The `ldr reg, literal` instruction is limited to a +/- 1MB range.
emitCI puts the constants at the end of the function, where they end up out of that range.
---
 bolt/test/AArch64/materialize-constant.s | 74 ++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 bolt/test/AArch64/materialize-constant.s

diff --git a/bolt/test/AArch64/materialize-constant.s b/bolt/test/AArch64/materialize-constant.s
new file mode 100644
index 0000000000000..1c15626b09594
--- /dev/null
+++ b/bolt/test/AArch64/materialize-constant.s
@@ -0,0 +1,74 @@
+// this test checks a load literal instructions changed to movk
+
+// REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+
+# RUN: link_fdata %s %t.o %t.fdata
+# RUN: %clang %cflags -pie %t.o -o %t.exe -Wl,-q -Wl,-z,relro -Wl,-z,now
+# RUN: llvm-bolt %t.exe -o %t.bolt -data %t.fdata \
+# RUN:    --keep-nops --eliminate-unreachable=false
+# RUN: llvm-objdump --disassemble-symbols=foo %t.bolt | FileCheck %s
+
+# CHECK: mov{{.*}} w19, #0
+# CHECK-NEXT: mov{{.*}} w22, #0
+# CHECK-NEXT: movk{{.*}} w23, #0, lsl #16
+# CHECK-NEXT: movk{{.*}} w23, #100
+# CHECK-NEXT: movk{{.*}} w24, #0, lsl #16
+# CHECK-NEXT: movk{{.*}} w24, #3
+
+  .text
+  .align 4
+  .local foo
+  .type foo, %function
+foo:
+# FDATA: 1 main 0 1 foo 0 0 10
+    stp x29, x30, [sp, #-32]!
+    stp x19, x20, [sp, #16]
+    mov x29, sp
+
+    mov w19, #0 // counter = 0
+    mov w22, #0 // result = 0
+
+    ldr w23, .Llimit
+    ldr w24, .LStep
+    b .LStub
+
+.LConstants:
+  .Llimit: .word 100
+  .LStep:  .word 3
+
+.LStub:
+.rep 0x100000
+    nop
+.endr
+    b .Lmain_loop
+
+.Lmain_loop:
+    madd w22, w19, w24, w22  // result += counter * increment
+
+    add w19, w19, #1
+    cmp w19, w23
+    b.lt .Lmain_loop
+
+    mov w0, w22
+
+    b .Lreturn_point
+
+.Lreturn_point:
+    ldp x19, x20, [sp, #16]
+    ldp x29, x30, [sp], #32
+    ret
+.size foo, .-foo
+
+
+  .global main
+  .type main, %function
+main:
+  mov x0, #0
+  bl foo
+  mov     x0, 0
+  mov     w8, #93
+  svc     #0
+
+  .size main, .-main

>From 54f11caaf399ab05579fec23a40c3cdf4e44c82a Mon Sep 17 00:00:00 2001
From: Moksyakov Alexey <moksyakov.alexey at huawei.com>
Date: Fri, 21 Nov 2025 08:16:39 +0000
Subject: [PATCH 2/2] [bolt] simplify constant loads for X86 & AArch64

This patch fixes the issue with load-literal instructions
on AArch64 (bolt/test/AArch64/materialize-constant.s).
The address range of a literal load is limited to +/- 1MB,
but emitCI puts the constants at the end of the function,
where they end up out of that range.

SimplifyRODataLoads is now enabled by default for X86 and AArch64.

Signed-off-by: Moksyakov Alexey <moksyakov.alexey at huawei.com>
---
 bolt/include/bolt/Core/MCPlusBuilder.h        |  7 ++
 bolt/lib/Passes/BinaryPasses.cpp              | 50 +++++++---
 bolt/lib/Rewrite/BinaryPassManager.cpp        |  2 +-
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 50 ++++++++++
 bolt/lib/Target/X86/X86MCPlusBuilder.cpp      | 18 ++++
 bolt/test/AArch64/materialize-constant.s      | 93 +++++++++++++++----
 6 files changed, 187 insertions(+), 33 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 69ae4fb8ddcc9..c14eb8b354424 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1872,6 +1872,13 @@ class MCPlusBuilder {
     return {};
   }
 
+  virtual InstructionListType materializeConstant(const MCInst &Inst,
+                                                  StringRef ConstantData,
+                                                  uint64_t Offset) const {
+    llvm_unreachable("not implemented"); // no target-independent fallback
+    return {}; // an empty list makes the caller keep the original load
+  }
+
   /// Creates a new unconditional branch instruction in Inst and set its operand
   /// to TBB.
   virtual void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 1d187de11c35e..3f635c7c5c176 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1187,7 +1187,8 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
   uint64_t NumDynamicLocalLoadsFound = 0;
 
   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
-    for (MCInst &Inst : *BB) {
+    for (auto It = BB->begin(); It != BB->end(); ++It) {
+      const MCInst &Inst = *It;
       unsigned Opcode = Inst.getOpcode();
       const MCInstrDesc &Desc = BC.MII->get(Opcode);
 
@@ -1200,7 +1201,7 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
 
       if (MIB->hasPCRelOperand(Inst)) {
         // Try to find the symbol that corresponds to the PC-relative operand.
-        MCOperand *DispOpI = MIB->getMemOperandDisp(Inst);
+        MCOperand *DispOpI = MIB->getMemOperandDisp(const_cast<MCInst &>(Inst));
         assert(DispOpI != Inst.end() && "expected PC-relative displacement");
         assert(DispOpI->isExpr() &&
                "found PC-relative with non-symbolic displacement");
@@ -1226,28 +1227,53 @@ bool SimplifyRODataLoads::simplifyRODataLoads(BinaryFunction &BF) {
       }
 
       // Get the contents of the section containing the target address of the
-      // memory operand. We are only interested in read-only sections.
+      // memory operand. Only X86 skips writable sections here; NOTE(review):
+      // AArch64 loads from writable sections are not filtered out - confirm.
       ErrorOr<BinarySection &> DataSection =
           BC.getSectionForAddress(TargetAddress);
-      if (!DataSection || DataSection->isWritable())
+      if (!DataSection)
         continue;
 
+      if (BC.isX86() && DataSection->isWritable())
+        continue;
+
+      if (DataSection->isText()) {
+        // if data is not part of a function, check if it is part of a global CI
+        // do not proceed if there aren't data markers for CIs
+        BinaryFunction *BFTgt =
+            BC.getBinaryFunctionContainingAddress(TargetAddress,
+                                                  /*CheckPastEnd*/ false,
+                                                  /*UseMaxSize*/ true);
+        const bool IsInsideFunc =
+            BFTgt && BFTgt->isInConstantIsland(TargetAddress);
+
+        auto CIEndIter = BC.AddressToConstantIslandMap.end();
+        auto CIIter = BC.AddressToConstantIslandMap.find(TargetAddress);
+        if (!IsInsideFunc && CIIter == CIEndIter)
+          continue;
+      }
+
       if (BC.getRelocationAt(TargetAddress) ||
           BC.getDynamicRelocationAt(TargetAddress))
         continue;
 
-      uint32_t Offset = TargetAddress - DataSection->getAddress();
-      StringRef ConstantData = DataSection->getContents();
-
       ++NumLocalLoadsFound;
       if (BB->hasProfile())
         NumDynamicLocalLoadsFound += BB->getExecutionCount();
 
-      if (MIB->replaceMemOperandWithImm(Inst, ConstantData, Offset)) {
-        ++NumLocalLoadsSimplified;
-        if (BB->hasProfile())
-          NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
-      }
+      uint32_t Offset = TargetAddress - DataSection->getAddress();
+      StringRef ConstantData = DataSection->getContents();
+      const InstructionListType Instrs =
+          MIB->materializeConstant(Inst, ConstantData, Offset);
+      if (Instrs.empty())
+        continue;
+
+      auto IIter = BB->findInstruction(&Inst);
+      It = BB->replaceInstruction(IIter, Instrs);
+
+      ++NumLocalLoadsSimplified;
+      if (BB->hasProfile())
+        NumDynamicLocalLoadsSimplified += BB->getExecutionCount();
     }
   }
 
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 1a0f6d75d63e8..60f80ceb2d376 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -236,7 +236,7 @@ static cl::opt<bool> SimplifyRODataLoads(
     "simplify-rodata-loads",
     cl::desc("simplify loads from read-only sections by replacing the memory "
              "operand with the constant found in the corresponding section"),
-    cl::cat(BoltOptCategory));
+    cl::init(true), cl::cat(BoltOptCategory));
 
 static cl::list<std::string>
 SpecializeMemcpy1("memcpy1-spec",
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index db3989d6b0b5f..cd9b39eb93d0d 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2770,6 +2770,56 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return Insts;
   }
 
+  /// Build a MOVK sequence that writes the constant read by the literal load
+  /// \p Inst (LDRWl or LDRXl) from \p ConstantData at \p Offset into the
+  /// load's destination register. Returns an empty list when the opcode is
+  /// not handled or the constant is not fully inside \p ConstantData.
+  InstructionListType materializeConstant(const MCInst &Inst,
+                                          StringRef ConstantData,
+                                          uint64_t Offset) const override {
+    // Bytes loaded by Inst and the MOVK opcode of the matching width.
+    unsigned DataSize;
+    unsigned Opcode;
+    switch (Inst.getOpcode()) {
+    case AArch64::LDRWl:
+      DataSize = 4;
+      Opcode = AArch64::MOVKWi;
+      break;
+    case AArch64::LDRXl:
+      DataSize = 8;
+      Opcode = AArch64::MOVKXi;
+      break;
+    default:
+      // Other literal loads (e.g. LDRSW or FP/SIMD forms) are not handled;
+      // an empty list tells the caller to keep the original instruction.
+      return {};
+    }
+
+    // Reject reads that start or end past the section contents. Comparing
+    // Offset first avoids unsigned wrap-around in the subtraction below.
+    if (Offset > ConstantData.size() ||
+        ConstantData.size() - Offset < DataSize)
+      return {};
+
+    const uint64_t ImmVal =
+        DataExtractor(ConstantData, true, 8).getUnsigned(&Offset, DataSize);
+
+    // One MOVK per 16-bit chunk, most significant chunk first. Together they
+    // overwrite every bit of the destination, so its old value is irrelevant.
+    const MCPhysReg Reg = Inst.getOperand(0).getReg();
+    InstructionListType Insts;
+    for (unsigned Shift = DataSize * 8; Shift != 0;) {
+      Shift -= 16;
+      MCInst &MovK = Insts.emplace_back();
+      MovK.setOpcode(Opcode);
+      MovK.addOperand(MCOperand::createReg(Reg));
+      MovK.addOperand(MCOperand::createReg(Reg));
+      MovK.addOperand(MCOperand::createImm((ImmVal >> Shift) & 0xFFFF));
+      MovK.addOperand(MCOperand::createImm(Shift));
+    }
+    return Insts;
+  }
+
   std::optional<Relocation>
   createRelocation(const MCFixup &Fixup,
                    const MCAsmBackend &MAB) const override {
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 7c24c2ce136fa..bf1adeabe27cc 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -1477,6 +1477,24 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     return true;
   }
 
+  InstructionListType materializeConstant(const MCInst &Inst,
+                                          StringRef ConstantData,
+                                          uint64_t Offset) const override {
+    // Fold the constant into a scratch copy so that Inst itself is untouched.
+    MCInst Folded = Inst;
+    if (!replaceMemOperandWithImm(Folded, ConstantData, Offset))
+      return {};
+
+    // Rebuild the instruction from the folded opcode and operand list.
+    MCInst Replacement;
+    Replacement.setOpcode(Folded.getOpcode());
+    for (const MCOperand &Operand : Folded)
+      Replacement.addOperand(Operand);
+    InstructionListType Instrs;
+    Instrs.push_back(Replacement);
+    return Instrs;
+  }
+
   /// TODO: this implementation currently works for the most common opcodes that
   /// load from memory. It can be extended to work with memory store opcodes as
   /// well as more memory load opcodes.
diff --git a/bolt/test/AArch64/materialize-constant.s b/bolt/test/AArch64/materialize-constant.s
index 1c15626b09594..f49111e556c20 100644
--- a/bolt/test/AArch64/materialize-constant.s
+++ b/bolt/test/AArch64/materialize-constant.s
@@ -1,28 +1,36 @@
 // this test checks a load literal instructions changed to movk
 
-// REQUIRES: system-linux
+# REQUIRES: system-linux
 
-# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: rm -rf %t && split-file %s %t
 
-# RUN: link_fdata %s %t.o %t.fdata
-# RUN: %clang %cflags -pie %t.o -o %t.exe -Wl,-q -Wl,-z,relro -Wl,-z,now
-# RUN: llvm-bolt %t.exe -o %t.bolt -data %t.fdata \
-# RUN:    --keep-nops --eliminate-unreachable=false
-# RUN: llvm-objdump --disassemble-symbols=foo %t.bolt | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN:    %t/materialize-ci-big-func.s -o %t/materialize-ci-big-func.o
+# RUN: %clang %t/materialize-ci-big-func.o -Wl,-q --target=aarch64-unknown-linux-gnu \
+# RUN:    -o %t/materialize-ci-big-func.exe
+# RUN: llvm-bolt %t/materialize-ci-big-func.exe \
+# RUN:    -o %t/materialize-ci-big-func.bolt --lite=0 \
+# RUN:    --keep-nops --eliminate-unreachable=false \
+# RUN:    | FileCheck %s --check-prefix=CHECK-BIG-FUNC
 
-# CHECK: mov{{.*}} w19, #0
-# CHECK-NEXT: mov{{.*}} w22, #0
-# CHECK-NEXT: movk{{.*}} w23, #0, lsl #16
-# CHECK-NEXT: movk{{.*}} w23, #100
-# CHECK-NEXT: movk{{.*}} w24, #0, lsl #16
-# CHECK-NEXT: movk{{.*}} w24, #3
+# CHECK-BIG-FUNC: simplified 2 out of 2 loads from a statically computed address
 
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN:    %t/materialize-ci-outside-func.s -o %t/materialize-ci-outside-func.o
+# RUN:  %clang %t/materialize-ci-outside-func.o -Wl,-q --target=aarch64-unknown-linux-gnu \
+# RUN:    -o %t/materialize-ci-outside-func.exe
+# RUN: llvm-bolt %t/materialize-ci-outside-func.exe \
+# RUN:    -o %t/materialize-ci-outside-func.bolt --lite=0 \
+# RUN:    | FileCheck %s --check-prefix=CHECK-OUTSIDE-FUNC
+
+# CHECK-OUTSIDE-FUNC: simplified 2 out of 2 loads from a statically computed address
+
+#--- materialize-ci-big-func.s
   .text
   .align 4
   .local foo
   .type foo, %function
 foo:
-# FDATA: 1 main 0 1 foo 0 0 10
     stp x29, x30, [sp, #-32]!
     stp x19, x20, [sp, #16]
     mov x29, sp
@@ -31,12 +39,12 @@ foo:
     mov w22, #0 // result = 0
 
     ldr w23, .Llimit
-    ldr w24, .LStep
+    ldr x24, .LStep
     b .LStub
 
 .LConstants:
   .Llimit: .word 100
-  .LStep:  .word 3
+  .LStep:  .xword 3
 
 .LStub:
 .rep 0x100000
@@ -67,8 +75,53 @@ foo:
 main:
   mov x0, #0
   bl foo
-  mov     x0, 0
-  mov     w8, #93
-  svc     #0
+  mov x0, 0
+  mov w8, #93
+  svc #0
+
+.size main, .-main
+
+#--- materialize-ci-outside-func.s
+// check that constants in .text section but outside functions
+// are materialized correctly
+  .text
+  .align 4
+  .local foo
+  .type foo, %function
+foo:
+    stp x29, x30, [sp, #-32]! // allocate 32 bytes and store x29/x30 at the new sp
+    stp x19, x20, [sp, #16] // store x19/x20 in the upper half of that area
+    mov x29, sp
+
+    mov w19, #0 // counter = 0
+    mov w22, #0 // result = 0
+
+    ldr w23, .Llimit // 32-bit literal load; .Llimit is defined after .size foo
+    ldr x24, .LStep // 64-bit literal load; .LStep is defined after .size foo
+
+.Lmain_loop:
+    madd w22, w19, w24, w22 // result += counter * step (low 32 bits of x24)
+    add w19, w19, #1 // counter += 1
+    cmp w19, w23
+    b.lt .Lmain_loop // repeat while counter < limit (signed compare)
+    mov w0, w22 // w0 = result
+.Lreturn_point:
+    ldp x19, x20, [sp, #16] // reload x19/x20 from where the prologue stored them
+    ldp x29, x30, [sp], #32 // reload x29/x30 and release the 32 bytes
+    ret
+.size foo, .-foo
+
+.LConstants:
+  .Llimit: .word 100
+  .LStep:  .xword 3
+
+  .global main
+  .type main, %function
+main:
+  mov x0, #0
+  bl foo // call the function that contains the two literal loads
+  mov x0, 0 // x0 = 0 before the svc (presumably the exit status)
+  mov w8, #93 // presumably the exit syscall number - confirm
+  svc #0 // supervisor call; main has no instructions after it
 
-  .size main, .-main
+.size main, .-main
\ No newline at end of file



More information about the llvm-commits mailing list