[llvm] [BOLT][AArch64] Enabling Inlining for Memcpy for AArch64 in BOLT (PR #154929)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 28 10:12:26 PDT 2025


================
@@ -2597,6 +2597,120 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   getInstructionSize(const MCInst &Inst) const override {
     return 4;
   }
+
+  /// Size-less entry point: forwards to the size-aware overload, passing
+  /// std::nullopt to signal that the number of bytes to copy is not known.
+  /// NOTE(review): the size-aware overload needs a concrete size to generate
+  /// code, so confirm that callers never rely on this path.
+  InstructionListType createInlineMemcpy(bool ReturnEnd) const override {
+    const std::optional<uint64_t> UnknownSize = std::nullopt;
+    return createInlineMemcpy(ReturnEnd, UnknownSize);
+  }
+
+  /// Returns the constant loaded by \p Inst when it is a MOVZXi with no shift
+  /// whose destination register is \p TargetReg.
+  ///
+  /// \param Inst instruction to inspect.
+  /// \param TargetReg register the constant must be written to.
+  /// \returns the immediate value, or std::nullopt when \p Inst is not such a
+  ///          move.
+  std::optional<uint64_t>
+  extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const override {
+    if (Inst.getOpcode() != AArch64::MOVZXi)
+      return std::nullopt;
+
+    const auto &DestOp = Inst.getOperand(0);
+    const auto &ValueOp = Inst.getOperand(1);
+    const auto &ShiftOp = Inst.getOperand(2);
+
+    // getReg()/getImm() are only valid on operands of the matching kind. A
+    // MOVZ whose value operand is not a plain immediate (for example an
+    // expression operand) carries no usable constant, so report "unknown"
+    // instead of reading it as an immediate.
+    if (!DestOp.isReg() || !ValueOp.isImm() || !ShiftOp.isImm())
+      return std::nullopt;
+
+    // Only an unshifted move into the requested register qualifies.
+    if (DestOp.getReg() != TargetReg || ShiftOp.getImm() != 0)
+      return std::nullopt;
+
+    return ValueOp.getImm();
+  }
+
+  /// Determines the constant held in the register returned by
+  /// getIntArgRegister(2) (the memcpy size argument) when control reaches
+  /// \p CallInst, by scanning \p BB from its first instruction up to, but not
+  /// including, \p CallInst.
+  ///
+  /// The result reflects the last instruction in that range that writes the
+  /// size register: if it is an immediate move recognised by
+  /// extractMoveImmediate() its value is returned; if it is any other kind of
+  /// write, or the register is never written in the block, the size is
+  /// unknown.
+  ///
+  /// \param BB basic block containing the call.
+  /// \param CallInst position of the memcpy call inside \p BB.
+  /// \returns the size in bytes, or std::nullopt when it cannot be determined.
+  std::optional<uint64_t>
+  findMemcpySizeInBytes(const BinaryBasicBlock &BB,
+                        BinaryBasicBlock::iterator CallInst) const override {
+    // Without a size register there is nothing to track; decide once instead
+    // of re-checking on every instruction.
+    const MCPhysReg SizeReg = getIntArgRegister(2);
+    if (SizeReg == getNoRegister())
+      return std::nullopt;
+
+    BitVector WrittenRegs(RegInfo->getNumRegs());
+    std::optional<uint64_t> ExtractedSize;
+
+    for (auto InstIt = BB.begin(); InstIt != CallInst; ++InstIt) {
+      const MCInst &Inst = *InstIt;
+      WrittenRegs.reset();
+      getWrittenRegs(Inst, WrittenRegs);
+
+      // Every write to the size register supersedes what was known before.
+      // A recognised immediate move yields the new constant; any other write
+      // makes the value unknown again. Returning on the first match would
+      // report a stale constant when the register is overwritten later.
+      if (WrittenRegs[SizeReg])
+        ExtractedSize = extractMoveImmediate(Inst, SizeReg);
+    }
+    return ExtractedSize;
+  }
+
+  /// Builds the instruction sequence that replaces a memcpy call whose byte
+  /// count is known.
+  ///
+  /// \param ReturnEnd when true, an ADDXri is appended that advances X0 by
+  ///        the copy size, so X0 holds dest+size instead of dest (the
+  ///        _memcpy8 case).
+  /// \param KnownSize number of bytes to copy; must hold a value.
+  /// \returns the generated instructions.
+  InstructionListType
+  createInlineMemcpy(bool ReturnEnd,
+                     std::optional<uint64_t> KnownSize) const override {
+    // Use value() rather than operator*: reading an empty optional through
+    // operator* is undefined behavior, whereas value() fails detectably.
+    // This function has no expansion for an unknown size, so an empty
+    // KnownSize cannot be turned into correct code.
+    const uint64_t Size = KnownSize.value();
+
+    InstructionListType Code;
+    generateSizeSpecificMemcpy(Code, Size);
+
+    // If _memcpy8, adjust X0 to return dest+size instead of dest.
+    // NOTE(review): Size is used directly as the ADDXri immediate with a
+    // shift of 0; presumably callers bound it so that it fits the immediate
+    // field -- confirm.
+    if (ReturnEnd)
+      Code.emplace_back(MCInstBuilder(AArch64::ADDXri)
+                            .addReg(AArch64::X0)
+                            .addReg(AArch64::X0)
+                            .addImm(Size)
+                            .addImm(0));
+    return Code;
+  }
+
+  InstructionListType generateSizeSpecificMemcpy(InstructionListType &Code,
+                                                 uint64_t Size) const {
+    auto AddLoadStorePair = [&](unsigned LoadOpc, unsigned StoreOpc,
+                                unsigned Reg, unsigned Offset = 0) {
+      Code.emplace_back(MCInstBuilder(LoadOpc)
+                            .addReg(Reg)
+                            .addReg(AArch64::X1)
+                            .addImm(Offset));
+      Code.emplace_back(MCInstBuilder(StoreOpc)
+                            .addReg(Reg)
+                            .addReg(AArch64::X0)
+                            .addImm(Offset));
+    };
+
+    // Generate optimal instruction sequences based on exact size.
+    switch (Size) {
+    case 1:
+      AddLoadStorePair(AArch64::LDRBBui, AArch64::STRBBui, AArch64::W3);
----------------
yafet-a wrote:

The tests have been updated to check for the exact temporary registers being used.

https://github.com/llvm/llvm-project/pull/154929


More information about the llvm-commits mailing list