[llvm] [BOLT][AArch64] Enabling Inlining for Memcpy for AArch64 in BOLT (PR #154929)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 28 10:12:26 PDT 2025
================
@@ -2597,6 +2597,120 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
getInstructionSize(const MCInst &Inst) const override {
return 4;
}
+
+  InstructionListType createInlineMemcpy(bool ReturnEnd) const override {
+    // No compile-time size is available at this call site; defer to the
+    // size-aware overload with an empty optional.
+    const std::optional<uint64_t> NoKnownSize;
+    return createInlineMemcpy(ReturnEnd, NoKnownSize);
+  }
+
+  std::optional<uint64_t>
+  extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const override {
+    // Only an unshifted MOVZ that defines TargetReg carries the literal
+    // value directly in its immediate operand.
+    if (Inst.getOpcode() != AArch64::MOVZXi)
+      return std::nullopt;
+    if (Inst.getOperand(0).getReg() != TargetReg)
+      return std::nullopt;
+    if (Inst.getOperand(2).getImm() != 0)
+      return std::nullopt;
+    return Inst.getOperand(1).getImm();
+  }
+
+  std::optional<uint64_t>
+  findMemcpySizeInBytes(const BinaryBasicBlock &BB,
+                        BinaryBasicBlock::iterator CallInst) const override {
+    // Determine the memcpy size (third argument) if it is a compile-time
+    // constant. Walk BACKWARD from the call: only the nearest write to the
+    // size register before the call is the reaching definition. (A forward
+    // scan that returns the first matching MOVZ would report a stale value
+    // if a later instruction redefines or clobbers the register.)
+    MCPhysReg SizeReg = getIntArgRegister(2);
+    if (SizeReg == getNoRegister())
+      return std::nullopt;
+
+    BitVector WrittenRegs(RegInfo->getNumRegs());
+    for (auto InstIt = CallInst; InstIt != BB.begin();) {
+      --InstIt;
+      const MCInst &Inst = *InstIt;
+      WrittenRegs.reset();
+      getWrittenRegs(Inst, WrittenRegs);
+      if (!WrittenRegs[SizeReg])
+        continue;
+      // Nearest def found. If it is not a plain unshifted MOVZ the size is
+      // not a usable compile-time constant, and extractMoveImmediate
+      // correctly yields nullopt.
+      return extractMoveImmediate(Inst, SizeReg);
+    }
+    return std::nullopt;
+  }
+
+  InstructionListType
+  createInlineMemcpy(bool ReturnEnd,
+                     std::optional<uint64_t> KnownSize) const override {
+    // Emit an inline instruction sequence equivalent to memcpy(x0, x1, Size).
+    // Inline expansion requires the size to be a known compile-time
+    // constant: dereferencing an empty optional here was undefined behavior
+    // (the no-size overload forwards std::nullopt), so make the
+    // precondition explicit and diagnosable.
+    assert(KnownSize.has_value() &&
+           "AArch64 memcpy inlining requires a known constant size");
+    InstructionListType Code;
+    uint64_t Size = *KnownSize;
+
+    generateSizeSpecificMemcpy(Code, Size);
+
+    // If _memcpy8, adjust X0 to return dest+size instead of dest.
+    // NOTE(review): ADDXri encodes a 12-bit unsigned immediate; this assumes
+    // Size <= 4095 — confirm callers only request small inline copies.
+    if (ReturnEnd)
+      Code.emplace_back(MCInstBuilder(AArch64::ADDXri)
+                            .addReg(AArch64::X0)
+                            .addReg(AArch64::X0)
+                            .addImm(Size)
+                            .addImm(0));
+    return Code;
+  }
+
+ InstructionListType generateSizeSpecificMemcpy(InstructionListType &Code,
+ uint64_t Size) const {
+ auto AddLoadStorePair = [&](unsigned LoadOpc, unsigned StoreOpc,
+ unsigned Reg, unsigned Offset = 0) {
+ Code.emplace_back(MCInstBuilder(LoadOpc)
+ .addReg(Reg)
+ .addReg(AArch64::X1)
+ .addImm(Offset));
+ Code.emplace_back(MCInstBuilder(StoreOpc)
+ .addReg(Reg)
+ .addReg(AArch64::X0)
+ .addImm(Offset));
+ };
+
+ // Generate optimal instruction sequences based on exact size.
+ switch (Size) {
+ case 1:
+ AddLoadStorePair(AArch64::LDRBBui, AArch64::STRBBui, AArch64::W3);
----------------
yafet-a wrote:
The tests have been updated to check for the exact temporary registers being used.
https://github.com/llvm/llvm-project/pull/154929
More information about the llvm-commits
mailing list