[llvm] [BOLT][AArch64] Enabling Inlining for Memcpy for AArch64 in BOLT (PR #154929)
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 07:53:51 PDT 2025
================
@@ -2597,6 +2597,210 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
getInstructionSize(const MCInst &Inst) const override {
return 4;
}
+
+ /// Size-agnostic overload: forwards to the two-argument form with no known
+ /// copy size.
+ InstructionListType createInlineMemcpy(bool ReturnEnd) const override {
+   return createInlineMemcpy(ReturnEnd, /*KnownSize=*/std::nullopt);
+ }
+
+ /// Returns the immediate carried by \p Inst when it is a `MOVZXi` whose
+ /// first operand is \p TargetReg and whose third operand is the immediate
+ /// zero; returns std::nullopt for anything else.
+ std::optional<uint64_t>
+ extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const override {
+   // Anything other than a MOVZXi with at least three operands is rejected.
+   if (Inst.getOpcode() != AArch64::MOVZXi || Inst.getNumOperands() < 3)
+     return std::nullopt;
+
+   const MCOperand &Dst = Inst.getOperand(0);
+   const bool WritesTarget = Dst.isReg() && Dst.getReg() == TargetReg;
+   if (!WritesTarget)
+     return std::nullopt;
+
+   const MCOperand &Value = Inst.getOperand(1);
+   const MCOperand &Shift = Inst.getOperand(2);
+   const bool IsUnshiftedImm =
+       Value.isImm() && Shift.isImm() && Shift.getImm() == 0;
+   if (!IsUnshiftedImm)
+     return std::nullopt;
+
+   return Value.getImm();
+ }
+
+ /// Builds the instruction sequence that replaces a call to memcpy.
+ ///
+ /// \param ReturnEnd when true, X0 is advanced by the copy size so that the
+ ///        sequence leaves "destination + size" in X0.
+ /// \param KnownSize the copy size when it is known; when present, the copy
+ ///        itself is produced by generateSizeSpecificMemcpy().
+ /// \returns the generated instructions, in execution order.
+ InstructionListType
+ createInlineMemcpy(bool ReturnEnd,
+                    std::optional<uint64_t> KnownSize) const override {
+   InstructionListType Code;
+
+   // Emit the copy before touching X0. X0 is both the destination pointer and
+   // the return register, so adjusting it first would make a copy that
+   // addresses the destination through X0 write to "destination + size".
+   // NOTE(review): with no known size nothing that performs the copy is
+   // emitted here; presumably callers only inline when a size was recovered
+   // -- confirm against the call site.
+   if (KnownSize.has_value())
+     Code = generateSizeSpecificMemcpy(Code, *KnownSize);
+
+   if (!ReturnEnd)
+     return Code;
+
+   if (KnownSize.has_value() && (*KnownSize >> 12) == 0) {
+     // The size fits the 12-bit unsigned immediate of ADD (0-4095).
+     Code.emplace_back(MCInstBuilder(AArch64::ADDXri)
+                           .addReg(AArch64::X0)
+                           .addReg(AArch64::X0)
+                           .addImm(*KnownSize)
+                           .addImm(0));
+   } else {
+     // Unknown or large size: add the size register X2 instead.
+     Code.emplace_back(MCInstBuilder(AArch64::ADDXrr)
+                           .addReg(AArch64::X0)
+                           .addReg(AArch64::X0)
+                           .addReg(AArch64::X2));
+   }
+   return Code;
+ }
+
+ InstructionListType generateSizeSpecificMemcpy(InstructionListType &Code,
+ uint64_t Size) const {
+ // Generate optimal instruction sequences based on exact size
+ switch (Size) {
+ case 1:
+ // Single byte copy
+ Code.emplace_back(MCInstBuilder(AArch64::LDRBBui)
----------------
sjoerdmeijer wrote:
All this code is duplicated here. The only things that change are the load and store opcodes. Maybe create a switch over `Size` that just records the load/store opcodes, and then have just 2 calls to create the instructions. I see a bit further down that some registers might be different as well, but hopefully you'll get the gist of this and can reduce the code a bit.
https://github.com/llvm/llvm-project/pull/154929
More information about the llvm-commits
mailing list