[llvm] [BOLT][AArch64] Enabling Inlining for Memcpy for AArch64 in BOLT (PR #154929)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 04:20:11 PDT 2025
https://github.com/yafet-a created https://github.com/llvm/llvm-project/pull/154929
## Overview
The memcpy-inlining pass in BOLT is currently X86-specific: it emits the `rep movsb` instruction, which has no equivalent in Armv8-A.
This patch implements static size analysis for AArch64 memcpy inlining: it extracts the copy size from the instructions preceding the call and uses it to generate width-specific load/store sequences.
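
To make the size-recovery step concrete, here is a minimal standalone sketch with simplified stand-in types (`SimpleInst` and `findKnownSize` are illustrative names only, not BOLT APIs); the real logic is the scan loop added to `InlineMemcpy::runOnFunctions` and `AArch64MCPlusBuilder::extractMoveImmediate` in the patches below.

```cpp
// Sketch only: recover the memcpy size by scanning the instructions that
// precede the call for a move-immediate into the size register (x2 on
// AArch64). Types here are simplified stand-ins, not BOLT's MCInst.
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct SimpleInst {
  std::string Opcode; // e.g. "movz", "add", "bl"
  std::string Dst;    // destination register, if any
  uint64_t Imm = 0;   // immediate operand, if any
};

// Mirror the patch's scan: walk the block up to the call and stop at the
// first move-immediate that writes the size register.
std::optional<uint64_t> findKnownSize(const std::vector<SimpleInst> &Preceding,
                                      const std::string &SizeReg) {
  for (const SimpleInst &I : Preceding)
    if (I.Opcode == "movz" && I.Dst == SizeReg)
      return I.Imm;
  return std::nullopt;
}

int main() {
  // add x1, sp, #16 ; add x0, sp, #8 ; mov x2, #37 ; bl memcpy
  std::vector<SimpleInst> BeforeCall = {
      {"add", "x1", 16}, {"add", "x0", 8}, {"movz", "x2", 37}};
  if (std::optional<uint64_t> Size = findKnownSize(BeforeCall, "x2"))
    std::cout << "known memcpy size: " << *Size << "\n"; // prints 37
  return 0;
}
```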
## Testing Coverage (`inline-memcpy.s`)
### Positive Tests:
- [x] Exact size optimizations: 1, 2, 4, 8, 16, 32 bytes → optimal instruction sequences
- [x] Arbitrary size decomposition: 37 bytes → 16+16+4+1 byte sequence with correct offsets (see the sketch after this list)
- [x] Inline count verification: `CHECK-INLINE: inlined 8 memcpy() calls`
- [x] Assembly validation: `CHECK-ASM` patterns verify the exact generated instructions
### Negative Tests:
- [x] Large size safety: 128 bytes → `CHECK-ASM-NOT: ldr.*q` (no SIMD sequence is generated above the 64-byte inlining limit)
- [x] No unwanted instructions: `CHECK-ASM-NOT` patterns ensure clean generation
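
To illustrate the arbitrary-size decomposition referenced above, the following standalone sketch reproduces the greedy 16/8/4/2/1 chunking for a known size. `decomposeCopy` is a hypothetical helper for illustration only; the actual emission lives in `AArch64MCPlusBuilder::generateSizeSpecificMemcpy` in the third patch below.

```cpp
// Sketch only: compute the (offset, width) chunks emitted for a known copy
// size of at most 64 bytes, using the same greedy 16/8/4/2/1 ordering as the
// patch. decomposeCopy itself does not exist in BOLT.
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

std::vector<std::pair<uint64_t, uint64_t>> decomposeCopy(uint64_t Size) {
  std::vector<std::pair<uint64_t, uint64_t>> Chunks; // {offset, width}
  if (Size > 64)
    return Chunks; // above 64 bytes the pass emits no inline sequence
  const uint64_t Widths[] = {16, 8, 4, 2, 1};
  uint64_t Offset = 0;
  for (uint64_t Width : Widths)
    while (Size - Offset >= Width) {
      Chunks.emplace_back(Offset, Width);
      Offset += Width;
    }
  return Chunks;
}

int main() {
  for (auto [Offset, Width] : decomposeCopy(37))
    std::cout << Width << " bytes at offset " << Offset << "\n";
  // Output: 16 at 0, 16 at 16, 4 at 32, 1 at 36, matching the test's
  // CHECK-ASM offsets #0x10, #0x20, and #0x24.
  return 0;
}
```

For a 37-byte copy this yields chunks at offsets 0, 16, 32, and 36, which is exactly what the `CHECK-ASM` lines for `test_37_byte_arbitrary` verify.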
From ce56f84aa7c86e1b35cf0ca4218a1f23702a206e Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 21 Aug 2025 10:12:03 -0700
Subject: [PATCH 1/3] pre-commit test
---
bolt/test/AArch64/inline-memcpy.s | 193 ++++++++++++++++++++++++++++++
1 file changed, 193 insertions(+)
create mode 100644 bolt/test/AArch64/inline-memcpy.s
diff --git a/bolt/test/AArch64/inline-memcpy.s b/bolt/test/AArch64/inline-memcpy.s
new file mode 100644
index 0000000000000..3bb498e600fb6
--- /dev/null
+++ b/bolt/test/AArch64/inline-memcpy.s
@@ -0,0 +1,193 @@
+## This test checks that BOLT correctly inlines memcpy calls on AArch64.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang --target=aarch64-unknown-linux-gnu %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe --inline-memcpy -o %t.bolt 2>&1 | FileCheck %s --check-prefix=CHECK-INLINE
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=CHECK-ASM
+
+# Verify BOLT reports that it inlined memcpy calls (all 8 calls processed)
+# CHECK-INLINE: BOLT-INFO: inlined 8 memcpy() calls
+
+# Each function should use optimal size-specific instructions and NO memcpy calls
+
+# 1-byte copy should use single byte load/store (ldrb/strb)
+# CHECK-ASM-LABEL: <test_1_byte_direct>:
+# CHECK-ASM: ldrb{{.*}}w{{[0-9]+}}, [x1]
+# CHECK-ASM: strb{{.*}}w{{[0-9]+}}, [x0]
+# CHECK-ASM-NOT: bl{{.*}}<memcpy
+
+# 2-byte copy should use single 16-bit load/store (ldrh/strh)
+# CHECK-ASM-LABEL: <test_2_byte_direct>:
+# CHECK-ASM: ldrh{{.*}}w{{[0-9]+}}, [x1]
+# CHECK-ASM: strh{{.*}}w{{[0-9]+}}, [x0]
+# CHECK-ASM-NOT: bl{{.*}}<memcpy
+
+# 4-byte copy should use single 32-bit load/store (w register)
+# CHECK-ASM-LABEL: <test_4_byte_direct>:
+# CHECK-ASM: ldr{{.*}}w{{[0-9]+}}, [x1]
+# CHECK-ASM: str{{.*}}w{{[0-9]+}}, [x0]
+# CHECK-ASM-NOT: bl{{.*}}<memcpy
+
+# 8-byte copy should use single 64-bit load/store (x register)
+# CHECK-ASM-LABEL: <test_8_byte_direct>:
+# CHECK-ASM: ldr{{.*}}x{{[0-9]+}}, [x1]
+# CHECK-ASM: str{{.*}}x{{[0-9]+}}, [x0]
+# CHECK-ASM-NOT: bl{{.*}}<memcpy
+
+# 16-byte copy should use single 128-bit SIMD load/store (q register)
+# CHECK-ASM-LABEL: <test_16_byte_direct>:
+# CHECK-ASM: ldr{{.*}}q{{[0-9]+}}, [x1]
+# CHECK-ASM: str{{.*}}q{{[0-9]+}}, [x0]
+# CHECK-ASM-NOT: bl{{.*}}<memcpy
+
+# 32-byte copy should use two 128-bit SIMD operations
+# CHECK-ASM-LABEL: <test_32_byte_direct>:
+# CHECK-ASM: ldr{{.*}}q{{[0-9]+}}, [x1]
+# CHECK-ASM: str{{.*}}q{{[0-9]+}}, [x0]
+# CHECK-ASM: ldr{{.*}}q{{[0-9]+}}, [x1, #0x10]
+# CHECK-ASM: str{{.*}}q{{[0-9]+}}, [x0, #0x10]
+# CHECK-ASM-NOT: bl{{.*}}<memcpy
+
+# 37-byte copy should use greedy decomposition: (2*16) + (1*4) + (1*1)
+# CHECK-ASM-LABEL: <test_37_byte_arbitrary>:
+# CHECK-ASM: ldr{{.*}}q{{[0-9]+}}, [x1]
+# CHECK-ASM: str{{.*}}q{{[0-9]+}}, [x0]
+# CHECK-ASM: ldr{{.*}}q{{[0-9]+}}, [x1, #0x10]
+# CHECK-ASM: str{{.*}}q{{[0-9]+}}, [x0, #0x10]
+# CHECK-ASM: ldr{{.*}}w{{[0-9]+}}, [x1, #0x20]
+# CHECK-ASM: str{{.*}}w{{[0-9]+}}, [x0, #0x20]
+# CHECK-ASM: ldrb{{.*}}w{{[0-9]+}}, [x1, #0x24]
+# CHECK-ASM: strb{{.*}}w{{[0-9]+}}, [x0, #0x24]
+# CHECK-ASM-NOT: bl{{.*}}<memcpy
+
+# 128-byte copy should be "inlined" by removing the call entirely (too large for real inlining)
+# CHECK-ASM-LABEL: <test_128_byte_too_large>:
+# CHECK-ASM-NOT: bl{{.*}}<memcpy
+# CHECK-ASM-NOT: ldr{{.*}}q{{[0-9]+}}
+
+ .text
+ .globl test_1_byte_direct
+ .type test_1_byte_direct,@function
+test_1_byte_direct:
+ stp x29, x30, [sp, #-32]!
+ mov x29, sp
+ add x1, sp, #16
+ add x0, sp, #8
+ mov x2, #1
+ bl memcpy
+ ldp x29, x30, [sp], #32
+ ret
+ .size test_1_byte_direct, .-test_1_byte_direct
+
+ .globl test_2_byte_direct
+ .type test_2_byte_direct,@function
+test_2_byte_direct:
+ stp x29, x30, [sp, #-32]!
+ mov x29, sp
+ add x1, sp, #16
+ add x0, sp, #8
+ mov x2, #2
+ bl memcpy
+ ldp x29, x30, [sp], #32
+ ret
+ .size test_2_byte_direct, .-test_2_byte_direct
+
+ .globl test_4_byte_direct
+ .type test_4_byte_direct,@function
+test_4_byte_direct:
+ stp x29, x30, [sp, #-32]!
+ mov x29, sp
+ add x1, sp, #16
+ add x0, sp, #8
+ mov x2, #4
+ bl memcpy
+ ldp x29, x30, [sp], #32
+ ret
+ .size test_4_byte_direct, .-test_4_byte_direct
+
+ .globl test_8_byte_direct
+ .type test_8_byte_direct,@function
+test_8_byte_direct:
+ stp x29, x30, [sp, #-32]!
+ mov x29, sp
+ add x1, sp, #16
+ add x0, sp, #8
+ mov x2, #8
+ bl memcpy
+ ldp x29, x30, [sp], #32
+ ret
+ .size test_8_byte_direct, .-test_8_byte_direct
+
+ .globl test_16_byte_direct
+ .type test_16_byte_direct,@function
+test_16_byte_direct:
+ stp x29, x30, [sp, #-48]!
+ mov x29, sp
+ add x1, sp, #16
+ add x0, sp, #32
+ mov x2, #16
+ bl memcpy
+ ldp x29, x30, [sp], #48
+ ret
+ .size test_16_byte_direct, .-test_16_byte_direct
+
+ .globl test_32_byte_direct
+ .type test_32_byte_direct,@function
+test_32_byte_direct:
+ stp x29, x30, [sp, #-80]!
+ mov x29, sp
+ add x1, sp, #16
+ add x0, sp, #48
+ mov x2, #32
+ bl memcpy
+ ldp x29, x30, [sp], #80
+ ret
+ .size test_32_byte_direct, .-test_32_byte_direct
+
+ .globl test_37_byte_arbitrary
+ .type test_37_byte_arbitrary,@function
+test_37_byte_arbitrary:
+ stp x29, x30, [sp, #-96]!
+ mov x29, sp
+ add x1, sp, #16
+ add x0, sp, #56
+ mov x2, #37
+ bl memcpy
+ ldp x29, x30, [sp], #96
+ ret
+ .size test_37_byte_arbitrary, .-test_37_byte_arbitrary
+
+ .globl test_128_byte_too_large
+ .type test_128_byte_too_large,@function
+test_128_byte_too_large:
+ stp x29, x30, [sp, #-288]!
+ mov x29, sp
+ add x1, sp, #16
+ add x0, sp, #152
+ mov x2, #128
+ bl memcpy
+ ldp x29, x30, [sp], #288
+ ret
+ .size test_128_byte_too_large, .-test_128_byte_too_large
+
+ .globl main
+ .type main,@function
+main:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ bl test_1_byte_direct
+ bl test_2_byte_direct
+ bl test_4_byte_direct
+ bl test_8_byte_direct
+ bl test_16_byte_direct
+ bl test_32_byte_direct
+ bl test_37_byte_arbitrary
+ bl test_128_byte_too_large
+
+ mov w0, #0
+ ldp x29, x30, [sp], #16
+ ret
+ .size main, .-main
From 1c27d8967a1938cea4e9bf3110362cb91d7b3bbb Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 21 Aug 2025 10:17:40 -0700
Subject: [PATCH 2/3] [BOLT] documentation
---
bolt/docs/CommandLineArgumentReference.md | 2 +-
bolt/lib/Rewrite/BinaryPassManager.cpp | 4 +++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index f3881c9a640a9..3fc0594514f6e 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -631,7 +631,7 @@
- `--inline-memcpy`
- Inline memcpy using 'rep movsb' instruction (X86-only)
+ Inline memcpy using optimized instruction sequences (X86: 'rep movsb', AArch64: width-optimized register operations)
- `--inline-small-functions`
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 996d2e972599d..6b554598cf1bc 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -247,7 +247,9 @@ static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
static cl::opt<bool> StringOps(
"inline-memcpy",
- cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
+ cl::desc(
+ "inline memcpy using size-specific optimized instructions "
+ "(X86: 'rep movsb', AArch64: width-optimized register operations)"),
cl::cat(BoltOptCategory));
static cl::opt<bool> StripRepRet(
From db353b759b298aed2e0ebf86f99d6049a5a62e12 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 21 Aug 2025 11:25:05 -0700
Subject: [PATCH 3/3] [BOLT][AArch64] Implement safe size-aware memcpy inlining
---
bolt/include/bolt/Core/MCPlusBuilder.h | 16 ++
bolt/lib/Passes/BinaryPasses.cpp | 28 ++-
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 204 ++++++++++++++++++
3 files changed, 246 insertions(+), 2 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e773250ce8734..6cbf288f3b8f4 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1895,6 +1895,22 @@ class MCPlusBuilder {
return {};
}
+ /// Creates a size-aware inline memcpy sequence. If \p KnownSize is provided,
+ /// generates optimized code for that specific size. Falls back to the regular
+ /// createInlineMemcpy if the size is unknown or not needed (e.g. on X86).
+ virtual InstructionListType
+ createInlineMemcpy(bool ReturnEnd, std::optional<uint64_t> KnownSize) const {
+ return createInlineMemcpy(ReturnEnd);
+ }
+
+ /// Extract immediate value from move instruction that sets the given
+ /// register. Returns the immediate value if the instruction is a
+ /// move-immediate to TargetReg.
+ virtual std::optional<uint64_t>
+ extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const {
+ return std::nullopt;
+ }
+
/// Create a target-specific relocation out of the \p Fixup.
/// Note that not every fixup could be converted into a relocation.
virtual std::optional<Relocation>
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index d7f02b9470030..0068c1ad0bf1c 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1843,7 +1843,7 @@ Error StripRepRet::runOnFunctions(BinaryContext &BC) {
}
Error InlineMemcpy::runOnFunctions(BinaryContext &BC) {
- if (!BC.isX86())
+ if (!BC.isX86() && !BC.isAArch64())
return Error::success();
uint64_t NumInlined = 0;
@@ -1866,8 +1866,32 @@ Error InlineMemcpy::runOnFunctions(BinaryContext &BC) {
const bool IsMemcpy8 = (CalleeSymbol->getName() == "_memcpy8");
const bool IsTailCall = BC.MIB->isTailCall(Inst);
+ // Extract the size of the copy from preceding instructions by looking
+ // for writes to the size register.
+ std::optional<uint64_t> KnownSize = std::nullopt;
+ BitVector WrittenRegs(BC.MRI->getNumRegs());
+
+ // Get the size register (3rd arg register, index 2 for AArch64)
+ MCPhysReg SizeReg = BC.MIB->getIntArgRegister(2);
+
+ // Scan the instructions preceding the call for the size-setting instruction
+ for (auto InstIt = BB.begin(); InstIt != II; ++InstIt) {
+ MCInst &Inst = *InstIt;
+ WrittenRegs.reset(); // Clear and check what the instruction writes to
+ BC.MIB->getWrittenRegs(Inst, WrittenRegs);
+
+ // Check for writes to the size register
+ if (SizeReg != BC.MIB->getNoRegister() && WrittenRegs[SizeReg]) {
+ if (std::optional<uint64_t> ExtractedSize =
+ BC.MIB->extractMoveImmediate(Inst, SizeReg)) {
+ KnownSize = *ExtractedSize;
+ break;
+ }
+ }
+ }
+
const InstructionListType NewCode =
- BC.MIB->createInlineMemcpy(IsMemcpy8);
+ BC.MIB->createInlineMemcpy(IsMemcpy8, KnownSize);
II = BB.replaceInstruction(II, NewCode);
std::advance(II, NewCode.size() - 1);
if (IsTailCall) {
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 973261765f951..03f62117ea096 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2597,6 +2597,210 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
getInstructionSize(const MCInst &Inst) const override {
return 4;
}
+
+ InstructionListType createInlineMemcpy(bool ReturnEnd) const override {
+ // Fallback
+ return createInlineMemcpy(ReturnEnd, std::nullopt);
+ }
+
+ std::optional<uint64_t>
+ extractMoveImmediate(const MCInst &Inst, MCPhysReg TargetReg) const override {
+ if (Inst.getOpcode() == AArch64::MOVZXi && Inst.getNumOperands() >= 3) {
+ if (Inst.getOperand(0).isReg() &&
+ Inst.getOperand(0).getReg() == TargetReg &&
+ Inst.getOperand(1).isImm() && Inst.getOperand(2).isImm() &&
+ Inst.getOperand(2).getImm() == 0) {
+ return Inst.getOperand(1).getImm();
+ }
+ }
+ return std::nullopt;
+ }
+
+ InstructionListType
+ createInlineMemcpy(bool ReturnEnd,
+ std::optional<uint64_t> KnownSize) const override {
+ InstructionListType Code;
+ if (ReturnEnd) {
+ if (KnownSize.has_value() && (*KnownSize >> 12) == 0) {
+ // Use immediate if size is known and fits in 12-bit immediate (0-4095)
+ Code.emplace_back(MCInstBuilder(AArch64::ADDXri)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X0)
+ .addImm(*KnownSize)
+ .addImm(0));
+ } else {
+ // Fall back to register add for unknown or large sizes
+ Code.emplace_back(MCInstBuilder(AArch64::ADDXrr)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X2));
+ }
+ }
+
+ if (!KnownSize.has_value()) {
+ return Code;
+ }
+
+ uint64_t Size = *KnownSize;
+ return generateSizeSpecificMemcpy(Code, Size);
+ }
+
+ InstructionListType generateSizeSpecificMemcpy(InstructionListType &Code,
+ uint64_t Size) const {
+ // Generate optimal instruction sequences based on exact size
+ switch (Size) {
+ case 1:
+ // Single byte copy
+ Code.emplace_back(MCInstBuilder(AArch64::LDRBBui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X1)
+ .addImm(0));
+ Code.emplace_back(MCInstBuilder(AArch64::STRBBui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X0)
+ .addImm(0));
+ break;
+
+ case 2:
+ // 2-byte copy using 16-bit load/store
+ Code.emplace_back(MCInstBuilder(AArch64::LDRHHui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X1)
+ .addImm(0));
+ Code.emplace_back(MCInstBuilder(AArch64::STRHHui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X0)
+ .addImm(0));
+ break;
+
+ case 4:
+ // 4-byte copy using 32-bit load/store
+ Code.emplace_back(MCInstBuilder(AArch64::LDRWui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X1)
+ .addImm(0));
+ Code.emplace_back(MCInstBuilder(AArch64::STRWui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X0)
+ .addImm(0));
+ break;
+
+ case 8:
+ // 8-byte copy using 64-bit load/store
+ Code.emplace_back(MCInstBuilder(AArch64::LDRXui)
+ .addReg(AArch64::X3)
+ .addReg(AArch64::X1)
+ .addImm(0));
+ Code.emplace_back(MCInstBuilder(AArch64::STRXui)
+ .addReg(AArch64::X3)
+ .addReg(AArch64::X0)
+ .addImm(0));
+ break;
+
+ case 16:
+ // 16-byte copy using 128-bit SIMD
+ Code.emplace_back(MCInstBuilder(AArch64::LDRQui)
+ .addReg(AArch64::Q0)
+ .addReg(AArch64::X1)
+ .addImm(0));
+ Code.emplace_back(MCInstBuilder(AArch64::STRQui)
+ .addReg(AArch64::Q0)
+ .addReg(AArch64::X0)
+ .addImm(0));
+ break;
+
+ case 32:
+ // 32-byte copy using two 128-bit SIMD operations
+ Code.emplace_back(MCInstBuilder(AArch64::LDRQui)
+ .addReg(AArch64::Q0)
+ .addReg(AArch64::X1)
+ .addImm(0));
+ Code.emplace_back(MCInstBuilder(AArch64::STRQui)
+ .addReg(AArch64::Q0)
+ .addReg(AArch64::X0)
+ .addImm(0));
+ Code.emplace_back(MCInstBuilder(AArch64::LDRQui)
+ .addReg(AArch64::Q1)
+ .addReg(AArch64::X1)
+ .addImm(1));
+ Code.emplace_back(MCInstBuilder(AArch64::STRQui)
+ .addReg(AArch64::Q1)
+ .addReg(AArch64::X0)
+ .addImm(1));
+ break;
+
+ default:
+ if (Size <= 64) {
+ // For sizes up to 64 bytes, greedily use the largest possible loads in
+ // descending order
+ uint64_t Remaining = Size;
+ uint64_t Offset = 0;
+
+ while (Remaining >= 16) {
+ Code.emplace_back(MCInstBuilder(AArch64::LDRQui)
+ .addReg(AArch64::Q0)
+ .addReg(AArch64::X1)
+ .addImm(Offset / 16));
+ Code.emplace_back(MCInstBuilder(AArch64::STRQui)
+ .addReg(AArch64::Q0)
+ .addReg(AArch64::X0)
+ .addImm(Offset / 16));
+ Remaining -= 16;
+ Offset += 16;
+ }
+ if (Remaining >= 8) {
+ Code.emplace_back(MCInstBuilder(AArch64::LDRXui)
+ .addReg(AArch64::X3)
+ .addReg(AArch64::X1)
+ .addImm(Offset / 8));
+ Code.emplace_back(MCInstBuilder(AArch64::STRXui)
+ .addReg(AArch64::X3)
+ .addReg(AArch64::X0)
+ .addImm(Offset / 8));
+ Remaining -= 8;
+ Offset += 8;
+ }
+ if (Remaining >= 4) {
+ Code.emplace_back(MCInstBuilder(AArch64::LDRWui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X1)
+ .addImm(Offset / 4));
+ Code.emplace_back(MCInstBuilder(AArch64::STRWui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X0)
+ .addImm(Offset / 4));
+ Remaining -= 4;
+ Offset += 4;
+ }
+ if (Remaining >= 2) {
+ Code.emplace_back(MCInstBuilder(AArch64::LDRHHui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X1)
+ .addImm(Offset / 2));
+ Code.emplace_back(MCInstBuilder(AArch64::STRHHui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X0)
+ .addImm(Offset / 2));
+ Remaining -= 2;
+ Offset += 2;
+ }
+ if (Remaining == 1) {
+ Code.emplace_back(MCInstBuilder(AArch64::LDRBBui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X1)
+ .addImm(Offset));
+ Code.emplace_back(MCInstBuilder(AArch64::STRBBui)
+ .addReg(AArch64::W3)
+ .addReg(AArch64::X0)
+ .addImm(Offset));
+ }
+ } else {
+ Code.clear();
+ }
+ break;
+ }
+ return Code;
+ }
};
} // end anonymous namespace
More information about the llvm-commits mailing list