[libc] [llvm] [libc] Improve memcpy for ARM Cortex-M supporting unaligned accesses. (PR #144872)

Michael Jones via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 20 14:06:04 PDT 2025


================
@@ -0,0 +1,134 @@
+//===-- Memcpy implementation for arm ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_INLINE_MEMCPY_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_INLINE_MEMCPY_H
+
+#include "src/__support/macros/attributes.h"   // LIBC_INLINE
+#include "src/__support/macros/optimization.h" // LIBC_LOOP_NOUNROLL
+#include "src/string/memory_utils/utils.h" // memcpy_inline, distance_to_align
+
+#include <stddef.h> // size_t
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace {
+
+// Number of bytes in one 32-bit word; the copy helpers below use it as the
+// size of a single word copy.
+LIBC_INLINE_VAR constexpr size_t kWordSize = sizeof(uint32_t);
+
+// Copies `bytes` bytes from `src` to `dst`, then advances both pointers by
+// `bytes`. `bytes` must be 1, 2, 4 or a multiple of `kWordSize`; the latter is
+// copied as a chain of word-sized copies that the compiler is asked to unroll.
+template <size_t bytes>
+LIBC_INLINE void copy_and_bump_pointers(Ptr &dst, CPtr &src) {
+  if constexpr (bytes == 1 || bytes == 2 || bytes == 4) {
+    memcpy_inline<bytes>(dst, src);
+  } else {
+    // We restrict loads/stores to 4 bytes to prevent the use of load/store
+    // multiple (LDM, STM) and load/store double (LDRD, STRD). First, they may
+    // fault (see notes below) and second, they use more registers which in turn
+    // adds push/pop instructions in the hot path.
+    static_assert(bytes % kWordSize == 0,
+                  "bytes must be 1, 2, 4 or a multiple of the word size");
+    LIBC_LOOP_UNROLL
+    for (size_t i = 0; i < bytes / kWordSize; ++i) {
+      // `offset` is a byte count, not an address, hence `size_t`.
+      const size_t offset = i * kWordSize;
+      memcpy_inline<kWordSize>(dst + offset, src + offset);
+    }
+  }
+  // In the 1, 2, 4 byte copy case, the compiler can fold pointer offsetting
+  // into the load/store instructions.
+  // e.g.,
+  // ldrb  r3, [r1], #1
+  // strb  r3, [r0], #1
+  dst += bytes;
+  src += bytes;
+}
+
+// Copies every whole `block_size`-byte block that fits in `size`, advancing
+// `dst` and `src` past the copied bytes. On return `size` holds the number of
+// bytes left to copy, which is always less than `block_size`.
+template <size_t block_size>
+LIBC_INLINE void copy_blocks(Ptr &dst, CPtr &src, size_t &size) {
+  const size_t block_count = size / block_size;
+  LIBC_LOOP_NOUNROLL
+  for (size_t remaining = block_count; remaining != 0; --remaining)
+    copy_and_bump_pointers<block_size>(dst, src);
+  // `size` is adjusted a single time here rather than on every iteration.
+  size %= block_size;
+}
+
+// Returns a pointer whose integer representation is the bitwise OR of the
+// integer representations of `a` and `b`.
+LIBC_INLINE CPtr bitwise_or(CPtr a, CPtr b) {
+  const auto a_bits = cpp::bit_cast<uintptr_t>(a);
+  const auto b_bits = cpp::bit_cast<uintptr_t>(b);
+  return cpp::bit_cast<CPtr>(a_bits | b_bits);
+}
+
+// Forwards `a` to `distance_to_align_down<kWordSize>` and returns its result;
+// callers treat a non-zero value as "not word aligned".
+LIBC_INLINE auto misaligned(CPtr a) {
+  const auto offset_in_word = distance_to_align_down<kWordSize>(a);
+  return offset_in_word;
+}
+
+} // namespace
+
+// Implementation for Cortex-M0, M0+, M1.
+// Every access is a single byte, so all accesses are aligned.
+[[maybe_unused]] LIBC_INLINE void inline_memcpy_arm_low_end(Ptr dst, CPtr src,
+                                                            size_t size) {
+  // Placeholder for now: a plain byte-by-byte copy, front to back.
+  LIBC_LOOP_NOUNROLL
+  for (size_t offset = 0; offset != size; ++offset)
+    *(dst + offset) = *(src + offset);
+}
+
+// Implementation for Cortex-M3, M4, M7, M23, M33, M35P, M52 with hardware
+// support for unaligned loads and stores.
+// Notes:
+// - It compiles down to <300 bytes.
+// - `dst` and `src` are not `__restrict` to prevent the compiler from
+//   reordering loads/stores.
+// - We keep state variables to a strict minimum to keep everything in the free
+//   registers and prevent costly push / pop.
+// - Even where unaligned single loads/stores to normal memory are supported,
+//   unaligned accesses for load/store multiple (LDM, STM) and load/store
+//   double (LDRD, STRD) instructions are generally not supported and will
+//   still fault, so we make sure to restrict unrolling to word loads/stores.
+[[maybe_unused]] LIBC_INLINE void inline_memcpy_arm_mid_end(Ptr dst, CPtr src,
+                                                            size_t size) {
+  if (misaligned(bitwise_or(src, dst))) [[unlikely]] {
----------------
michaelrj-google wrote:

Does `LIBC_UNLIKELY` work instead of `[[unlikely]]`?

https://github.com/llvm/llvm-project/pull/144872


More information about the llvm-commits mailing list