[libc-commits] [libc] 7f5d7bc - [libc][mem*] Introduce Algorithms for new mem framework
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Tue Jun 28 02:24:06 PDT 2022
Author: Guillaume Chatelet
Date: 2022-06-28T09:23:49Z
New Revision: 7f5d7bc827a5fda1c803342af536f81986e2635a
URL: https://github.com/llvm/llvm-project/commit/7f5d7bc827a5fda1c803342af536f81986e2635a
DIFF: https://github.com/llvm/llvm-project/commit/7f5d7bc827a5fda1c803342af536f81986e2635a.diff
LOG: [libc][mem*] Introduce Algorithms for new mem framework
This patch is a sub-part of D125768, intended to make the review easier.
This patch introduces the same algorithms as in `libc/src/string/memory_utils/elements.h` but using the new API.
Differential Revision: https://reviews.llvm.org/D128335
Added:
libc/src/string/memory_utils/algorithm.h
libc/test/src/string/memory_utils/algorithm_test.cpp
Modified:
libc/src/string/memory_utils/sized_op.h
libc/test/src/string/memory_utils/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/src/string/memory_utils/algorithm.h b/libc/src/string/memory_utils/algorithm.h
new file mode 100644
index 0000000000000..82c09fc7ea16b
--- /dev/null
+++ b/libc/src/string/memory_utils/algorithm.h
@@ -0,0 +1,463 @@
+//===-- Algorithms to compose sized memory operations ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Higher order primitives that build upon the SizedOpT facility.
+// They constitute the basic blocks for composing memory functions.
+// This file defines the following operations:
+// - Skip
+// - Tail
+// - HeadTail
+// - Loop
+// - Align
+//
+// See each class for documentation.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
+
+#include "src/string/memory_utils/address.h" // Address
+#include "src/string/memory_utils/utils.h" // offset_to_next_aligned
+
+#include <stddef.h> // ptrdiff_t
+
+namespace __llvm_libc {
+
+// We are not yet allowed to use asserts in low-level memory operations, as
+// assert itself could depend on them.
+// We define this empty macro so we can enable them as soon as possible and keep
+// track of invariants.
+#define LIBC_ASSERT(COND)
+
+// An operation that skips the specified number of bytes.
+template <ptrdiff_t Bytes> struct Skip {
+ template <typename NextT> struct Then {
+ template <typename DstAddrT>
+ static inline void set(DstAddrT dst, ubyte value) {
+ static_assert(NextT::IS_FIXED_SIZE);
+ NextT::set(offsetAddr<Bytes>(dst), value);
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
+ static_assert(NextT::IS_FIXED_SIZE);
+ return NextT::isDifferent(offsetAddr<Bytes>(src1),
+ offsetAddr<Bytes>(src2));
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
+ static_assert(NextT::IS_FIXED_SIZE);
+ return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
+ offsetAddr<Bytes>(src2));
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
+ size_t runtime_size) {
+ static_assert(NextT::IS_RUNTIME_SIZE);
+ return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
+ offsetAddr<Bytes>(src2), runtime_size - Bytes);
+ }
+ };
+};
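As a usage sketch (illustrative, not part of the patch), Skip composes with a fixed-size operation to touch bytes at a constant offset; the skip_and_set test further down exercises exactly this shape, assuming some Backend satisfying IS_BACKEND_TYPE and a destination address dst:

  // Sketch: splat 42 and store the single byte at dst + 11.
  Skip<11>::Then<SizedOp<Backend, 1>>::set(dst, ubyte{42});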
+
+// Compute the address of a tail operation.
+// Because of the runtime size, we lose the alignment information.
+template <size_t Size, typename AddrT>
+static auto tailAddr(AddrT addr, size_t runtime_size) {
+ static_assert(IsAddressType<AddrT>::Value);
+ return offsetAddrAssumeAligned<1>(addr, runtime_size - Size);
+}
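For instance (an illustrative sketch, with addr an assumed address value), for an 8-byte operation on a 16-byte buffer the tail starts 8 bytes in, and the returned address only carries an alignment of 1:

  // Sketch: tail of a 16-byte buffer for an 8-byte operation.
  auto tail = tailAddr<8>(addr, /*runtime_size=*/16); // addr + 8, alignment 1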
+
+// Perform the operation on the last 'Size' bytes of the buffer.
+//
+// e.g. with
+// [1234567812345678123]
+// [__XXXXXXXXXXXXXX___]
+// [________XXXXXXXX___]
+//
+// Precondition: `runtime_size >= Size`.
+template <typename SizedOpT> struct Tail {
+ static_assert(SizedOpT::IS_FIXED_SIZE);
+ static constexpr bool IS_RUNTIME_SIZE = true;
+ static constexpr size_t SIZE = SizedOpT::SIZE;
+
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+ SizedOpT::copy(tailAddr<SIZE>(dst, runtime_size),
+ tailAddr<SIZE>(src, runtime_size));
+ }
+
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+ SizedOpT::move(tailAddr<SIZE>(dst, runtime_size),
+ tailAddr<SIZE>(src, runtime_size));
+ }
+
+ template <typename DstAddrT>
+ static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
+ SizedOpT::set(tailAddr<SIZE>(dst, runtime_size), value);
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
+ size_t runtime_size) {
+ return SizedOpT::isDifferent(tailAddr<SIZE>(src1, runtime_size),
+ tailAddr<SIZE>(src2, runtime_size));
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
+ size_t runtime_size) {
+ return SizedOpT::threeWayCmp(tailAddr<SIZE>(src1, runtime_size),
+ tailAddr<SIZE>(src2, runtime_size));
+ }
+};
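As a sketch mirroring the tail_copy_8 test further down (with _8 standing for SizedOp<Backend, 8>, dst and src assumed addresses), Tail touches only the last SIZE bytes and leaves the head to the caller:

  // Sketch: copies bytes [8, 16) of a 16-byte buffer; bytes [0, 8)
  // remain the caller's responsibility.
  Tail<_8>::copy(dst, src, /*runtime_size=*/16);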
+
+// Perform the operation on the first and the last `SizedOpT::SIZE` bytes of the
+// buffer. This is useful for overlapping operations.
+//
+// e.g. with
+// [1234567812345678123]
+// [__XXXXXXXXXXXXXX___]
+// [__XXXXXXXX_________]
+// [________XXXXXXXX___]
+//
+// Precondition: `runtime_size >= Size && runtime_size <= 2 x Size`.
+template <typename SizedOpT> struct HeadTail {
+ static_assert(SizedOpT::IS_FIXED_SIZE);
+ static constexpr bool IS_RUNTIME_SIZE = true;
+
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+ LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
+ SizedOpT::copy(dst, src);
+ Tail<SizedOpT>::copy(dst, src, runtime_size);
+ }
+
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+ LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
+ static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;
+ // The load and store operations can be performed in any order as long as
+ // they are not interleaved. More investigations are needed to determine the
+ // best order.
+ auto head = SizedOpT::load(src);
+ auto tail = SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
+ SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail);
+ SizedOpT::store(dst, head);
+ }
+
+ template <typename DstAddrT>
+ static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
+ LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
+ SizedOpT::set(dst, value);
+ Tail<SizedOpT>::set(dst, value, runtime_size);
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
+ size_t runtime_size) {
+ LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
+ // Two strategies can be applied here:
+ // 1. Compute head and tail and compose them with a bitwise or operation.
+ // 2. Stop early if head is different.
+ // We chose the latter because HeadTail operations are typically performed
+ // with sizes ranging from 4 to 256 bytes. The cost of the loads is then
+ // significantly larger than the cost of the branch.
+ if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
+ return res;
+ return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
+ size_t runtime_size) {
+ LIBC_ASSERT(runtime_size >= SizedOpT::SIZE &&
+ runtime_size <= 2 * SizedOpT::SIZE);
+ if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
+ return res;
+ return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
+ }
+};
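As a sketch (compare the head_tail_copy_8 test below; _8 stands for SizedOp<Backend, 8>, dst and src are assumed addresses), two possibly-overlapping fixed-size operations cover any runtime size in [SIZE, 2 x SIZE]:

  // Sketch: a 12-byte copy is covered by bytes [0, 8) and [4, 12);
  // the two 8-byte operations overlap over [4, 8).
  HeadTail<_8>::copy(dst, src, /*runtime_size=*/12);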
+
+// Simple loop ending with a Tail operation.
+//
+// e.g. with
+// [12345678123456781234567812345678]
+// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
+// [__XXXXXXXX_______________________]
+// [__________XXXXXXXX_______________]
+// [__________________XXXXXXXX_______]
+// [______________________XXXXXXXX___]
+//
+// Precondition:
+// - runtime_size >= Size
+template <typename SizedOpT> struct Loop {
+ static_assert(SizedOpT::IS_FIXED_SIZE);
+ static constexpr bool IS_RUNTIME_SIZE = true;
+ static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;
+
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+ size_t offset = 0;
+ do {
+ SizedOpT::copy(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
+ offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
+ offset += BLOCK_SIZE;
+ } while (offset < runtime_size - BLOCK_SIZE);
+ Tail<SizedOpT>::copy(dst, src, runtime_size);
+ }
+
+ // Move forward, suitable when dst < src. We load the tail bytes before
+ // handling the loop.
+ //
+ // e.g. Moving two bytes
+ // [ | | | | |]
+ // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
+ // [_________________________LLLLLLLL___]
+ // [___LLLLLLLL_________________________]
+ // [_SSSSSSSS___________________________]
+ // [___________LLLLLLLL_________________]
+ // [_________SSSSSSSS___________________]
+ // [___________________LLLLLLLL_________]
+ // [_________________SSSSSSSS___________]
+ // [_______________________SSSSSSSS_____]
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+ const auto tail_value =
+ SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
+ size_t offset = 0;
+ do {
+ SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
+ offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
+ offset += BLOCK_SIZE;
+ } while (offset < runtime_size - BLOCK_SIZE);
+ SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail_value);
+ }
+
+ // Move backward, suitable when dst > src. We load the head bytes before
+ // handling the loop.
+ //
+ // e.g. Moving two bytes
+ // [ | | | | |]
+ // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
+ // [___LLLLLLLL_________________________]
+ // [_________________________LLLLLLLL___]
+ // [___________________________SSSSSSSS_]
+ // [_________________LLLLLLLL___________]
+ // [___________________SSSSSSSS_________]
+ // [_________LLLLLLLL___________________]
+ // [___________SSSSSSSS_________________]
+ // [_____SSSSSSSS_______________________]
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void move_backward(DstAddrT dst, SrcAddrT src,
+ size_t runtime_size) {
+ const auto head_value = SizedOpT::load(src);
+ ptrdiff_t offset = runtime_size - BLOCK_SIZE;
+ do {
+ SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
+ offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
+ offset -= BLOCK_SIZE;
+ } while (offset >= 0);
+ SizedOpT::store(dst, head_value);
+ }
+
+ template <typename DstAddrT>
+ static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
+ size_t offset = 0;
+ do {
+ SizedOpT::set(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset), value);
+ offset += BLOCK_SIZE;
+ } while (offset < runtime_size - BLOCK_SIZE);
+ Tail<SizedOpT>::set(dst, value, runtime_size);
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
+ size_t runtime_size) {
+ size_t offset = 0;
+ do {
+ if (uint64_t res = SizedOpT::isDifferent(
+ offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
+ offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
+ return res;
+ offset += BLOCK_SIZE;
+ } while (offset < runtime_size - BLOCK_SIZE);
+ return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
+ size_t runtime_size) {
+ size_t offset = 0;
+ do {
+ if (int32_t res = SizedOpT::threeWayCmp(
+ offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
+ offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
+ return res;
+ offset += BLOCK_SIZE;
+ } while (offset < runtime_size - BLOCK_SIZE);
+ return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
+ }
+};
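As a sketch mirroring the loop_copy_two_iteration_and_tail test below (with _8 standing for SizedOp<Backend, 8>, dst and src assumed addresses), the loop issues full blocks and finishes with one overlapping tail operation:

  // Sketch: a 17-byte copy runs two full 8-byte blocks over [0, 8) and
  // [8, 16), then an overlapping tail copy over [9, 17).
  Loop<_8>::copy(dst, src, /*runtime_size=*/17);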
+
+// Aligns using a statically-sized operation, then calls the subsequent NextT
+// operation.
+//
+// e.g. a 32-byte Loop copy with the destination aligned to 16 bytes can be
+// written as:
+// Align<_16, Arg::Dst>::Then<Loop<_32>>::copy(dst, src, runtime_size);
+enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
+template <typename SizedOpT, Arg AlignOn = Arg::_1> struct Align {
+ static_assert(SizedOpT::IS_FIXED_SIZE);
+
+ template <typename NextT> struct Then {
+ static_assert(NextT::IS_RUNTIME_SIZE);
+
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+ SizedOpT::copy(dst, src);
+ auto aligned = align(dst, src, runtime_size);
+ NextT::copy(aligned.arg1, aligned.arg2, aligned.size);
+ }
+
+ // Move forward, suitable when dst < src. The alignment is performed with
+ // a HeadTail operation of size ∈ [Alignment, 2 x Alignment].
+ //
+ // e.g. Moving two bytes and making sure src is then aligned.
+ // [ | | | | ]
+ // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
+ // [____LLLLLLLL_____________________]
+ // [___________LLLLLLLL______________]
+ // [_SSSSSSSS________________________]
+ // [________SSSSSSSS_________________]
+ //
+ // e.g. Moving two bytes and making sure dst is then aligned.
+ // [ | | | | ]
+ // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
+ // [____LLLLLLLL_____________________]
+ // [______LLLLLLLL___________________]
+ // [_SSSSSSSS________________________]
+ // [___SSSSSSSS______________________]
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+ auto aligned_after_begin = align(dst, src, runtime_size);
+ // We move pointers forward by Size so we can perform HeadTail.
+ auto aligned = aligned_after_begin.stepForward();
+ HeadTail<SizedOpT>::move(dst, src, runtime_size - aligned.size);
+ NextT::move(aligned.arg1, aligned.arg2, aligned.size);
+ }
+
+ // Move backward, suitable when dst > src. The alignment is performed with
+ // a HeadTail operation of size ∈ [Alignment, 2 x Alignment].
+ //
+ // e.g. Moving two bytes backward and making sure src is then aligned.
+ // [ | | | | ]
+ // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
+ // [ _________________LLLLLLLL_______]
+ // [ ___________________LLLLLLLL_____]
+ // [____________________SSSSSSSS_____]
+ // [______________________SSSSSSSS___]
+ //
+ // e.g. Moving two bytes and making sure dst is then aligned.
+ // [ | | | | ]
+ // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
+ // [ _______________LLLLLLLL_________]
+ // [ ___________________LLLLLLLL_____]
+ // [__________________SSSSSSSS_______]
+ // [______________________SSSSSSSS___]
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void move_backward(DstAddrT dst, SrcAddrT src,
+ size_t runtime_size) {
+ const auto dst_end = offsetAddrAssumeAligned<1>(dst, runtime_size);
+ const auto src_end = offsetAddrAssumeAligned<1>(src, runtime_size);
+ auto aligned_after_end = align(dst_end, src_end, 0);
+ // We move pointers back by 2 x Size so we can perform HeadTail.
+ auto aligned = aligned_after_end.stepBack().stepBack();
+ HeadTail<SizedOpT>::move(aligned.arg1, aligned.arg2, aligned.size);
+ NextT::move_backward(dst, src, runtime_size - aligned.size);
+ }
+
+ template <typename DstAddrT>
+ static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
+ SizedOpT::set(dst, value);
+ DstAddrT _(nullptr);
+ auto aligned = align(dst, _, runtime_size);
+ NextT::set(aligned.arg1, value, aligned.size);
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
+ size_t runtime_size) {
+ if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
+ return res;
+ auto aligned = align(src1, src2, runtime_size);
+ return NextT::isDifferent(aligned.arg1, aligned.arg2, aligned.size);
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
+ size_t runtime_size) {
+ if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
+ return res;
+ auto aligned = align(src1, src2, runtime_size);
+ return NextT::threeWayCmp(aligned.arg1, aligned.arg2, aligned.size);
+ }
+ };
+
+private:
+ static constexpr size_t ALIGN_OP_SIZE = SizedOpT::SIZE;
+ static_assert(ALIGN_OP_SIZE > 1);
+
+ template <typename Arg1AddrT, typename Arg2AddrT> struct Aligned {
+ Arg1AddrT arg1;
+ Arg2AddrT arg2;
+ size_t size;
+
+ Aligned stepForward() const {
+ return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, ALIGN_OP_SIZE),
+ offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, ALIGN_OP_SIZE),
+ size - ALIGN_OP_SIZE};
+ }
+
+ Aligned stepBack() const {
+ return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, -ALIGN_OP_SIZE),
+ offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, -ALIGN_OP_SIZE),
+ size + ALIGN_OP_SIZE};
+ }
+ };
+
+ template <typename Arg1AddrT, typename Arg2AddrT>
+ static auto makeAligned(Arg1AddrT arg1, Arg2AddrT arg2, size_t size) {
+ return Aligned<Arg1AddrT, Arg2AddrT>{arg1, arg2, size};
+ }
+
+ template <typename Arg1AddrT, typename Arg2AddrT>
+ static auto align(Arg1AddrT arg1, Arg2AddrT arg2, size_t runtime_size) {
+ static_assert(IsAddressType<Arg1AddrT>::Value);
+ static_assert(IsAddressType<Arg2AddrT>::Value);
+ if constexpr (AlignOn == Arg::_1) {
+ auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg1.ptr_);
+ return makeAligned(offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg1, offset),
+ offsetAddrAssumeAligned<1>(arg2, offset),
+ runtime_size - offset);
+ } else if constexpr (AlignOn == Arg::_2) {
+ auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg2.ptr_);
+ return makeAligned(offsetAddrAssumeAligned<1>(arg1, offset),
+ offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg2, offset),
+ runtime_size - offset);
+ } else {
+ DeferredStaticAssert("AlignOn must be either Arg::_1 or Arg::_2");
+ }
+ }
+};
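As a sketch mirroring the align_dst_copy_8 test below (with _8 standing for SizedOp<Backend, 8>, dst and src assumed unaligned addresses), the leading fixed-size operation covers the unaligned head, then both pointers advance so the chosen argument is ALIGN_OP_SIZE-aligned before handing off:

  // Sketch: a 31-byte copy that aligns the destination to 8 bytes.
  // One unaligned 8-byte copy covers the head; the Loop that follows
  // issues aligned stores and ends with an unaligned overlapping tail.
  Align<_8, Arg::Dst>::Then<Loop<_8>>::copy(dst, src, /*runtime_size=*/31);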
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
diff --git a/libc/src/string/memory_utils/sized_op.h b/libc/src/string/memory_utils/sized_op.h
index 12ace7cc6bdfc..d58d39383490c 100644
--- a/libc/src/string/memory_utils/sized_op.h
+++ b/libc/src/string/memory_utils/sized_op.h
@@ -33,6 +33,9 @@ namespace __llvm_libc {
template <typename Backend, size_t Size> struct SizedOp {
static constexpr size_t SIZE = Size;
+ // Define instantiations of SizedOp as fixed-size operations,
+ // i.e. operations that the types in algorithm.h can compose.
+ static constexpr bool IS_FIXED_SIZE = true;
private:
static_assert(Backend::IS_BACKEND_TYPE);
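Illustratively (a sketch, not part of the patch; Backend is an assumed type satisfying IS_BACKEND_TYPE), these flags let the combinators in algorithm.h enforce their composition contract at compile time:

  // Sketch: SizedOp instantiations are fixed-size building blocks, while
  // the wrappers they produce advertise IS_RUNTIME_SIZE instead.
  static_assert(SizedOp<Backend, 8>::IS_FIXED_SIZE);
  static_assert(Loop<SizedOp<Backend, 8>>::IS_RUNTIME_SIZE);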
diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt
index 83926e48aff55..9b777a7d53112 100644
--- a/libc/test/src/string/memory_utils/CMakeLists.txt
+++ b/libc/test/src/string/memory_utils/CMakeLists.txt
@@ -4,6 +4,7 @@ add_libc_unittest(
libc_string_unittests
SRCS
address_test.cpp
+ algorithm_test.cpp
backend_test.cpp
elements_test.cpp
memory_access_test.cpp
diff --git a/libc/test/src/string/memory_utils/algorithm_test.cpp b/libc/test/src/string/memory_utils/algorithm_test.cpp
new file mode 100644
index 0000000000000..e4fd3e3712ccd
--- /dev/null
+++ b/libc/test/src/string/memory_utils/algorithm_test.cpp
@@ -0,0 +1,565 @@
+
+#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE 0
+
+#include "utils/UnitTest/Test.h"
+#include <src/__support/CPP/Array.h>
+#include <src/string/memory_utils/algorithm.h>
+#include <src/string/memory_utils/backends.h>
+
+#include <sstream>
+
+namespace __llvm_libc {
+
+struct alignas(64) Buffer : cpp::Array<char, 128> {
+ bool contains(const char *ptr) const {
+ return ptr >= data() && ptr < (data() + size());
+ }
+ size_t getOffset(const char *ptr) const { return ptr - data(); }
+ void fill(char c) {
+ for (auto itr = begin(); itr != end(); ++itr)
+ *itr = c;
+ }
+};
+
+static Buffer buffer1;
+static Buffer buffer2;
+static std::ostringstream LOG;
+
+struct TestBackend {
+ static constexpr bool IS_BACKEND_TYPE = true;
+
+ template <typename T> static void log(const char *Action, const char *ptr) {
+ LOG << Action << "<" << sizeof(T) << "> ";
+ if (buffer1.contains(ptr))
+ LOG << "a[" << buffer1.getOffset(ptr) << "]";
+ else if (buffer2.contains(ptr))
+ LOG << "b[" << buffer2.getOffset(ptr) << "]";
+ LOG << "\n";
+ }
+
+ template <typename T, Temporality TS, Aligned AS>
+ static T load(const T *src) {
+ log<T>((AS == Aligned::YES ? "LdA" : "LdU"),
+ reinterpret_cast<const char *>(src));
+ return Scalar64BitBackend::load<T, TS, AS>(src);
+ }
+
+ template <typename T, Temporality TS, Aligned AS>
+ static void store(T *dst, T value) {
+ log<T>((AS == Aligned::YES ? "StA" : "StU"),
+ reinterpret_cast<const char *>(dst));
+ Scalar64BitBackend::store<T, TS, AS>(dst, value);
+ }
+
+ template <typename T> static inline T splat(ubyte value) {
+ LOG << "Splat<" << sizeof(T) << "> " << (unsigned)value << '\n';
+ return Scalar64BitBackend::splat<T>(value);
+ }
+
+ template <typename T> static inline uint64_t notEquals(T v1, T v2) {
+ LOG << "Neq<" << sizeof(T) << ">\n";
+ return Scalar64BitBackend::notEquals<T>(v1, v2);
+ }
+
+ template <typename T> static inline int32_t threeWayCmp(T v1, T v2) {
+ LOG << "Diff<" << sizeof(T) << ">\n";
+ return Scalar64BitBackend::threeWayCmp<T>(v1, v2);
+ }
+
+ template <size_t Size>
+ using getNextType = Scalar64BitBackend::getNextType<Size>;
+};
+
+struct LlvmLibcAlgorithm : public testing::Test {
+ void SetUp() override {
+ LOG = std::ostringstream();
+ LOG << '\n';
+ }
+
+ void fillEqual() {
+ buffer1.fill('a');
+ buffer2.fill('a');
+ }
+
+ void fillDifferent() {
+ buffer1.fill('a');
+ buffer2.fill('b');
+ }
+
+ const char *getTrace() {
+ trace_ = LOG.str();
+ return trace_.c_str();
+ }
+
+ const char *stripComments(const char *expected) {
+ expected_.clear();
+ std::stringstream ss(expected);
+ std::string line;
+ while (std::getline(ss, line, '\n')) {
+ const auto pos = line.find('#');
+ if (pos == std::string::npos) {
+ expected_ += line;
+ } else {
+ auto log = line.substr(0, pos);
+ while (!log.empty() && std::isspace(log.back()))
+ log.pop_back();
+ expected_ += log;
+ }
+ expected_ += '\n';
+ }
+ return expected_.c_str();
+ }
+
+ template <size_t Align = 1> SrcAddr<Align> buf1(size_t offset = 0) const {
+ return buffer1.data() + offset;
+ }
+ template <size_t Align = 1> SrcAddr<Align> buf2(size_t offset = 0) const {
+ return buffer2.data() + offset;
+ }
+ template <size_t Align = 1> DstAddr<Align> dst(size_t offset = 0) const {
+ return buffer1.data() + offset;
+ }
+ template <size_t Align = 1> SrcAddr<Align> src(size_t offset = 0) const {
+ return buffer2.data() + offset;
+ }
+
+private:
+ std::string trace_;
+ std::string expected_;
+};
+
+using _8 = SizedOp<TestBackend, 8>;
+
+///////////////////////////////////////////////////////////////////////////////
+//// Testing fixed-size forward operations
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// Copy
+
+TEST_F(LlvmLibcAlgorithm, copy_1) {
+ SizedOp<TestBackend, 1>::copy(dst(), src());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<1> b[0]
+StU<1> a[0]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, copy_15) {
+ SizedOp<TestBackend, 15>::copy(dst(), src());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+StU<8> a[0]
+LdU<4> b[8]
+StU<4> a[8]
+LdU<2> b[12]
+StU<2> a[12]
+LdU<1> b[14]
+StU<1> a[14]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, copy_16) {
+ SizedOp<TestBackend, 16>::copy(dst(), src());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+StU<8> a[0]
+LdU<8> b[8]
+StU<8> a[8]
+)"));
+}
+///////////////////////////////////////////////////////////////////////////////
+// Move
+
+TEST_F(LlvmLibcAlgorithm, move_1) {
+ SizedOp<TestBackend, 1>::move(dst(), src());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<1> b[0]
+StU<1> a[0]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, move_15) {
+ SizedOp<TestBackend, 15>::move(dst(), src());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+LdU<4> b[8]
+LdU<2> b[12]
+LdU<1> b[14]
+StU<1> a[14]
+StU<2> a[12]
+StU<4> a[8]
+StU<8> a[0]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, move_16) {
+ SizedOp<TestBackend, 16>::move(dst(), src());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+LdU<8> b[8]
+StU<8> a[8]
+StU<8> a[0]
+)"));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// set
+
+TEST_F(LlvmLibcAlgorithm, set_1) {
+ SizedOp<TestBackend, 1>::set(dst(), ubyte{42});
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+Splat<1> 42
+StU<1> a[0]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, set_15) {
+ SizedOp<TestBackend, 15>::set(dst(), ubyte{42});
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+Splat<8> 42
+StU<8> a[0]
+Splat<4> 42
+StU<4> a[8]
+Splat<2> 42
+StU<2> a[12]
+Splat<1> 42
+StU<1> a[14]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, set_16) {
+ SizedOp<TestBackend, 16>::set(dst(), ubyte{42});
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+Splat<8> 42
+StU<8> a[0]
+Splat<8> 42
+StU<8> a[8]
+)"));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// different
+
+TEST_F(LlvmLibcAlgorithm, different_1) {
+ fillEqual();
+ SizedOp<TestBackend, 1>::isDifferent(buf1(), buf2());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<1> a[0]
+LdU<1> b[0]
+Neq<1>
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, different_15) {
+ fillEqual();
+ SizedOp<TestBackend, 15>::isDifferent(buf1(), buf2());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> a[0]
+LdU<8> b[0]
+Neq<8>
+LdU<4> a[8]
+LdU<4> b[8]
+Neq<4>
+LdU<2> a[12]
+LdU<2> b[12]
+Neq<2>
+LdU<1> a[14]
+LdU<1> b[14]
+Neq<1>
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, different_15_no_shortcircuit) {
+ fillDifferent();
+ SizedOp<TestBackend, 15>::isDifferent(buf1(), buf2());
+ // Even when the buffers are different we still aggregate over every block.
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> a[0]
+LdU<8> b[0]
+Neq<8>
+LdU<4> a[8]
+LdU<4> b[8]
+Neq<4>
+LdU<2> a[12]
+LdU<2> b[12]
+Neq<2>
+LdU<1> a[14]
+LdU<1> b[14]
+Neq<1>
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, different_16) {
+ fillEqual();
+ SizedOp<TestBackend, 16>::isDifferent(buf1(), buf2());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> a[0]
+LdU<8> b[0]
+Neq<8>
+LdU<8> a[8]
+LdU<8> b[8]
+Neq<8>
+)"));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// three_way_cmp
+
+TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_1) {
+ fillEqual();
+ SizedOp<TestBackend, 1>::threeWayCmp(buf1(), buf2());
+ // Buffers compare equal, so each Diff returns 0 and no early return fires.
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<1> a[0]
+LdU<1> b[0]
+Diff<1>
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_15) {
+ fillEqual();
+ SizedOp<TestBackend, 15>::threeWayCmp(buf1(), buf2());
+ // Buffers compare equal, so each Diff returns 0 and no early return fires.
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> a[0]
+LdU<8> b[0]
+Diff<8>
+LdU<4> a[8]
+LdU<4> b[8]
+Diff<4>
+LdU<2> a[12]
+LdU<2> b[12]
+Diff<2>
+LdU<1> a[14]
+LdU<1> b[14]
+Diff<1>
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, three_way_cmp_neq_15_shortcircuit) {
+ fillDifferent();
+ SizedOp<TestBackend, 16>::threeWayCmp(buf1(), buf2());
+ // When the buffers are different we stop early.
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> a[0]
+LdU<8> b[0]
+Diff<8>
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_16) {
+ fillEqual();
+ SizedOp<TestBackend, 16>::threeWayCmp(buf1(), buf2());
+ // Buffers compare equal, so each Diff returns 0 and no early return fires.
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> a[0]
+LdU<8> b[0]
+Diff<8>
+LdU<8> a[8]
+LdU<8> b[8]
+Diff<8>
+)"));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//// Testing skip operations
+///////////////////////////////////////////////////////////////////////////////
+
+TEST_F(LlvmLibcAlgorithm, skip_and_set) {
+ Skip<11>::Then<SizedOp<TestBackend, 1>>::set(dst(), ubyte{42});
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+Splat<1> 42
+StU<1> a[11]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, skip_and_different_1) {
+ Skip<11>::Then<SizedOp<TestBackend, 1>>::isDifferent(buf1(), buf2());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<1> a[11]
+LdU<1> b[11]
+Neq<1>
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, skip_and_three_way_cmp_8) {
+ Skip<11>::Then<SizedOp<TestBackend, 1>>::threeWayCmp(buf1(), buf2());
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<1> a[11]
+LdU<1> b[11]
+Diff<1>
+)"));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//// Testing tail operations
+///////////////////////////////////////////////////////////////////////////////
+
+TEST_F(LlvmLibcAlgorithm, tail_copy_8) {
+ Tail<_8>::copy(dst(), src(), 16);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[8]
+StU<8> a[8]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, tail_move_8) {
+ Tail<_8>::move(dst(), src(), 16);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[8]
+StU<8> a[8]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, tail_set_8) {
+ Tail<_8>::set(dst(), ubyte{42}, 16);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+Splat<8> 42
+StU<8> a[8]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, tail_different_8) {
+ fillEqual();
+ Tail<_8>::isDifferent(buf1(), buf2(), 16);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> a[8]
+LdU<8> b[8]
+Neq<8>
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, tail_three_way_cmp_8) {
+ fillEqual();
+ Tail<_8>::threeWayCmp(buf1(), buf2(), 16);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> a[8]
+LdU<8> b[8]
+Diff<8>
+)"));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//// Testing HeadTail operations
+///////////////////////////////////////////////////////////////////////////////
+
+TEST_F(LlvmLibcAlgorithm, head_tail_copy_8) {
+ HeadTail<_8>::copy(dst(), src(), 16);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+StU<8> a[0]
+LdU<8> b[8]
+StU<8> a[8]
+)"));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//// Testing Loop operations
+///////////////////////////////////////////////////////////////////////////////
+
+TEST_F(LlvmLibcAlgorithm, loop_copy_one_iteration_and_tail) {
+ Loop<_8>::copy(dst(), src(), 10);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+StU<8> a[0] # covers 0-7
+LdU<8> b[2]
+StU<8> a[2] # covers 2-9
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, loop_copy_two_iteration_and_tail) {
+ Loop<_8>::copy(dst(), src(), 17);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+StU<8> a[0] # covers 0-7
+LdU<8> b[8]
+StU<8> a[8] # covers 8-15
+LdU<8> b[9]
+StU<8> a[9] # covers 9-16
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, loop_with_one_turn_is_inefficient_but_ok) {
+ Loop<_8>::copy(dst(), src(), 8);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+StU<8> a[0] # first iteration covers 0-7
+LdU<8> b[0] # tail also covers 0-7 but since Loop is supposed to be used
+StU<8> a[0] # with a sufficient number of iterations the tail cost is amortised
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, loop_with_round_number_of_turn) {
+ Loop<_8>::copy(dst(), src(), 24);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+StU<8> a[0] # first iteration covers 0-7
+LdU<8> b[8]
+StU<8> a[8] # second iteration covers 8-15
+LdU<8> b[16]
+StU<8> a[16]
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, dst_aligned_loop) {
+ Loop<_8>::copy(dst<16>(), src(), 23);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[0]
+StA<8> a[0] # store is aligned on 16B
+LdU<8> b[8]
+StA<8> a[8] # subsequent stores are aligned
+LdU<8> b[15]
+StU<8> a[15] # Tail is always unaligned
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, aligned_loop) {
+ Loop<_8>::copy(dst<16>(), src<8>(), 23);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdA<8> b[0] # load is aligned on 8B
+StA<8> a[0] # store is aligned on 16B
+LdA<8> b[8] # subsequent loads are aligned
+StA<8> a[8] # subsequent stores are aligned
+LdU<8> b[15] # Tail is always unaligned
+StU<8> a[15] # Tail is always unaligned
+)"));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//// Testing Align operations
+///////////////////////////////////////////////////////////////////////////////
+
+TEST_F(LlvmLibcAlgorithm, align_dst_copy_8) {
+ Align<_8, Arg::Dst>::Then<Loop<_8>>::copy(dst(2), src(3), 31);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[3]
+StU<8> a[2] # First store covers unaligned bytes
+LdU<8> b[9]
+StA<8> a[8] # First aligned store
+LdU<8> b[17]
+StA<8> a[16] # Subsequent stores are aligned
+LdU<8> b[25]
+StA<8> a[24] # Subsequent stores are aligned
+LdU<8> b[26]
+StU<8> a[25] # Last store covers remaining bytes
+)"));
+}
+
+TEST_F(LlvmLibcAlgorithm, align_src_copy_8) {
+ Align<_8, Arg::Src>::Then<Loop<_8>>::copy(dst(2), src(3), 31);
+ EXPECT_STREQ(getTrace(), stripComments(R"(
+LdU<8> b[3] # First load covers unaligned bytes
+StU<8> a[2]
+LdA<8> b[8] # First aligned load
+StU<8> a[7]
+LdA<8> b[16] # Subsequent loads are aligned
+StU<8> a[15]
+LdA<8> b[24] # Subsequent loads are aligned
+StU<8> a[23]
+LdU<8> b[26] # Last load covers remaining bytes
+StU<8> a[25]
+)"));
+}
+
+} // namespace __llvm_libc