[libc-commits] [libc] 2188cf9 - [libc][NFC] Remove new framework, a simpler one is coming
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Mon Sep 26 05:43:11 PDT 2022
Author: Guillaume Chatelet
Date: 2022-09-26T12:42:38Z
New Revision: 2188cf9fa4d012b3ce484f9e97b66581569c1157
URL: https://github.com/llvm/llvm-project/commit/2188cf9fa4d012b3ce484f9e97b66581569c1157
DIFF: https://github.com/llvm/llvm-project/commit/2188cf9fa4d012b3ce484f9e97b66581569c1157.diff
LOG: [libc][NFC] Remove new framework, a simpler one is coming
Added:
Modified:
libc/test/src/string/memory_utils/CMakeLists.txt
Removed:
libc/src/string/memory_utils/address.h
libc/src/string/memory_utils/algorithm.h
libc/src/string/memory_utils/backend_aarch64.h
libc/src/string/memory_utils/backend_scalar.h
libc/src/string/memory_utils/backend_x86.h
libc/src/string/memory_utils/backends.h
libc/src/string/memory_utils/sized_op.h
libc/test/src/string/memory_utils/address_test.cpp
libc/test/src/string/memory_utils/algorithm_test.cpp
libc/test/src/string/memory_utils/backend_test.cpp
################################################################################
diff --git a/libc/src/string/memory_utils/address.h b/libc/src/string/memory_utils/address.h
deleted file mode 100644
index caa71be5b1da9..0000000000000
--- a/libc/src/string/memory_utils/address.h
+++ /dev/null
@@ -1,133 +0,0 @@
-//===-- Strongly typed address with alignment and access semantics --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_COMMON_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_COMMON_H
-
-#include "src/__support/CPP/type_traits.h" // cpp::ConditionalType
-#include "src/string/memory_utils/utils.h" // is_power2
-#include <stddef.h> // size_t
-#include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
-
-namespace __llvm_libc {
-
-// Utility to enable static_assert(false) in templates.
-template <bool flag = false> static void DeferredStaticAssert(const char *msg) {
- static_assert(flag, "compilation error");
-}
-
-// A non-coercible type to represent raw data.
-enum class ubyte : unsigned char { ZERO = 0 };
-
-// Address attribute specifying whether the underlying load / store operations
-// are temporal or non-temporal.
-enum class Temporality { TEMPORAL, NON_TEMPORAL };
-
-// Address attribute specifying whether the underlying load / store operations
-// are aligned or unaligned.
-enum class Aligned { NO, YES };
-
-// Address attribute to discriminate between readable and writable addresses.
-enum class Permission { Read, Write };
-
-// Address is semantically equivalent to a pointer but also conveys compile time
-// information that helps with instructions selection (aligned/unaligned,
-// temporal/non-temporal).
-template <size_t Alignment, Permission P, Temporality TS> struct Address {
- static_assert(is_power2(Alignment));
- static constexpr size_t ALIGNMENT = Alignment;
- static constexpr Permission PERMISSION = P;
- static constexpr Temporality TEMPORALITY = TS;
- static constexpr bool IS_READ = P == Permission::Read;
- static constexpr bool IS_WRITE = P == Permission::Write;
- using PointeeType = cpp::conditional_t<!IS_WRITE, const ubyte, ubyte>;
- using VoidType = cpp::conditional_t<!IS_WRITE, const void, void>;
-
- Address(VoidType *ptr) : ptr_(reinterpret_cast<PointeeType *>(ptr)) {}
-
- PointeeType *ptr() const {
- return reinterpret_cast<PointeeType *>(
- __builtin_assume_aligned(ptr_, ALIGNMENT));
- }
-
- PointeeType *const ptr_;
-
- template <size_t ByteOffset> auto offset(size_t byte_offset) const {
- static constexpr size_t NewAlignment = commonAlign<ByteOffset>();
- return Address<NewAlignment, PERMISSION, TEMPORALITY>(ptr_ + byte_offset);
- }
-
-private:
- static constexpr size_t gcd(size_t A, size_t B) {
- return B == 0 ? A : gcd(B, A % B);
- }
-
- template <size_t ByteOffset> static constexpr size_t commonAlign() {
- constexpr size_t GCD = gcd(ByteOffset, ALIGNMENT);
- if constexpr (is_power2(GCD))
- return GCD;
- else
- return 1;
- }
-};
-
-template <typename T> struct IsAddressType : public cpp::false_type {};
-template <size_t Alignment, Permission P, Temporality TS>
-struct IsAddressType<Address<Alignment, P, TS>> : public cpp::true_type {};
-
-// Reinterpret the address as a pointer to T.
-// This is not UB since the underlying pointer always refers to a `char` in a
-// buffer of raw data.
-template <typename T, typename AddrT> static T *as(AddrT addr) {
- static_assert(IsAddressType<AddrT>::value);
- return reinterpret_cast<T *>(addr.ptr());
-}
-
-// Offsets the address by a compile time amount, this allows propagating
-// alignment whenever possible.
-template <size_t ByteOffset, typename AddrT>
-static auto offsetAddr(AddrT addr) {
- static_assert(IsAddressType<AddrT>::value);
- return addr.template offset<ByteOffset>(ByteOffset);
-}
-
-// Offsets the address by a runtime amount but assuming that the resulting
-// address will be Alignment aligned.
-template <size_t Alignment, typename AddrT>
-static auto offsetAddrAssumeAligned(AddrT addr, size_t byte_offset) {
- static_assert(IsAddressType<AddrT>::value);
- return Address<Alignment, AddrT::PERMISSION, AddrT::TEMPORALITY>(addr.ptr_ +
- byte_offset);
-}
-
-// Offsets the address by a runtime amount that is assumed to be a multiple of
-// ByteOffset. This allows to propagate the address alignment whenever possible.
-template <size_t ByteOffset, typename AddrT>
-static auto offsetAddrMultiplesOf(AddrT addr, ptrdiff_t byte_offset) {
- static_assert(IsAddressType<AddrT>::value);
- return addr.template offset<ByteOffset>(byte_offset);
-}
-
-// User friendly aliases for common address types.
-template <size_t Alignment>
-using SrcAddr = Address<Alignment, Permission::Read, Temporality::TEMPORAL>;
-template <size_t Alignment>
-using DstAddr = Address<Alignment, Permission::Write, Temporality::TEMPORAL>;
-template <size_t Alignment>
-using NtSrcAddr =
- Address<Alignment, Permission::Read, Temporality::NON_TEMPORAL>;
-template <size_t Alignment>
-using NtDstAddr =
- Address<Alignment, Permission::Write, Temporality::NON_TEMPORAL>;
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_COMMON_H
diff --git a/libc/src/string/memory_utils/algorithm.h b/libc/src/string/memory_utils/algorithm.h
deleted file mode 100644
index 6355ffe04562f..0000000000000
--- a/libc/src/string/memory_utils/algorithm.h
+++ /dev/null
@@ -1,463 +0,0 @@
-//===-- Algorithms to compose sized memory operations ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Higher order primitives that build upon the SizedOpT facility.
-// They constitute the basic blocks for composing memory functions.
-// This file defines the following operations:
-// - Skip
-// - Tail
-// - HeadTail
-// - Loop
-// - Align
-//
-// See each class for documentation.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
-
-#include "src/string/memory_utils/address.h" // Address
-#include "src/string/memory_utils/utils.h" // offset_to_next_aligned
-
-#include <stddef.h> // ptrdiff_t
-
-namespace __llvm_libc {
-
-// We are not yet allowed to use asserts in low level memory operations as
-// assert itself could depend on them.
-// We define this empty macro so we can enable them as soon as possible and keep
-// track of invariants.
-#define LIBC_ASSERT(COND)
-
-// An operation that allows to skip the specified amount of bytes.
-template <ptrdiff_t Bytes> struct Skip {
- template <typename NextT> struct Then {
- template <typename DstAddrT>
- static inline void set(DstAddrT dst, ubyte value) {
- static_assert(NextT::IS_FIXED_SIZE);
- NextT::set(offsetAddr<Bytes>(dst), value);
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
- static_assert(NextT::IS_FIXED_SIZE);
- return NextT::isDifferent(offsetAddr<Bytes>(src1),
- offsetAddr<Bytes>(src2));
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
- static_assert(NextT::IS_FIXED_SIZE);
- return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
- offsetAddr<Bytes>(src2));
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
- size_t runtime_size) {
- static_assert(NextT::IS_RUNTIME_SIZE);
- return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
- offsetAddr<Bytes>(src2), runtime_size - Bytes);
- }
- };
-};
-
-// Compute the address of a tail operation.
-// Because of the runtime size, we loose the alignment information.
-template <size_t Size, typename AddrT>
-static auto tailAddr(AddrT addr, size_t runtime_size) {
- static_assert(IsAddressType<AddrT>::value);
- return offsetAddrAssumeAligned<1>(addr, runtime_size - Size);
-}
-
-// Perform the operation on the last 'Size' bytes of the buffer.
-//
-// e.g. with
-// [1234567812345678123]
-// [__XXXXXXXXXXXXXX___]
-// [________XXXXXXXX___]
-//
-// Precondition: `runtime_size >= Size`.
-template <typename SizedOpT> struct Tail {
- static_assert(SizedOpT::IS_FIXED_SIZE);
- static constexpr bool IS_RUNTIME_SIZE = true;
- static constexpr size_t SIZE = SizedOpT::SIZE;
-
- template <typename DstAddrT, typename SrcAddrT>
- static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
- SizedOpT::copy(tailAddr<SIZE>(dst, runtime_size),
- tailAddr<SIZE>(src, runtime_size));
- }
-
- template <typename DstAddrT, typename SrcAddrT>
- static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
- SizedOpT::move(tailAddr<SIZE>(dst, runtime_size),
- tailAddr<SIZE>(src, runtime_size));
- }
-
- template <typename DstAddrT>
- static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
- SizedOpT::set(tailAddr<SIZE>(dst, runtime_size), value);
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
- size_t runtime_size) {
- return SizedOpT::isDifferent(tailAddr<SIZE>(src1, runtime_size),
- tailAddr<SIZE>(src2, runtime_size));
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
- size_t runtime_size) {
- return SizedOpT::threeWayCmp(tailAddr<SIZE>(src1, runtime_size),
- tailAddr<SIZE>(src2, runtime_size));
- }
-};
-
-// Perform the operation on the first and the last `SizedOpT::Size` bytes of the
-// buffer. This is useful for overlapping operations.
-//
-// e.g. with
-// [1234567812345678123]
-// [__XXXXXXXXXXXXXX___]
-// [__XXXXXXXX_________]
-// [________XXXXXXXX___]
-//
-// Precondition: `runtime_size >= Size && runtime_size <= 2 x Size`.
-template <typename SizedOpT> struct HeadTail {
- static_assert(SizedOpT::IS_FIXED_SIZE);
- static constexpr bool IS_RUNTIME_SIZE = true;
-
- template <typename DstAddrT, typename SrcAddrT>
- static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
- LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
- SizedOpT::copy(dst, src);
- Tail<SizedOpT>::copy(dst, src, runtime_size);
- }
-
- template <typename DstAddrT, typename SrcAddrT>
- static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
- LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
- static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;
- // The load and store operations can be performed in any order as long as
- // they are not interleaved. More investigations are needed to determine the
- // best order.
- auto head = SizedOpT::load(src);
- auto tail = SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
- SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail);
- SizedOpT::store(dst, head);
- }
-
- template <typename DstAddrT>
- static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
- LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
- SizedOpT::set(dst, value);
- Tail<SizedOpT>::set(dst, value, runtime_size);
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
- size_t runtime_size) {
- LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
- // Two strategies can be applied here:
- // 1. Compute head and tail and compose them with a bitwise or operation.
-// 2. Stop early if head is different.
- // We chose the later because HeadTail operations are typically performed
- // with sizes ranging from 4 to 256 bytes. The cost of the loads is then
- // significantly larger than the cost of the branch.
- if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
- return res;
- return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
- size_t runtime_size) {
- LIBC_ASSERT(runtime_size >= SizedOpT::SIZE &&
- runtime_size <= 2 * SizedOpT::SIZE);
- if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
- return res;
- return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
- }
-};
-
-// Simple loop ending with a Tail operation.
-//
-// e.g. with
-// [12345678123456781234567812345678]
-// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
-// [__XXXXXXXX_______________________]
-// [__________XXXXXXXX_______________]
-// [__________________XXXXXXXX_______]
-// [______________________XXXXXXXX___]
-//
-// Precondition:
-// - runtime_size >= Size
-template <typename SizedOpT> struct Loop {
- static_assert(SizedOpT::IS_FIXED_SIZE);
- static constexpr bool IS_RUNTIME_SIZE = true;
- static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;
-
- template <typename DstAddrT, typename SrcAddrT>
- static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
- size_t offset = 0;
- do {
- SizedOpT::copy(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
- offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
- offset += BLOCK_SIZE;
- } while (offset < runtime_size - BLOCK_SIZE);
- Tail<SizedOpT>::copy(dst, src, runtime_size);
- }
-
- // Move forward suitable when dst < src. We load the tail bytes before
- // handling the loop.
- //
- // e.g. Moving two bytes
- // [ | | | | |]
- // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
- // [_________________________LLLLLLLL___]
- // [___LLLLLLLL_________________________]
- // [_SSSSSSSS___________________________]
- // [___________LLLLLLLL_________________]
- // [_________SSSSSSSS___________________]
- // [___________________LLLLLLLL_________]
- // [_________________SSSSSSSS___________]
- // [_______________________SSSSSSSS_____]
- template <typename DstAddrT, typename SrcAddrT>
- static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
- const auto tail_value =
- SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
- size_t offset = 0;
- do {
- SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
- offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
- offset += BLOCK_SIZE;
- } while (offset < runtime_size - BLOCK_SIZE);
- SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail_value);
- }
-
- // Move backward suitable when dst > src. We load the head bytes before
- // handling the loop.
- //
- // e.g. Moving two bytes
- // [ | | | | |]
- // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
- // [___LLLLLLLL_________________________]
- // [_________________________LLLLLLLL___]
- // [___________________________SSSSSSSS_]
- // [_________________LLLLLLLL___________]
- // [___________________SSSSSSSS_________]
- // [_________LLLLLLLL___________________]
- // [___________SSSSSSSS_________________]
- // [_____SSSSSSSS_______________________]
- template <typename DstAddrT, typename SrcAddrT>
- static inline void move_backward(DstAddrT dst, SrcAddrT src,
- size_t runtime_size) {
- const auto head_value = SizedOpT::load(src);
- ptrdiff_t offset = runtime_size - BLOCK_SIZE;
- do {
- SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
- offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
- offset -= BLOCK_SIZE;
- } while (offset >= 0);
- SizedOpT::store(dst, head_value);
- }
-
- template <typename DstAddrT>
- static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
- size_t offset = 0;
- do {
- SizedOpT::set(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset), value);
- offset += BLOCK_SIZE;
- } while (offset < runtime_size - BLOCK_SIZE);
- Tail<SizedOpT>::set(dst, value, runtime_size);
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
- size_t runtime_size) {
- size_t offset = 0;
- do {
- if (uint64_t res = SizedOpT::isDifferent(
- offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
- offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
- return res;
- offset += BLOCK_SIZE;
- } while (offset < runtime_size - BLOCK_SIZE);
- return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
- size_t runtime_size) {
- size_t offset = 0;
- do {
- if (int32_t res = SizedOpT::threeWayCmp(
- offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
- offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
- return res;
- offset += BLOCK_SIZE;
- } while (offset < runtime_size - BLOCK_SIZE);
- return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
- }
-};
-
-// Aligns using a statically-sized operation, then calls the subsequent NextT
-// operation.
-//
-// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
-// Align<_16, Arg::Dst>::Then<Loop<_32>>::copy(dst, src, runtime_size);
-enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
-template <typename SizedOpT, Arg AlignOn = Arg::_1> struct Align {
- static_assert(SizedOpT::IS_FIXED_SIZE);
-
- template <typename NextT> struct Then {
- static_assert(NextT::IS_RUNTIME_SIZE);
-
- template <typename DstAddrT, typename SrcAddrT>
- static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
- SizedOpT::copy(dst, src);
- auto aligned = align(dst, src, runtime_size);
- NextT::copy(aligned.arg1, aligned.arg2, aligned.size);
- }
-
- // Move forward suitable when dst < src. The alignment is performed with
- // an HeadTail operation of size ∈ [Alignment, 2 x Alignment].
- //
- // e.g. Moving two bytes and making sure src is then aligned.
- // [ | | | | ]
- // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
- // [____LLLLLLLL_____________________]
- // [___________LLLLLLLL______________]
- // [_SSSSSSSS________________________]
- // [________SSSSSSSS_________________]
- //
- // e.g. Moving two bytes and making sure dst is then aligned.
- // [ | | | | ]
- // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
- // [____LLLLLLLL_____________________]
- // [______LLLLLLLL___________________]
- // [_SSSSSSSS________________________]
- // [___SSSSSSSS______________________]
- template <typename DstAddrT, typename SrcAddrT>
- static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
- auto aligned_after_begin = align(dst, src, runtime_size);
- // We move pointers forward by Size so we can perform HeadTail.
- auto aligned = aligned_after_begin.stepForward();
- HeadTail<SizedOpT>::move(dst, src, runtime_size - aligned.size);
- NextT::move(aligned.arg1, aligned.arg2, aligned.size);
- }
-
- // Move backward suitable when dst > src. The alignment is performed with
- // an HeadTail operation of size ∈ [Alignment, 2 x Alignment].
- //
- // e.g. Moving two bytes backward and making sure src is then aligned.
- // [ | | | | ]
- // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
- // [ _________________LLLLLLLL_______]
- // [ ___________________LLLLLLLL_____]
- // [____________________SSSSSSSS_____]
- // [______________________SSSSSSSS___]
- //
- // e.g. Moving two bytes and making sure dst is then aligned.
- // [ | | | | ]
- // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
- // [ _______________LLLLLLLL_________]
- // [ ___________________LLLLLLLL_____]
- // [__________________SSSSSSSS_______]
- // [______________________SSSSSSSS___]
- template <typename DstAddrT, typename SrcAddrT>
- static inline void move_backward(DstAddrT dst, SrcAddrT src,
- size_t runtime_size) {
- const auto dst_end = offsetAddrAssumeAligned<1>(dst, runtime_size);
- const auto src_end = offsetAddrAssumeAligned<1>(src, runtime_size);
- auto aligned_after_end = align(dst_end, src_end, 0);
- // We move pointers back by 2 x Size so we can perform HeadTail.
- auto aligned = aligned_after_end.stepBack().stepBack();
- HeadTail<SizedOpT>::move(aligned.arg1, aligned.arg2, aligned.size);
- NextT::move_backward(dst, src, runtime_size - aligned.size);
- }
-
- template <typename DstAddrT>
- static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
- SizedOpT::set(dst, value);
- DstAddrT _(nullptr);
- auto aligned = align(dst, _, runtime_size);
- NextT::set(aligned.arg1, value, aligned.size);
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
- size_t runtime_size) {
- if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
- return res;
- auto aligned = align(src1, src2, runtime_size);
- return NextT::isDifferent(aligned.arg1, aligned.arg2, aligned.size);
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
- size_t runtime_size) {
- if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
- return res;
- auto aligned = align(src1, src2, runtime_size);
- return NextT::threeWayCmp(aligned.arg1, aligned.arg2, aligned.size);
- }
- };
-
-private:
- static constexpr size_t ALIGN_OP_SIZE = SizedOpT::SIZE;
- static_assert(ALIGN_OP_SIZE > 1);
-
- template <typename Arg1AddrT, typename Arg2AddrT> struct Aligned {
- Arg1AddrT arg1;
- Arg2AddrT arg2;
- size_t size;
-
- Aligned stepForward() const {
- return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, ALIGN_OP_SIZE),
- offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, ALIGN_OP_SIZE),
- size - ALIGN_OP_SIZE};
- }
-
- Aligned stepBack() const {
- return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, -ALIGN_OP_SIZE),
- offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, -ALIGN_OP_SIZE),
- size + ALIGN_OP_SIZE};
- }
- };
-
- template <typename Arg1AddrT, typename Arg2AddrT>
- static auto makeAligned(Arg1AddrT arg1, Arg2AddrT arg2, size_t size) {
- return Aligned<Arg1AddrT, Arg2AddrT>{arg1, arg2, size};
- }
-
- template <typename Arg1AddrT, typename Arg2AddrT>
- static auto align(Arg1AddrT arg1, Arg2AddrT arg2, size_t runtime_size) {
- static_assert(IsAddressType<Arg1AddrT>::value);
- static_assert(IsAddressType<Arg2AddrT>::value);
- if constexpr (AlignOn == Arg::_1) {
- auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg1.ptr_);
- return makeAligned(offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg1, offset),
- offsetAddrAssumeAligned<1>(arg2, offset),
- runtime_size - offset);
- } else if constexpr (AlignOn == Arg::_2) {
- auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg2.ptr_);
- return makeAligned(offsetAddrAssumeAligned<1>(arg1, offset),
- offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg2, offset),
- runtime_size - offset);
- } else {
- DeferredStaticAssert("AlignOn must be either Arg::_1 or Arg::_2");
- }
- }
-};
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
diff --git a/libc/src/string/memory_utils/backend_aarch64.h b/libc/src/string/memory_utils/backend_aarch64.h
deleted file mode 100644
index 8077a098ff9c0..0000000000000
--- a/libc/src/string/memory_utils/backend_aarch64.h
+++ /dev/null
@@ -1,71 +0,0 @@
-//===-- Elementary operations for aarch64 ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
-
-#if !defined(LLVM_LIBC_ARCH_AARCH64)
-#include "src/string/memory_utils/backend_scalar.h"
-
-#ifdef __ARM_NEON
-#include <arm_neon.h>
-#endif
-
-namespace __llvm_libc {
-
-struct Aarch64Backend : public Scalar64BitBackend {
- static constexpr bool IS_BACKEND_TYPE = true;
-
- template <typename T, Temporality TS, Aligned AS,
- cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline T load(const T *src) {
- return Scalar64BitBackend::template load<T, TS, AS>(src);
- }
-};
-
-// Implementation of the SizedOp abstraction for the set operation.
-struct Zva64 {
- static constexpr size_t SIZE = 64;
-
- template <typename DstAddrT>
- static inline void set(DstAddrT dst, ubyte value) {
-#if __SIZEOF_POINTER__ == 4
- asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory");
-#else
- asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
-#endif
- }
-};
-
-inline static bool hasZva() {
- uint64_t zva_val;
- asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
- // DC ZVA is permitted if DZP, bit [4] is zero.
- // BS, bits [3:0] is log2 of the block size in words.
- // So the next line checks whether the instruction is permitted and block size
- // is 16 words (i.e. 64 bytes).
- return (zva_val & 0b11111) == 0b00100;
-}
-
-namespace aarch64 {
-using _1 = SizedOp<Aarch64Backend, 1>;
-using _2 = SizedOp<Aarch64Backend, 2>;
-using _3 = SizedOp<Aarch64Backend, 3>;
-using _4 = SizedOp<Aarch64Backend, 4>;
-using _8 = SizedOp<Aarch64Backend, 8>;
-using _16 = SizedOp<Aarch64Backend, 16>;
-using _32 = SizedOp<Aarch64Backend, 32>;
-using _64 = SizedOp<Aarch64Backend, 64>;
-using _128 = SizedOp<Aarch64Backend, 128>;
-} // namespace aarch64
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_ARCH_AARCH64
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
diff --git a/libc/src/string/memory_utils/backend_scalar.h b/libc/src/string/memory_utils/backend_scalar.h
deleted file mode 100644
index dba36b159baa6..0000000000000
--- a/libc/src/string/memory_utils/backend_scalar.h
+++ /dev/null
@@ -1,104 +0,0 @@
-//===-- Elementary operations for native scalar types ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
-
-#include "src/__support/CPP/type_traits.h" // ConditionalType, enable_if_t
-#include "src/__support/endian.h"
-
-namespace __llvm_libc {
-
-struct Scalar64BitBackend {
- static constexpr bool IS_BACKEND_TYPE = true;
-
- template <typename T>
- static constexpr bool IsScalarType =
- cpp::is_same_v<T, uint8_t> || cpp::is_same_v<T, uint16_t> ||
- cpp::is_same_v<T, uint32_t> || cpp::is_same_v<T, uint64_t>;
-
- template <typename T, Temporality TS, Aligned AS>
- static inline T load(const T *src) {
- static_assert(IsScalarType<T>);
- static_assert(TS == Temporality::TEMPORAL,
- "Scalar load does not support non-temporal access");
- return *src;
- }
-
- template <typename T, Temporality TS, Aligned AS>
- static inline void store(T *dst, T value) {
- static_assert(IsScalarType<T>);
- static_assert(TS == Temporality::TEMPORAL,
- "Scalar store does not support non-temporal access");
- *dst = value;
- }
-
- template <typename T> static inline T splat(ubyte value) {
- static_assert(IsScalarType<T>);
- return (T(~0ULL) / T(0xFF)) * T(value);
- }
-
- template <typename T> static inline uint64_t notEquals(T v1, T v2) {
- static_assert(IsScalarType<T>);
- return v1 ^ v2;
- }
-
- template <typename T> static inline int32_t threeWayCmp(T v1, T v2) {
- DeferredStaticAssert("not implemented");
- }
-
- // Returns the type to use to consume Size bytes.
- template <size_t Size>
- using getNextType = cpp::conditional_t<
- Size >= 8, uint64_t,
- cpp::conditional_t<Size >= 4, uint32_t,
- cpp::conditional_t<Size >= 2, uint16_t, uint8_t>>>;
-};
-
-template <>
-int32_t inline Scalar64BitBackend::threeWayCmp<uint8_t>(uint8_t a, uint8_t b) {
- const int16_t la = Endian::to_big_endian(a);
- const int16_t lb = Endian::to_big_endian(b);
- return la - lb;
-}
-template <>
-int32_t inline Scalar64BitBackend::threeWayCmp<uint16_t>(uint16_t a,
- uint16_t b) {
- const int32_t la = Endian::to_big_endian(a);
- const int32_t lb = Endian::to_big_endian(b);
- return la - lb;
-}
-template <>
-int32_t inline Scalar64BitBackend::threeWayCmp<uint32_t>(uint32_t a,
- uint32_t b) {
- const uint32_t la = Endian::to_big_endian(a);
- const uint32_t lb = Endian::to_big_endian(b);
- return la > lb ? 1 : la < lb ? -1 : 0;
-}
-template <>
-int32_t inline Scalar64BitBackend::threeWayCmp<uint64_t>(uint64_t a,
- uint64_t b) {
- const uint64_t la = Endian::to_big_endian(a);
- const uint64_t lb = Endian::to_big_endian(b);
- return la > lb ? 1 : la < lb ? -1 : 0;
-}
-
-namespace scalar {
-using _1 = SizedOp<Scalar64BitBackend, 1>;
-using _2 = SizedOp<Scalar64BitBackend, 2>;
-using _3 = SizedOp<Scalar64BitBackend, 3>;
-using _4 = SizedOp<Scalar64BitBackend, 4>;
-using _8 = SizedOp<Scalar64BitBackend, 8>;
-using _16 = SizedOp<Scalar64BitBackend, 16>;
-using _32 = SizedOp<Scalar64BitBackend, 32>;
-using _64 = SizedOp<Scalar64BitBackend, 64>;
-using _128 = SizedOp<Scalar64BitBackend, 128>;
-} // namespace scalar
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
diff --git a/libc/src/string/memory_utils/backend_x86.h b/libc/src/string/memory_utils/backend_x86.h
deleted file mode 100644
index cfdfcdf90131c..0000000000000
--- a/libc/src/string/memory_utils/backend_x86.h
+++ /dev/null
@@ -1,219 +0,0 @@
-//===-- Elementary operations for x86 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
-
-#if defined(LLVM_LIBC_ARCH_X86)
-#include "src/__support/CPP/type_traits.h" // ConditionalType, enable_if_t
-#include "src/string/memory_utils/backend_scalar.h"
-
-#ifdef __SSE2__
-#include <immintrin.h>
-#endif // __SSE2__
-
-#if defined(__SSE2__)
-#define HAS_M128 true
-#else
-#define HAS_M128 false
-#endif
-
-#if defined(__AVX2__)
-#define HAS_M256 true
-#else
-#define HAS_M256 false
-#endif
-
-#if defined(__AVX512F__) and defined(__AVX512BW__)
-#define HAS_M512 true
-#else
-#define HAS_M512 false
-#endif
-
-namespace __llvm_libc {
-struct X86Backend : public Scalar64BitBackend {
- static constexpr bool IS_BACKEND_TYPE = true;
-
- // Scalar types use base class implementations.
- template <typename T, Temporality TS, Aligned AS,
- cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline T load(const T *src) {
- return Scalar64BitBackend::template load<T, TS, AS>(src);
- }
-
- // Scalar types use base class implementations.
- template <typename T, Temporality TS, Aligned AS,
- cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline void store(T *dst, T value) {
- Scalar64BitBackend::template store<T, TS, AS>(dst, value);
- }
-
- // Scalar types use base class implementations.
- template <typename T,
- cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline uint64_t notEquals(T v1, T v2) {
- return Scalar64BitBackend::template notEquals<T>(v1, v2);
- }
-
- // Scalar types use base class implementations.
- template <typename T,
- cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline T splat(ubyte value) {
- return Scalar64BitBackend::template splat<T>(value);
- }
-
- // Scalar types use base class implementations.
- template <typename T,
- cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline int32_t threeWayCmp(T v1, T v2) {
- return Scalar64BitBackend::template threeWayCmp<T>(v1, v2);
- }
-
- // X86 types are specialized below.
- template <typename T, Temporality TS, Aligned AS,
- cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline T load(const T *src);
-
- // X86 types are specialized below.
- template <typename T, Temporality TS, Aligned AS,
- cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline void store(T *dst, T value);
-
- // X86 types are specialized below.
- template <typename T,
- cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline T splat(ubyte value);
-
- // X86 types are specialized below.
- template <typename T,
- cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline uint64_t notEquals(T v1, T v2);
-
- template <typename T,
- cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
- static inline int32_t threeWayCmp(T v1, T v2) {
-    return char_diff(reinterpret_cast<char *>(&v1),
-                     reinterpret_cast<char *>(&v2), notEquals(v1, v2));
- }
-
- // Returns the type to use to consume Size bytes.
- template <size_t Size>
- using getNextType = cpp::conditional_t<
- (HAS_M512 && Size >= 64), __m512i,
- cpp::conditional_t<
- (HAS_M256 && Size >= 32), __m256i,
- cpp::conditional_t<(HAS_M128 && Size >= 16), __m128i,
- Scalar64BitBackend::getNextType<Size>>>>;
-
-private:
-  static inline int32_t char_diff(const char *a, const char *b, uint64_t mask) {
-    const size_t diff_index = mask == 0 ? 0 : __builtin_ctzll(mask);
-    const int16_t ca = (unsigned char)a[diff_index];
-    const int16_t cb = (unsigned char)b[diff_index];
-    return ca - cb;
-  }
-};
-
-static inline void repmovsb(void *dst, const void *src, size_t runtime_size) {
- asm volatile("rep movsb"
- : "+D"(dst), "+S"(src), "+c"(runtime_size)
- :
- : "memory");
-}
-
-#define SPECIALIZE_LOAD(T, OS, AS, INTRISIC) \
- template <> inline T X86Backend::load<T, OS, AS>(const T *src) { \
- return INTRISIC(const_cast<T *>(src)); \
- }
-#define SPECIALIZE_STORE(T, OS, AS, INTRISIC) \
- template <> inline void X86Backend::store<T, OS, AS>(T * dst, T value) { \
- INTRISIC(dst, value); \
- }
-
-#if HAS_M128
-SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_load_si128)
-SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_loadu_si128)
-SPECIALIZE_LOAD(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
- _mm_stream_load_si128)
-// X86 non-temporal load needs aligned access
-SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_store_si128)
-SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_storeu_si128)
-SPECIALIZE_STORE(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
- _mm_stream_si128)
-// X86 non-temporal store needs aligned access
-template <> inline __m128i X86Backend::splat<__m128i>(ubyte value) {
- return _mm_set1_epi8(__builtin_bit_cast(char, value));
-}
-template <>
-inline uint64_t X86Backend::notEquals<__m128i>(__m128i a, __m128i b) {
- using T = char __attribute__((__vector_size__(16)));
- return _mm_movemask_epi8(T(a) != T(b));
-}
-#endif // HAS_M128
-
-#if HAS_M256
-SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::YES, _mm256_load_si256)
-SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::NO, _mm256_loadu_si256)
-SPECIALIZE_LOAD(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
- _mm256_stream_load_si256)
-// X86 non-temporal load needs aligned access
-SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::YES,
- _mm256_store_si256)
-SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::NO,
- _mm256_storeu_si256)
-SPECIALIZE_STORE(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
- _mm256_stream_si256)
-// X86 non-temporal store needs aligned access
-template <> inline __m256i X86Backend::splat<__m256i>(ubyte value) {
- return _mm256_set1_epi8(__builtin_bit_cast(char, value));
-}
-template <>
-inline uint64_t X86Backend::notEquals<__m256i>(__m256i a, __m256i b) {
- using T = char __attribute__((__vector_size__(32)));
- return _mm256_movemask_epi8(T(a) != T(b));
-}
-#endif // HAS_M256
-
-#if HAS_M512
-SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::YES, _mm512_load_si512)
-SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::NO, _mm512_loadu_si512)
-SPECIALIZE_LOAD(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
- _mm512_stream_load_si512)
-// X86 non-temporal load needs aligned access
-SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::YES,
- _mm512_store_si512)
-SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::NO,
- _mm512_storeu_si512)
-SPECIALIZE_STORE(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
- _mm512_stream_si512)
-// X86 non-temporal store needs aligned access
-template <> inline __m512i X86Backend::splat<__m512i>(ubyte value) {
- return _mm512_broadcastb_epi8(_mm_set1_epi8(__builtin_bit_cast(char, value)));
-}
-template <>
-inline uint64_t X86Backend::notEquals<__m512i>(__m512i a, __m512i b) {
- return _mm512_cmpneq_epi8_mask(a, b);
-}
-#endif // HAS_M512
-
-namespace x86 {
-using _1 = SizedOp<X86Backend, 1>;
-using _2 = SizedOp<X86Backend, 2>;
-using _3 = SizedOp<X86Backend, 3>;
-using _4 = SizedOp<X86Backend, 4>;
-using _8 = SizedOp<X86Backend, 8>;
-using _16 = SizedOp<X86Backend, 16>;
-using _32 = SizedOp<X86Backend, 32>;
-using _64 = SizedOp<X86Backend, 64>;
-using _128 = SizedOp<X86Backend, 128>;
-} // namespace x86
-
-} // namespace __llvm_libc
-
-#endif // defined(LLVM_LIBC_ARCH_X86)
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
diff --git a/libc/src/string/memory_utils/backends.h b/libc/src/string/memory_utils/backends.h
deleted file mode 100644
index 6d241fa5eb289..0000000000000
--- a/libc/src/string/memory_utils/backends.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Elementary operations to compose memory primitives ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the concept of a Backend.
-// It constitutes the lowest level of the framework and is akin to instruction
-// selection. It defines how to implement aligned/unaligned,
-// temporal/non-temporal native loads and stores for a particular architecture
-// as well as efficient ways to fill and compare types.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
-
-#include "src/string/memory_utils/address.h" // Temporality, Aligned
-#include "src/string/memory_utils/sized_op.h" // SizedOp
-#include <stddef.h> // size_t
-#include <stdint.h> // uint##_t
-
-namespace __llvm_libc {
-
-// Backends must implement the following interface.
-struct NoBackend {
- static constexpr bool IS_BACKEND_TYPE = true;
-
- // Loads a T from `src` honoring Temporality and Alignment.
- template <typename T, Temporality, Aligned> static T load(const T *src);
-
- // Stores a T to `dst` honoring Temporality and Alignment.
- template <typename T, Temporality, Aligned>
- static void store(T *dst, T value);
-
- // Returns a T filled with `value` bytes.
- template <typename T> static T splat(ubyte value);
-
- // Returns zero iff v1 == v2.
- template <typename T> static uint64_t notEquals(T v1, T v2);
-
- // Returns zero iff v1 == v2, a negative number if v1 < v2 and a positive
- // number otherwise.
- template <typename T> static int32_t threeWayCmp(T v1, T v2);
-
- // Returns the type to use to consume Size bytes.
- // If no type handles Size bytes at once
- template <size_t Size> using getNextType = void;
-};
-
-} // namespace __llvm_libc
-
-// We inline all backend implementations here to simplify the build system.
-// Each file need to be guarded with the appropriate LLVM_LIBC_ARCH_XXX ifdef.
-#include "src/string/memory_utils/backend_aarch64.h"
-#include "src/string/memory_utils/backend_scalar.h"
-#include "src/string/memory_utils/backend_x86.h"
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
diff --git a/libc/src/string/memory_utils/sized_op.h b/libc/src/string/memory_utils/sized_op.h
deleted file mode 100644
index 2bca50d6c56d1..0000000000000
--- a/libc/src/string/memory_utils/sized_op.h
+++ /dev/null
@@ -1,180 +0,0 @@
-//===-- Sized Operations --------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the SizedOp struct that serves as the middle end of the
-// framework. It implements sized memory operations by breaking them down into
-// simpler types whose availability is described in the Backend. It also
-// provides a way to load and store sized chunks of memory (necessary for the
-// move operation). SizedOp are the building blocks of higher order algorithms
-// like HeadTail, Align or Loop.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
-
-#include <stddef.h> // size_t
-
-#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
-#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE \
- __has_builtin(__builtin_memcpy_inline)
-#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
-
-#ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
-#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE \
- __has_builtin(__builtin_memset_inline)
-#endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
-
-namespace __llvm_libc {
-
-template <typename Backend, size_t Size> struct SizedOp {
- static constexpr size_t SIZE = Size;
- // Define instantiations of SizedOp as a fixed size operation.
- // i.e. an operation that is composable by types in algorithm.h
- static constexpr bool IS_FIXED_SIZE = true;
-
-private:
- static_assert(Backend::IS_BACKEND_TYPE);
- static_assert(SIZE > 0);
- using type = typename Backend::template getNextType<Size>;
- static constexpr size_t TYPE_SIZE = sizeof(type);
- static_assert(SIZE >= TYPE_SIZE);
- static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
- using NextBlock = SizedOp<Backend, NEXT_SIZE>;
-
- // Returns whether we can use an aligned operations.
- // This is possible because the address type carries known compile-time
- // alignment informations.
- template <typename T, typename AddrT> static constexpr Aligned isAligned() {
- static_assert(IsAddressType<AddrT>::value);
- return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
- : Aligned::NO;
- }
-
- // Loads a value of the current `type` from `src`.
- // This function is responsible for extracting Temporality and Alignment from
- // the Address type.
- template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) {
- static_assert(IsAddressType<SrcAddrT>::value && SrcAddrT::IS_READ);
- constexpr auto AS = isAligned<type, SrcAddrT>();
- constexpr auto TS = SrcAddrT::TEMPORALITY;
- return Backend::template load<type, TS, AS>(as<const type>(src));
- }
-
- // Stores a value of the current `type` to `dst`.
- // This function is responsible for extracting Temporality and Alignment from
- // the Address type.
- template <typename DstAddrT>
- static inline void nativeStore(type value, DstAddrT dst) {
- static_assert(IsAddressType<DstAddrT>::value && DstAddrT::IS_WRITE);
- constexpr auto AS = isAligned<type, DstAddrT>();
- constexpr auto TS = DstAddrT::TEMPORALITY;
- return Backend::template store<type, TS, AS>(as<type>(dst), value);
- }
-
- // A well aligned POD structure to store Size bytes.
- // This is used to implement the move operations.
- struct Value {
- alignas(alignof(type)) ubyte payload[Size];
- };
-
-public:
- template <typename DstAddrT, typename SrcAddrT>
- static inline void copy(DstAddrT dst, SrcAddrT src) {
- static_assert(IsAddressType<DstAddrT>::value && DstAddrT::IS_WRITE);
- static_assert(IsAddressType<SrcAddrT>::value && SrcAddrT::IS_READ);
- if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
- DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
- SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
- // delegate optimized copy to compiler.
- __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size);
- return;
- }
- nativeStore(nativeLoad(src), dst);
- if constexpr (NEXT_SIZE > 0)
- NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
- }
-
- template <typename DstAddrT, typename SrcAddrT>
- static inline void move(DstAddrT dst, SrcAddrT src) {
- const auto payload = nativeLoad(src);
- if constexpr (NEXT_SIZE > 0)
- NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
- nativeStore(payload, dst);
- }
-
- template <typename DstAddrT>
- static inline void set(DstAddrT dst, ubyte value) {
- if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE &&
- DstAddrT::TEMPORALITY == Temporality::TEMPORAL) {
- // delegate optimized set to compiler.
- __builtin_memset_inline(dst.ptr(), static_cast<int>(value), Size);
- return;
- }
- nativeStore(Backend::template splat<type>(value), dst);
- if constexpr (NEXT_SIZE > 0)
- NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value);
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
- const uint64_t current =
- Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2));
- if constexpr (NEXT_SIZE > 0) {
- // In the case where we cannot handle Size with single operation (e.g.
- // Size == 3) we can either return early if current is non zero or
- // aggregate all the operations through the bitwise or operator.
- // We chose the later to reduce branching.
- return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1),
- offsetAddr<TYPE_SIZE>(src2)));
- } else {
- return current;
- }
- }
-
- template <typename SrcAddrT1, typename SrcAddrT2>
- static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
- const auto a = nativeLoad(src1);
- const auto b = nativeLoad(src2);
- // If we cannot handle Size as a single operation we have two choices:
- // - Either use Backend's threeWayCmp directly and return it is non
- // zero.
- //
- // if (int32_t res = Backend::template threeWayCmp<type>(a, b))
- // return res;
- //
- // - Or use Backend's notEquals first and use threeWayCmp only if
-  //   different, the assumption here is that notEquals is faster than
- // threeWayCmp and that we can save cycles when the Size needs to be
- // decomposed in many sizes (e.g. Size == 7 => 4 + 2 + 1)
- //
- // if (Backend::template notEquals<type>(a, b))
- // return Backend::template threeWayCmp<type>(a, b);
- //
- // We chose the former to reduce code bloat and branching.
- if (int32_t res = Backend::template threeWayCmp<type>(a, b))
- return res;
- if constexpr (NEXT_SIZE > 0)
- return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1),
- offsetAddr<TYPE_SIZE>(src2));
- return 0;
- }
-
- template <typename SrcAddrT> static Value load(SrcAddrT src) {
- Value output;
- copy(DstAddr<alignof(type)>(output.payload), src);
- return output;
- }
-
- template <typename DstAddrT> static void store(DstAddrT dst, Value value) {
- copy(dst, SrcAddr<alignof(type)>(value.payload));
- }
-};
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt
index 4d8e45d8cdce5..8f926273de5d5 100644
--- a/libc/test/src/string/memory_utils/CMakeLists.txt
+++ b/libc/test/src/string/memory_utils/CMakeLists.txt
@@ -3,8 +3,6 @@ add_libc_unittest(
SUITE
libc_string_unittests
SRCS
- address_test.cpp
- backend_test.cpp
elements_test.cpp
memory_access_test.cpp
utils_test.cpp
@@ -17,19 +15,3 @@ add_libc_unittest(
libc.src.__support.CPP.array
libc.src.__support.CPP.span
)
-
-if(NOT LLVM_LIBC_FULL_BUILD)
-# Disabling this unittest in fullbuild mode as #include<sstream> is pulling an
-# incomplete pthread implementation from llvm-libc.
-add_libc_unittest(
- algorithm_test
- SUITE
- libc_string_unittests
- SRCS
- algorithm_test.cpp
- DEPENDS
- libc.src.string.memory_utils.memory_utils
- libc.src.__support.CPP.array
- libc.src.__support.CPP.span
-)
-endif()
diff --git a/libc/test/src/string/memory_utils/address_test.cpp b/libc/test/src/string/memory_utils/address_test.cpp
deleted file mode 100644
index fe9361ba573e5..0000000000000
--- a/libc/test/src/string/memory_utils/address_test.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-#include "utils/UnitTest/Test.h"
-#include <src/string/memory_utils/address.h>
-
-namespace __llvm_libc {
-
-TEST(LlvmLibcAddress, AliasAreAddresses) {
- ASSERT_TRUE(IsAddressType<SrcAddr<1>>::value);
- ASSERT_TRUE(IsAddressType<DstAddr<1>>::value);
- ASSERT_TRUE(IsAddressType<NtSrcAddr<1>>::value);
- ASSERT_TRUE(IsAddressType<NtDstAddr<1>>::value);
-}
-
-TEST(LlvmLibcAddress, AliasHaveRightPermissions) {
- ASSERT_TRUE(SrcAddr<1>::IS_READ);
- ASSERT_TRUE(NtSrcAddr<1>::IS_READ);
- ASSERT_TRUE(DstAddr<1>::IS_WRITE);
- ASSERT_TRUE(NtDstAddr<1>::IS_WRITE);
-}
-
-TEST(LlvmLibcAddress, AliasHaveRightSemantic) {
- ASSERT_EQ(SrcAddr<1>::TEMPORALITY, Temporality::TEMPORAL);
- ASSERT_EQ(DstAddr<1>::TEMPORALITY, Temporality::TEMPORAL);
- ASSERT_EQ(NtSrcAddr<1>::TEMPORALITY, Temporality::NON_TEMPORAL);
- ASSERT_EQ(NtDstAddr<1>::TEMPORALITY, Temporality::NON_TEMPORAL);
-}
-
-TEST(LlvmLibcAddress, AliasHaveRightAlignment) {
- ASSERT_EQ(SrcAddr<1>::ALIGNMENT, size_t(1));
- ASSERT_EQ(SrcAddr<4>::ALIGNMENT, size_t(4));
-}
-
-TEST(LlvmLibcAddress, NarrowAlignment) {
- // Address 8-byte aligned, offset by 8.
- ASSERT_EQ(offsetAddr<8>(SrcAddr<8>(nullptr)).ALIGNMENT, size_t(8));
- // Address 16-byte aligned, offset by 4.
- ASSERT_EQ(offsetAddr<4>(SrcAddr<16>(nullptr)).ALIGNMENT, size_t(4));
- // Address 4-byte aligned, offset by 16.
- ASSERT_EQ(offsetAddr<16>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(4));
- // Address 4-byte aligned, offset by 1.
- ASSERT_EQ(offsetAddr<1>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(1));
- // Address 4-byte aligned, offset by 2.
- ASSERT_EQ(offsetAddr<2>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(2));
- // Address 4-byte aligned, offset by 6.
- ASSERT_EQ(offsetAddr<6>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(2));
- // Address 4-byte aligned, offset by 10.
- ASSERT_EQ(offsetAddr<10>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(2));
- // Address 8-byte aligned, offset by 6.
- ASSERT_EQ(offsetAddr<6>(SrcAddr<8>(nullptr)).ALIGNMENT, size_t(2));
-}
-
-TEST(LlvmLibcAddress, OffsetAddr) {
- ubyte a;
- SrcAddr<1> addr(&a);
- ASSERT_EQ((const void *)offsetAddr<4>(addr).ptr(), (const void *)(&a + 4));
- ASSERT_EQ((const void *)offsetAddr<32>(addr).ptr(), (const void *)(&a + 32));
-}
-
-TEST(LlvmLibcAddress, AssumeAligned) {
- SrcAddr<16> addr(nullptr);
- ASSERT_EQ(offsetAddrAssumeAligned<8>(addr, 0).ALIGNMENT, size_t(8));
- ASSERT_EQ(offsetAddrAssumeAligned<1>(addr, 0).ALIGNMENT, size_t(1));
- ASSERT_EQ(offsetAddrMultiplesOf<4>(addr, 0).ALIGNMENT, size_t(4));
- ASSERT_EQ(offsetAddrMultiplesOf<32>(addr, 0).ALIGNMENT, size_t(16));
-}
-
-TEST(LlvmLibcAddress, offsetAddrAssumeAligned) {
- ubyte a;
- SrcAddr<1> addr(&a);
- ASSERT_EQ((const void *)offsetAddrAssumeAligned<1>(addr, 17).ptr(),
- (const void *)(&a + 17));
-}
-
-TEST(LlvmLibcAddress, offsetAddrMultiplesOf) {
- ubyte a;
- SrcAddr<1> addr(&a);
- ASSERT_EQ((const void *)offsetAddrMultiplesOf<4>(addr, 16).ptr(),
- (const void *)(&a + 16));
-}
-
-} // namespace __llvm_libc
diff --git a/libc/test/src/string/memory_utils/algorithm_test.cpp b/libc/test/src/string/memory_utils/algorithm_test.cpp
deleted file mode 100644
index d973fbcd5c19a..0000000000000
--- a/libc/test/src/string/memory_utils/algorithm_test.cpp
+++ /dev/null
@@ -1,566 +0,0 @@
-#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE 0
-#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE 0
-
-#include "utils/UnitTest/Test.h"
-#include <src/__support/CPP/array.h>
-#include <src/string/memory_utils/algorithm.h>
-#include <src/string/memory_utils/backends.h>
-
-#include <sstream>
-
-namespace __llvm_libc {
-
-struct alignas(64) Buffer : cpp::array<char, 128> {
- bool contains(const char *ptr) const {
- return ptr >= data() && ptr < (data() + size());
- }
- size_t getOffset(const char *ptr) const { return ptr - data(); }
- void fill(char c) {
- for (auto itr = begin(); itr != end(); ++itr)
- *itr = c;
- }
-};
-
-static Buffer buffer1;
-static Buffer buffer2;
-static std::ostringstream LOG;
-
-struct TestBackend {
- static constexpr bool IS_BACKEND_TYPE = true;
-
- template <typename T> static void log(const char *Action, const char *ptr) {
- LOG << Action << "<" << sizeof(T) << "> ";
- if (buffer1.contains(ptr))
- LOG << "a[" << buffer1.getOffset(ptr) << "]";
- else if (buffer2.contains(ptr))
- LOG << "b[" << buffer2.getOffset(ptr) << "]";
- LOG << "\n";
- }
-
- template <typename T, Temporality TS, Aligned AS>
- static T load(const T *src) {
- log<T>((AS == Aligned::YES ? "LdA" : "LdU"),
- reinterpret_cast<const char *>(src));
- return Scalar64BitBackend::load<T, TS, AS>(src);
- }
-
- template <typename T, Temporality TS, Aligned AS>
- static void store(T *dst, T value) {
- log<T>((AS == Aligned::YES ? "StA" : "StU"),
- reinterpret_cast<const char *>(dst));
- Scalar64BitBackend::store<T, TS, AS>(dst, value);
- }
-
- template <typename T> static inline T splat(ubyte value) {
- LOG << "Splat<" << sizeof(T) << "> " << (unsigned)value << '\n';
- return Scalar64BitBackend::splat<T>(value);
- }
-
- template <typename T> static inline uint64_t notEquals(T v1, T v2) {
- LOG << "Neq<" << sizeof(T) << ">\n";
- return Scalar64BitBackend::notEquals<T>(v1, v2);
- }
-
- template <typename T> static inline int32_t threeWayCmp(T v1, T v2) {
- LOG << "Diff<" << sizeof(T) << ">\n";
- return Scalar64BitBackend::threeWayCmp<T>(v1, v2);
- }
-
- template <size_t Size>
- using getNextType = Scalar64BitBackend::getNextType<Size>;
-};
-
-struct LlvmLibcAlgorithm : public testing::Test {
- void SetUp() override {
- LOG = std::ostringstream();
- LOG << '\n';
- }
-
- void fillEqual() {
- buffer1.fill('a');
- buffer2.fill('a');
- }
-
- void fillDifferent() {
- buffer1.fill('a');
- buffer2.fill('b');
- }
-
- const char *getTrace() {
- trace_ = LOG.str();
- return trace_.c_str();
- }
-
- const char *stripComments(const char *expected) {
- expected_.clear();
- std::stringstream ss(expected);
- std::string line;
- while (std::getline(ss, line, '\n')) {
- const auto pos = line.find('#');
- if (pos == std::string::npos) {
- expected_ += line;
- } else {
- auto log = line.substr(0, pos);
- while (!log.empty() && std::isspace(log.back()))
- log.pop_back();
- expected_ += log;
- }
- expected_ += '\n';
- }
- return expected_.c_str();
- }
-
- template <size_t Align = 1> SrcAddr<Align> buf1(size_t offset = 0) const {
- return buffer1.data() + offset;
- }
- template <size_t Align = 1> SrcAddr<Align> buf2(size_t offset = 0) const {
- return buffer2.data() + offset;
- }
- template <size_t Align = 1> DstAddr<Align> dst(size_t offset = 0) const {
- return buffer1.data() + offset;
- }
- template <size_t Align = 1> SrcAddr<Align> src(size_t offset = 0) const {
- return buffer2.data() + offset;
- }
-
-private:
- std::string trace_;
- std::string expected_;
-};
-
-using _8 = SizedOp<TestBackend, 8>;
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing fixed fized forward operations
-///////////////////////////////////////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////////////////////////
-// Copy
-
-TEST_F(LlvmLibcAlgorithm, copy_1) {
- SizedOp<TestBackend, 1>::copy(dst(), src());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> b[0]
-StU<1> a[0]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, copy_15) {
- SizedOp<TestBackend, 15>::copy(dst(), src());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0]
-LdU<4> b[8]
-StU<4> a[8]
-LdU<2> b[12]
-StU<2> a[12]
-LdU<1> b[14]
-StU<1> a[14]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, copy_16) {
- SizedOp<TestBackend, 16>::copy(dst(), src());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0]
-LdU<8> b[8]
-StU<8> a[8]
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Move
-
-TEST_F(LlvmLibcAlgorithm, move_1) {
- SizedOp<TestBackend, 1>::move(dst(), src());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> b[0]
-StU<1> a[0]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, move_15) {
- SizedOp<TestBackend, 15>::move(dst(), src());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-LdU<4> b[8]
-LdU<2> b[12]
-LdU<1> b[14]
-StU<1> a[14]
-StU<2> a[12]
-StU<4> a[8]
-StU<8> a[0]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, move_16) {
- SizedOp<TestBackend, 16>::move(dst(), src());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-LdU<8> b[8]
-StU<8> a[8]
-StU<8> a[0]
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// set
-
-TEST_F(LlvmLibcAlgorithm, set_1) {
- SizedOp<TestBackend, 1>::set(dst(), ubyte{42});
- EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<1> 42
-StU<1> a[0]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, set_15) {
- SizedOp<TestBackend, 15>::set(dst(), ubyte{42});
- EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<8> 42
-StU<8> a[0]
-Splat<4> 42
-StU<4> a[8]
-Splat<2> 42
-StU<2> a[12]
-Splat<1> 42
-StU<1> a[14]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, set_16) {
- SizedOp<TestBackend, 16>::set(dst(), ubyte{42});
- EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<8> 42
-StU<8> a[0]
-Splat<8> 42
-StU<8> a[8]
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// different
-
-TEST_F(LlvmLibcAlgorithm, different_1) {
- fillEqual();
- SizedOp<TestBackend, 1>::isDifferent(buf1(), buf2());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> a[0]
-LdU<1> b[0]
-Neq<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, different_15) {
- fillEqual();
- SizedOp<TestBackend, 15>::isDifferent(buf1(), buf2());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Neq<8>
-LdU<4> a[8]
-LdU<4> b[8]
-Neq<4>
-LdU<2> a[12]
-LdU<2> b[12]
-Neq<2>
-LdU<1> a[14]
-LdU<1> b[14]
-Neq<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, different_15_no_shortcircuit) {
- fillDifferent();
- SizedOp<TestBackend, 15>::isDifferent(buf1(), buf2());
- // If buffer compare isDifferent we continue to aggregate.
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Neq<8>
-LdU<4> a[8]
-LdU<4> b[8]
-Neq<4>
-LdU<2> a[12]
-LdU<2> b[12]
-Neq<2>
-LdU<1> a[14]
-LdU<1> b[14]
-Neq<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, different_16) {
- fillEqual();
- SizedOp<TestBackend, 16>::isDifferent(buf1(), buf2());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Neq<8>
-LdU<8> a[8]
-LdU<8> b[8]
-Neq<8>
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// three_way_cmp
-
-TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_1) {
- fillEqual();
- SizedOp<TestBackend, 1>::threeWayCmp(buf1(), buf2());
- // Buffer compare equal, returning 0 and no call to Diff.
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> a[0]
-LdU<1> b[0]
-Diff<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_15) {
- fillEqual();
- SizedOp<TestBackend, 15>::threeWayCmp(buf1(), buf2());
- // Buffer compare equal, returning 0 and no call to Diff.
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Diff<8>
-LdU<4> a[8]
-LdU<4> b[8]
-Diff<4>
-LdU<2> a[12]
-LdU<2> b[12]
-Diff<2>
-LdU<1> a[14]
-LdU<1> b[14]
-Diff<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, three_way_cmp_neq_15_shortcircuit) {
- fillDifferent();
- SizedOp<TestBackend, 16>::threeWayCmp(buf1(), buf2());
- // If buffer compare isDifferent we stop early.
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Diff<8>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_16) {
- fillEqual();
- SizedOp<TestBackend, 16>::threeWayCmp(buf1(), buf2());
- // Buffer compare equal, returning 0 and no call to Diff.
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Diff<8>
-LdU<8> a[8]
-LdU<8> b[8]
-Diff<8>
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing skip operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, skip_and_set) {
- Skip<11>::Then<SizedOp<TestBackend, 1>>::set(dst(), ubyte{42});
- EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<1> 42
-StU<1> a[11]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, skip_and_different_1) {
- Skip<11>::Then<SizedOp<TestBackend, 1>>::isDifferent(buf1(), buf2());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> a[11]
-LdU<1> b[11]
-Neq<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, skip_and_three_way_cmp_8) {
- Skip<11>::Then<SizedOp<TestBackend, 1>>::threeWayCmp(buf1(), buf2());
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> a[11]
-LdU<1> b[11]
-Diff<1>
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing tail operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, tail_copy_8) {
- Tail<_8>::copy(dst(), src(), 16);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[8]
-StU<8> a[8]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, tail_move_8) {
- Tail<_8>::move(dst(), src(), 16);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[8]
-StU<8> a[8]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, tail_set_8) {
- Tail<_8>::set(dst(), ubyte{42}, 16);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<8> 42
-StU<8> a[8]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, tail_different_8) {
- fillEqual();
- Tail<_8>::isDifferent(buf1(), buf2(), 16);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[8]
-LdU<8> b[8]
-Neq<8>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, tail_three_way_cmp_8) {
- fillEqual();
- Tail<_8>::threeWayCmp(buf1(), buf2(), 16);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[8]
-LdU<8> b[8]
-Diff<8>
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing HeadTail operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, head_tail_copy_8) {
- HeadTail<_8>::copy(dst(), src(), 16);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0]
-LdU<8> b[8]
-StU<8> a[8]
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing Loop operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, loop_copy_one_iteration_and_tail) {
- Loop<_8>::copy(dst(), src(), 10);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0] # covers 0-7
-LdU<8> b[2]
-StU<8> a[2] # covers 2-9
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, loop_copy_two_iteration_and_tail) {
- Loop<_8>::copy(dst(), src(), 17);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0] # covers 0-7
-LdU<8> b[8]
-StU<8> a[8] # covers 8-15
-LdU<8> b[9]
-StU<8> a[9] # covers 9-16
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, loop_with_one_turn_is_inefficient_but_ok) {
- Loop<_8>::copy(dst(), src(), 8);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0] # first iteration covers 0-7
-LdU<8> b[0] # tail also covers 0-7 but since Loop is supposed to be used
-StU<8> a[0] # with a sufficient number of iterations the tail cost is amortised
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, loop_with_round_number_of_turn) {
- Loop<_8>::copy(dst(), src(), 24);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0] # first iteration covers 0-7
-LdU<8> b[8]
-StU<8> a[8] # second iteration covers 8-15
-LdU<8> b[16]
-StU<8> a[16]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, dst_aligned_loop) {
- Loop<_8>::copy(dst<16>(), src(), 23);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StA<8> a[0] # store is aligned on 16B
-LdU<8> b[8]
-StA<8> a[8] # subsequent stores are aligned
-LdU<8> b[15]
-StU<8> a[15] # Tail is always unaligned
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, aligned_loop) {
- Loop<_8>::copy(dst<16>(), src<8>(), 23);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdA<8> b[0] # load is aligned on 8B
-StA<8> a[0] # store is aligned on 16B
-LdA<8> b[8] # subsequent loads are aligned
-StA<8> a[8] # subsequent stores are aligned
-LdU<8> b[15] # Tail is always unaligned
-StU<8> a[15] # Tail is always unaligned
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing Align operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, align_dst_copy_8) {
- Align<_8, Arg::Dst>::Then<Loop<_8>>::copy(dst(2), src(3), 31);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[3]
-StU<8> a[2] # First store covers unaligned bytes
-LdU<8> b[9]
-StA<8> a[8] # First aligned store
-LdU<8> b[17]
-StA<8> a[16] # Subsequent stores are aligned
-LdU<8> b[25]
-StA<8> a[24] # Subsequent stores are aligned
-LdU<8> b[26]
-StU<8> a[25] # Last store covers remaining bytes
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, align_src_copy_8) {
- Align<_8, Arg::Src>::Then<Loop<_8>>::copy(dst(2), src(3), 31);
- EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[3] # First load covers unaligned bytes
-StU<8> a[2]
-LdA<8> b[8] # First aligned load
-StU<8> a[7]
-LdA<8> b[16] # Subsequent loads are aligned
-StU<8> a[15]
-LdA<8> b[24] # Subsequent loads are aligned
-StU<8> a[23]
-LdU<8> b[26] # Last load covers remaining bytes
-StU<8> a[25]
-)"));
-}
-
-} // namespace __llvm_libc
diff --git a/libc/test/src/string/memory_utils/backend_test.cpp b/libc/test/src/string/memory_utils/backend_test.cpp
deleted file mode 100644
index 72fb7c4cf53b1..0000000000000
--- a/libc/test/src/string/memory_utils/backend_test.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-//===-- Unittests for backends --------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/CPP/array.h"
-#include "src/__support/CPP/bit.h"
-#include "src/__support/CPP/span.h"
-#include "src/__support/architectures.h"
-#include "src/string/memory_utils/backends.h"
-#include "utils/UnitTest/Test.h"
-#include <string.h>
-
-namespace __llvm_libc {
-
-template <size_t Size> using Buffer = cpp::array<char, Size>;
-
-static char GetRandomChar() {
- // Implementation of C++ minstd_rand seeded with 123456789.
- // https://en.cppreference.com/w/cpp/numeric/random
- // "Minimum standard", recommended by Park, Miller, and Stockmeyer in 1993
- static constexpr const uint64_t a = 48271;
- static constexpr const uint64_t c = 0;
- static constexpr const uint64_t m = 2147483647;
- static uint64_t seed = 123456789;
- seed = (a * seed + c) % m;
- return seed;
-}
-
-static void Randomize(cpp::span<char> buffer) {
-  for (auto &current : buffer)
- current = GetRandomChar();
-}
-
-template <size_t Size> static Buffer<Size> GetRandomBuffer() {
- Buffer<Size> buffer;
- Randomize(buffer);
- return buffer;
-}
-
-template <typename Backend, size_t Size> struct Conf {
- static_assert(Backend::IS_BACKEND_TYPE);
- using BufferT = Buffer<Size>;
- using T = typename Backend::template getNextType<Size>;
- static_assert(sizeof(T) == Size);
- static constexpr size_t SIZE = Size;
-
- static BufferT splat(ubyte value) {
- return cpp::bit_cast<BufferT>(Backend::template splat<T>(value));
- }
-
- static uint64_t notEquals(const BufferT &v1, const BufferT &v2) {
- return Backend::template notEquals<T>(cpp::bit_cast<T>(v1),
- cpp::bit_cast<T>(v2));
- }
-
- static int32_t threeWayCmp(const BufferT &v1, const BufferT &v2) {
- return Backend::template threeWayCmp<T>(cpp::bit_cast<T>(v1),
- cpp::bit_cast<T>(v2));
- }
-};
-
-using FunctionTypes = testing::TypeList< //
-#if defined(LLVM_LIBC_ARCH_X86) //
- Conf<X86Backend, 1>, //
- Conf<X86Backend, 2>, //
- Conf<X86Backend, 4>, //
- Conf<X86Backend, 8>, //
-#if HAS_M128
- Conf<X86Backend, 16>, //
-#endif
-#if HAS_M256
- Conf<X86Backend, 32>, //
-#endif
-#if HAS_M512
- Conf<X86Backend, 64>, //
-#endif
-#endif // defined(LLVM_LIBC_ARCH_X86)
- Conf<Scalar64BitBackend, 1>, //
- Conf<Scalar64BitBackend, 2>, //
- Conf<Scalar64BitBackend, 4>, //
- Conf<Scalar64BitBackend, 8> //
- >;
-
-TYPED_TEST(LlvmLibcMemoryBackend, splat, FunctionTypes) {
- for (auto value : cpp::array<uint8_t, 3>{0u, 1u, 255u}) {
- alignas(64) const auto stored =
- ParamType::splat(cpp::bit_cast<ubyte>(value));
- for (size_t i = 0; i < ParamType::SIZE; ++i)
- EXPECT_EQ(cpp::bit_cast<uint8_t>(stored[i]), value);
- }
-}
-
-TYPED_TEST(LlvmLibcMemoryBackend, notEquals, FunctionTypes) {
- alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>();
- EXPECT_EQ(ParamType::notEquals(a, a), uint64_t(0));
- for (size_t i = 0; i < a.size(); ++i) {
- alignas(64) auto b = a;
- ++b[i];
- EXPECT_NE(ParamType::notEquals(a, b), uint64_t(0));
- EXPECT_NE(ParamType::notEquals(b, a), uint64_t(0));
- }
-}
-
-TYPED_TEST(LlvmLibcMemoryBackend, threeWayCmp, FunctionTypes) {
- alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>();
- EXPECT_EQ(ParamType::threeWayCmp(a, a), 0);
- for (size_t i = 0; i < a.size(); ++i) {
- alignas(64) auto b = a;
- ++b[i];
- const auto cmp = memcmp(&a, &b, sizeof(a));
- ASSERT_NE(cmp, 0);
- if (cmp > 0) {
- EXPECT_GT(ParamType::threeWayCmp(a, b), 0);
- EXPECT_LT(ParamType::threeWayCmp(b, a), 0);
- } else {
- EXPECT_LT(ParamType::threeWayCmp(a, b), 0);
- EXPECT_GT(ParamType::threeWayCmp(b, a), 0);
- }
- }
-}
-
-template <typename Backend, size_t Size, Temporality TS, Aligned AS>
-struct LoadStoreConf {
- static_assert(Backend::IS_BACKEND_TYPE);
- using BufferT = Buffer<Size>;
- using T = typename Backend::template getNextType<Size>;
- static_assert(sizeof(T) == Size);
- static constexpr size_t SIZE = Size;
-
- static BufferT load(const BufferT &ref) {
- const auto *ptr = cpp::bit_cast<const T *>(ref.data());
- const T value = Backend::template load<T, TS, AS>(ptr);
- return cpp::bit_cast<BufferT>(value);
- }
-
- static void store(BufferT &ref, const BufferT value) {
- auto *ptr = cpp::bit_cast<T *>(ref.data());
- Backend::template store<T, TS, AS>(ptr, cpp::bit_cast<T>(value));
- }
-};
-
-using LoadStoreTypes = testing::TypeList< //
-#if defined(LLVM_LIBC_ARCH_X86) //
- LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::YES>, //
- LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::YES>, //
- LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::YES>, //
- LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::YES>, //
-#if HAS_M128
- LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::YES>, //
- LoadStoreConf<X86Backend, 16, Temporality::NON_TEMPORAL, Aligned::YES>, //
-#endif
-#if HAS_M256
- LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::YES>, //
- LoadStoreConf<X86Backend, 32, Temporality::NON_TEMPORAL, Aligned::YES>, //
-#endif
-#if HAS_M512
- LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::YES>, //
- LoadStoreConf<X86Backend, 64, Temporality::NON_TEMPORAL, Aligned::YES>, //
-#endif
-#endif // defined(LLVM_LIBC_ARCH_X86)
- LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL,
- Aligned::YES>, //
- LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL,
- Aligned::YES>, //
- LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL,
- Aligned::YES>, //
- LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::NO>, //
- LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::YES> //
- >;
-
-TYPED_TEST(LlvmLibcMemoryBackend, load, LoadStoreTypes) {
- alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>();
- const auto loaded = ParamType::load(expected);
- for (size_t i = 0; i < ParamType::SIZE; ++i)
- EXPECT_EQ(loaded[i], expected[i]);
-}
-
-TYPED_TEST(LlvmLibcMemoryBackend, store, LoadStoreTypes) {
- alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>();
- alignas(64) typename ParamType::BufferT stored;
- ParamType::store(stored, expected);
- for (size_t i = 0; i < ParamType::SIZE; ++i)
- EXPECT_EQ(stored[i], expected[i]);
-}
-
-} // namespace __llvm_libc
More information about the libc-commits
mailing list