[libc-commits] [libc] 2188cf9 - [libc][NFC] Remove new framework, a simpler one is coming

Guillaume Chatelet via libc-commits libc-commits at lists.llvm.org
Mon Sep 26 05:43:11 PDT 2022


Author: Guillaume Chatelet
Date: 2022-09-26T12:42:38Z
New Revision: 2188cf9fa4d012b3ce484f9e97b66581569c1157

URL: https://github.com/llvm/llvm-project/commit/2188cf9fa4d012b3ce484f9e97b66581569c1157
DIFF: https://github.com/llvm/llvm-project/commit/2188cf9fa4d012b3ce484f9e97b66581569c1157.diff

LOG: [libc][NFC] Remove new framework, a simpler one is coming

Added: 
    

Modified: 
    libc/test/src/string/memory_utils/CMakeLists.txt

Removed: 
    libc/src/string/memory_utils/address.h
    libc/src/string/memory_utils/algorithm.h
    libc/src/string/memory_utils/backend_aarch64.h
    libc/src/string/memory_utils/backend_scalar.h
    libc/src/string/memory_utils/backend_x86.h
    libc/src/string/memory_utils/backends.h
    libc/src/string/memory_utils/sized_op.h
    libc/test/src/string/memory_utils/address_test.cpp
    libc/test/src/string/memory_utils/algorithm_test.cpp
    libc/test/src/string/memory_utils/backend_test.cpp


################################################################################
diff --git a/libc/src/string/memory_utils/address.h b/libc/src/string/memory_utils/address.h
deleted file mode 100644
index caa71be5b1da9..0000000000000
--- a/libc/src/string/memory_utils/address.h
+++ /dev/null
@@ -1,133 +0,0 @@
-//===-- Strongly typed address with alignment and access semantics --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_COMMON_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_COMMON_H
-
-#include "src/__support/CPP/type_traits.h" // cpp::ConditionalType
-#include "src/string/memory_utils/utils.h" // is_power2
-#include <stddef.h>                        // size_t
-#include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
-
-namespace __llvm_libc {
-
-// Utility to enable static_assert(false) in templates.
-template <bool flag = false> static void DeferredStaticAssert(const char *msg) {
-  static_assert(flag, "compilation error");
-}
-
-// A non-coercible type to represent raw data.
-enum class ubyte : unsigned char { ZERO = 0 };
-
-// Address attribute specifying whether the underlying load / store operations
-// are temporal or non-temporal.
-enum class Temporality { TEMPORAL, NON_TEMPORAL };
-
-// Address attribute specifying whether the underlying load / store operations
-// are aligned or unaligned.
-enum class Aligned { NO, YES };
-
-// Address attribute to discriminate between readable and writable addresses.
-enum class Permission { Read, Write };
-
-// Address is semantically equivalent to a pointer but also conveys compile time
-// information that helps with instruction selection (aligned/unaligned,
-// temporal/non-temporal).
-template <size_t Alignment, Permission P, Temporality TS> struct Address {
-  static_assert(is_power2(Alignment));
-  static constexpr size_t ALIGNMENT = Alignment;
-  static constexpr Permission PERMISSION = P;
-  static constexpr Temporality TEMPORALITY = TS;
-  static constexpr bool IS_READ = P == Permission::Read;
-  static constexpr bool IS_WRITE = P == Permission::Write;
-  using PointeeType = cpp::conditional_t<!IS_WRITE, const ubyte, ubyte>;
-  using VoidType = cpp::conditional_t<!IS_WRITE, const void, void>;
-
-  Address(VoidType *ptr) : ptr_(reinterpret_cast<PointeeType *>(ptr)) {}
-
-  PointeeType *ptr() const {
-    return reinterpret_cast<PointeeType *>(
-        __builtin_assume_aligned(ptr_, ALIGNMENT));
-  }
-
-  PointeeType *const ptr_;
-
-  template <size_t ByteOffset> auto offset(size_t byte_offset) const {
-    static constexpr size_t NewAlignment = commonAlign<ByteOffset>();
-    return Address<NewAlignment, PERMISSION, TEMPORALITY>(ptr_ + byte_offset);
-  }
-
-private:
-  static constexpr size_t gcd(size_t A, size_t B) {
-    return B == 0 ? A : gcd(B, A % B);
-  }
-
-  template <size_t ByteOffset> static constexpr size_t commonAlign() {
-    constexpr size_t GCD = gcd(ByteOffset, ALIGNMENT);
-    if constexpr (is_power2(GCD))
-      return GCD;
-    else
-      return 1;
-  }
-};
-
-template <typename T> struct IsAddressType : public cpp::false_type {};
-template <size_t Alignment, Permission P, Temporality TS>
-struct IsAddressType<Address<Alignment, P, TS>> : public cpp::true_type {};
-
-// Reinterpret the address as a pointer to T.
-// This is not UB since the underlying pointer always refers to a `char` in a
-// buffer of raw data.
-template <typename T, typename AddrT> static T *as(AddrT addr) {
-  static_assert(IsAddressType<AddrT>::value);
-  return reinterpret_cast<T *>(addr.ptr());
-}
-
-// Offsets the address by a compile-time amount; this allows propagating
-// alignment whenever possible.
-template <size_t ByteOffset, typename AddrT>
-static auto offsetAddr(AddrT addr) {
-  static_assert(IsAddressType<AddrT>::value);
-  return addr.template offset<ByteOffset>(ByteOffset);
-}
-
-// Offsets the address by a runtime amount while assuming that the resulting
-// address will be Alignment aligned.
-template <size_t Alignment, typename AddrT>
-static auto offsetAddrAssumeAligned(AddrT addr, size_t byte_offset) {
-  static_assert(IsAddressType<AddrT>::value);
-  return Address<Alignment, AddrT::PERMISSION, AddrT::TEMPORALITY>(addr.ptr_ +
-                                                                   byte_offset);
-}
-
-// Offsets the address by a runtime amount that is assumed to be a multiple of
-// ByteOffset. This allows propagating the address alignment whenever possible.
-template <size_t ByteOffset, typename AddrT>
-static auto offsetAddrMultiplesOf(AddrT addr, ptrdiff_t byte_offset) {
-  static_assert(IsAddressType<AddrT>::value);
-  return addr.template offset<ByteOffset>(byte_offset);
-}
-
-// User friendly aliases for common address types.
-template <size_t Alignment>
-using SrcAddr = Address<Alignment, Permission::Read, Temporality::TEMPORAL>;
-template <size_t Alignment>
-using DstAddr = Address<Alignment, Permission::Write, Temporality::TEMPORAL>;
-template <size_t Alignment>
-using NtSrcAddr =
-    Address<Alignment, Permission::Read, Temporality::NON_TEMPORAL>;
-template <size_t Alignment>
-using NtDstAddr =
-    Address<Alignment, Permission::Write, Temporality::NON_TEMPORAL>;
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_COMMON_H
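
For readers skimming the removed framework, the core idea of address.h is that offsetting a pointer by a compile-time amount degrades its known alignment to gcd(offset, alignment), clamped to 1 when that gcd is not a power of two. A minimal standalone sketch of the rule, with illustrative helper names that are not part of the removed header:

#include <cstddef>

// The alignment of (ptr + ByteOffset), for a ptr known to be
// Alignment-aligned, is gcd(ByteOffset, Alignment), or 1 when that
// gcd is not a power of two.
static constexpr std::size_t gcd(std::size_t a, std::size_t b) {
  return b == 0 ? a : gcd(b, a % b);
}

static constexpr bool is_power2(std::size_t value) {
  return value && ((value & (value - 1)) == 0);
}

template <std::size_t ByteOffset, std::size_t Alignment>
constexpr std::size_t propagated_alignment() {
  return is_power2(gcd(ByteOffset, Alignment)) ? gcd(ByteOffset, Alignment)
                                               : 1;
}

// A 16-byte aligned address offset by 4 bytes is only 4-byte aligned.
static_assert(propagated_alignment<4, 16>() == 4, "");
// Offsetting by a multiple of the alignment preserves it.
static_assert(propagated_alignment<32, 16>() == 16, "");
// A 4-byte aligned address offset by 6 bytes is 2-byte aligned.
static_assert(propagated_alignment<6, 4>() == 2, "");

These are the same cases the NarrowAlignment test removed further below exercises through offsetAddr.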

diff --git a/libc/src/string/memory_utils/algorithm.h b/libc/src/string/memory_utils/algorithm.h
deleted file mode 100644
index 6355ffe04562f..0000000000000
--- a/libc/src/string/memory_utils/algorithm.h
+++ /dev/null
@@ -1,463 +0,0 @@
-//===-- Algorithms to compose sized memory operations ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Higher order primitives that build upon the SizedOpT facility.
-// They constitute the basic blocks for composing memory functions.
-// This file defines the following operations:
-// - Skip
-// - Tail
-// - HeadTail
-// - Loop
-// - Align
-//
-// See each class for documentation.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
-
-#include "src/string/memory_utils/address.h" // Address
-#include "src/string/memory_utils/utils.h"   // offset_to_next_aligned
-
-#include <stddef.h> // ptrdiff_t
-
-namespace __llvm_libc {
-
-// We are not yet allowed to use asserts in low level memory operations as
-// assert itself could depend on them.
-// We define this empty macro so we can enable them as soon as possible and keep
-// track of invariants.
-#define LIBC_ASSERT(COND)
-
-// An operation that allows skipping the specified number of bytes.
-template <ptrdiff_t Bytes> struct Skip {
-  template <typename NextT> struct Then {
-    template <typename DstAddrT>
-    static inline void set(DstAddrT dst, ubyte value) {
-      static_assert(NextT::IS_FIXED_SIZE);
-      NextT::set(offsetAddr<Bytes>(dst), value);
-    }
-
-    template <typename SrcAddrT1, typename SrcAddrT2>
-    static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
-      static_assert(NextT::IS_FIXED_SIZE);
-      return NextT::isDifferent(offsetAddr<Bytes>(src1),
-                                offsetAddr<Bytes>(src2));
-    }
-
-    template <typename SrcAddrT1, typename SrcAddrT2>
-    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
-      static_assert(NextT::IS_FIXED_SIZE);
-      return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
-                                offsetAddr<Bytes>(src2));
-    }
-
-    template <typename SrcAddrT1, typename SrcAddrT2>
-    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
-                                      size_t runtime_size) {
-      static_assert(NextT::IS_RUNTIME_SIZE);
-      return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
-                                offsetAddr<Bytes>(src2), runtime_size - Bytes);
-    }
-  };
-};
-
-// Compute the address of a tail operation.
-// Because of the runtime size, we lose the alignment information.
-template <size_t Size, typename AddrT>
-static auto tailAddr(AddrT addr, size_t runtime_size) {
-  static_assert(IsAddressType<AddrT>::value);
-  return offsetAddrAssumeAligned<1>(addr, runtime_size - Size);
-}
-
-// Perform the operation on the last 'Size' bytes of the buffer.
-//
-// e.g. with
-// [1234567812345678123]
-// [__XXXXXXXXXXXXXX___]
-// [________XXXXXXXX___]
-//
-// Precondition: `runtime_size >= Size`.
-template <typename SizedOpT> struct Tail {
-  static_assert(SizedOpT::IS_FIXED_SIZE);
-  static constexpr bool IS_RUNTIME_SIZE = true;
-  static constexpr size_t SIZE = SizedOpT::SIZE;
-
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
-    SizedOpT::copy(tailAddr<SIZE>(dst, runtime_size),
-                   tailAddr<SIZE>(src, runtime_size));
-  }
-
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
-    SizedOpT::move(tailAddr<SIZE>(dst, runtime_size),
-                   tailAddr<SIZE>(src, runtime_size));
-  }
-
-  template <typename DstAddrT>
-  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
-    SizedOpT::set(tailAddr<SIZE>(dst, runtime_size), value);
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
-                                     size_t runtime_size) {
-    return SizedOpT::isDifferent(tailAddr<SIZE>(src1, runtime_size),
-                                 tailAddr<SIZE>(src2, runtime_size));
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
-                                    size_t runtime_size) {
-    return SizedOpT::threeWayCmp(tailAddr<SIZE>(src1, runtime_size),
-                                 tailAddr<SIZE>(src2, runtime_size));
-  }
-};
-
-// Perform the operation on the first and the last `SizedOpT::Size` bytes of the
-// buffer. This is useful for overlapping operations.
-//
-// e.g. with
-// [1234567812345678123]
-// [__XXXXXXXXXXXXXX___]
-// [__XXXXXXXX_________]
-// [________XXXXXXXX___]
-//
-// Precondition: `runtime_size >= Size && runtime_size <= 2 x Size`.
-template <typename SizedOpT> struct HeadTail {
-  static_assert(SizedOpT::IS_FIXED_SIZE);
-  static constexpr bool IS_RUNTIME_SIZE = true;
-
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
-    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
-    SizedOpT::copy(dst, src);
-    Tail<SizedOpT>::copy(dst, src, runtime_size);
-  }
-
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
-    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
-    static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;
-    // The load and store operations can be performed in any order as long as
-    // they are not interleaved. More investigations are needed to determine the
-    // best order.
-    auto head = SizedOpT::load(src);
-    auto tail = SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
-    SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail);
-    SizedOpT::store(dst, head);
-  }
-
-  template <typename DstAddrT>
-  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
-    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
-    SizedOpT::set(dst, value);
-    Tail<SizedOpT>::set(dst, value, runtime_size);
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
-                                     size_t runtime_size) {
-    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
-    // Two strategies can be applied here:
-    // 1. Compute head and tail and compose them with a bitwise or operation.
-    // 2. Stop early if head is different.
-    // We chose the latter because HeadTail operations are typically performed
-    // with sizes ranging from 4 to 256 bytes. The cost of the loads is then
-    // significantly larger than the cost of the branch.
-    if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
-      return res;
-    return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
-                                    size_t runtime_size) {
-    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE &&
-                runtime_size <= 2 * SizedOpT::SIZE);
-    if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
-      return res;
-    return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
-  }
-};
-
-// Simple loop ending with a Tail operation.
-//
-// e.g. with
-// [12345678123456781234567812345678]
-// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
-// [__XXXXXXXX_______________________]
-// [__________XXXXXXXX_______________]
-// [__________________XXXXXXXX_______]
-// [______________________XXXXXXXX___]
-//
-// Precondition:
-// - runtime_size >= Size
-template <typename SizedOpT> struct Loop {
-  static_assert(SizedOpT::IS_FIXED_SIZE);
-  static constexpr bool IS_RUNTIME_SIZE = true;
-  static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;
-
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
-    size_t offset = 0;
-    do {
-      SizedOpT::copy(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
-                     offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
-      offset += BLOCK_SIZE;
-    } while (offset < runtime_size - BLOCK_SIZE);
-    Tail<SizedOpT>::copy(dst, src, runtime_size);
-  }
-
-  // Move forward suitable when dst < src. We load the tail bytes before
-  // handling the loop.
-  //
-  // e.g. Moving two bytes
-  // [   |       |       |       |       |]
-  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
-  // [_________________________LLLLLLLL___]
-  // [___LLLLLLLL_________________________]
-  // [_SSSSSSSS___________________________]
-  // [___________LLLLLLLL_________________]
-  // [_________SSSSSSSS___________________]
-  // [___________________LLLLLLLL_________]
-  // [_________________SSSSSSSS___________]
-  // [_______________________SSSSSSSS_____]
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
-    const auto tail_value =
-        SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
-    size_t offset = 0;
-    do {
-      SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
-                     offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
-      offset += BLOCK_SIZE;
-    } while (offset < runtime_size - BLOCK_SIZE);
-    SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail_value);
-  }
-
-  // Move backward suitable when dst > src. We load the head bytes before
-  // handling the loop.
-  //
-  // e.g. Moving two bytes
-  // [   |       |       |       |       |]
-  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
-  // [___LLLLLLLL_________________________]
-  // [_________________________LLLLLLLL___]
-  // [___________________________SSSSSSSS_]
-  // [_________________LLLLLLLL___________]
-  // [___________________SSSSSSSS_________]
-  // [_________LLLLLLLL___________________]
-  // [___________SSSSSSSS_________________]
-  // [_____SSSSSSSS_______________________]
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void move_backward(DstAddrT dst, SrcAddrT src,
-                                   size_t runtime_size) {
-    const auto head_value = SizedOpT::load(src);
-    ptrdiff_t offset = runtime_size - BLOCK_SIZE;
-    do {
-      SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
-                     offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
-      offset -= BLOCK_SIZE;
-    } while (offset >= 0);
-    SizedOpT::store(dst, head_value);
-  }
-
-  template <typename DstAddrT>
-  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
-    size_t offset = 0;
-    do {
-      SizedOpT::set(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset), value);
-      offset += BLOCK_SIZE;
-    } while (offset < runtime_size - BLOCK_SIZE);
-    Tail<SizedOpT>::set(dst, value, runtime_size);
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
-                                     size_t runtime_size) {
-    size_t offset = 0;
-    do {
-      if (uint64_t res = SizedOpT::isDifferent(
-              offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
-              offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
-        return res;
-      offset += BLOCK_SIZE;
-    } while (offset < runtime_size - BLOCK_SIZE);
-    return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
-                                    size_t runtime_size) {
-    size_t offset = 0;
-    do {
-      if (int32_t res = SizedOpT::threeWayCmp(
-              offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
-              offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
-        return res;
-      offset += BLOCK_SIZE;
-    } while (offset < runtime_size - BLOCK_SIZE);
-    return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
-  }
-};
-
-// Aligns using a statically-sized operation, then calls the subsequent NextT
-// operation.
-//
-// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
-// Align<_16, Arg::Dst>::Then<Loop<_32>>::copy(dst, src, runtime_size);
-enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
-template <typename SizedOpT, Arg AlignOn = Arg::_1> struct Align {
-  static_assert(SizedOpT::IS_FIXED_SIZE);
-
-  template <typename NextT> struct Then {
-    static_assert(NextT::IS_RUNTIME_SIZE);
-
-    template <typename DstAddrT, typename SrcAddrT>
-    static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
-      SizedOpT::copy(dst, src);
-      auto aligned = align(dst, src, runtime_size);
-      NextT::copy(aligned.arg1, aligned.arg2, aligned.size);
-    }
-
-    // Move forward suitable when dst < src. The alignment is performed with
-    // a HeadTail operation of size ∈ [Alignment, 2 x Alignment].
-    //
-    // e.g. Moving two bytes and making sure src is then aligned.
-    // [  |       |       |       |      ]
-    // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
-    // [____LLLLLLLL_____________________]
-    // [___________LLLLLLLL______________]
-    // [_SSSSSSSS________________________]
-    // [________SSSSSSSS_________________]
-    //
-    // e.g. Moving two bytes and making sure dst is then aligned.
-    // [  |       |       |       |      ]
-    // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
-    // [____LLLLLLLL_____________________]
-    // [______LLLLLLLL___________________]
-    // [_SSSSSSSS________________________]
-    // [___SSSSSSSS______________________]
-    template <typename DstAddrT, typename SrcAddrT>
-    static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
-      auto aligned_after_begin = align(dst, src, runtime_size);
-      // We move pointers forward by Size so we can perform HeadTail.
-      auto aligned = aligned_after_begin.stepForward();
-      HeadTail<SizedOpT>::move(dst, src, runtime_size - aligned.size);
-      NextT::move(aligned.arg1, aligned.arg2, aligned.size);
-    }
-
-    // Move backward suitable when dst > src. The alignment is performed with
-    // a HeadTail operation of size ∈ [Alignment, 2 x Alignment].
-    //
-    // e.g. Moving two bytes backward and making sure src is then aligned.
-    // [  |       |       |       |      ]
-    // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
-    // [ _________________LLLLLLLL_______]
-    // [ ___________________LLLLLLLL_____]
-    // [____________________SSSSSSSS_____]
-    // [______________________SSSSSSSS___]
-    //
-    // e.g. Moving two bytes and making sure dst is then aligned.
-    // [  |       |       |       |      ]
-    // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
-    // [ _______________LLLLLLLL_________]
-    // [ ___________________LLLLLLLL_____]
-    // [__________________SSSSSSSS_______]
-    // [______________________SSSSSSSS___]
-    template <typename DstAddrT, typename SrcAddrT>
-    static inline void move_backward(DstAddrT dst, SrcAddrT src,
-                                     size_t runtime_size) {
-      const auto dst_end = offsetAddrAssumeAligned<1>(dst, runtime_size);
-      const auto src_end = offsetAddrAssumeAligned<1>(src, runtime_size);
-      auto aligned_after_end = align(dst_end, src_end, 0);
-      // We move pointers back by 2 x Size so we can perform HeadTail.
-      auto aligned = aligned_after_end.stepBack().stepBack();
-      HeadTail<SizedOpT>::move(aligned.arg1, aligned.arg2, aligned.size);
-      NextT::move_backward(dst, src, runtime_size - aligned.size);
-    }
-
-    template <typename DstAddrT>
-    static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
-      SizedOpT::set(dst, value);
-      DstAddrT _(nullptr);
-      auto aligned = align(dst, _, runtime_size);
-      NextT::set(aligned.arg1, value, aligned.size);
-    }
-
-    template <typename SrcAddrT1, typename SrcAddrT2>
-    static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
-                                       size_t runtime_size) {
-      if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
-        return res;
-      auto aligned = align(src1, src2, runtime_size);
-      return NextT::isDifferent(aligned.arg1, aligned.arg2, aligned.size);
-    }
-
-    template <typename SrcAddrT1, typename SrcAddrT2>
-    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
-                                      size_t runtime_size) {
-      if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
-        return res;
-      auto aligned = align(src1, src2, runtime_size);
-      return NextT::threeWayCmp(aligned.arg1, aligned.arg2, aligned.size);
-    }
-  };
-
-private:
-  static constexpr size_t ALIGN_OP_SIZE = SizedOpT::SIZE;
-  static_assert(ALIGN_OP_SIZE > 1);
-
-  template <typename Arg1AddrT, typename Arg2AddrT> struct Aligned {
-    Arg1AddrT arg1;
-    Arg2AddrT arg2;
-    size_t size;
-
-    Aligned stepForward() const {
-      return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, ALIGN_OP_SIZE),
-                     offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, ALIGN_OP_SIZE),
-                     size - ALIGN_OP_SIZE};
-    }
-
-    Aligned stepBack() const {
-      return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, -ALIGN_OP_SIZE),
-                     offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, -ALIGN_OP_SIZE),
-                     size + ALIGN_OP_SIZE};
-    }
-  };
-
-  template <typename Arg1AddrT, typename Arg2AddrT>
-  static auto makeAligned(Arg1AddrT arg1, Arg2AddrT arg2, size_t size) {
-    return Aligned<Arg1AddrT, Arg2AddrT>{arg1, arg2, size};
-  }
-
-  template <typename Arg1AddrT, typename Arg2AddrT>
-  static auto align(Arg1AddrT arg1, Arg2AddrT arg2, size_t runtime_size) {
-    static_assert(IsAddressType<Arg1AddrT>::value);
-    static_assert(IsAddressType<Arg2AddrT>::value);
-    if constexpr (AlignOn == Arg::_1) {
-      auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg1.ptr_);
-      return makeAligned(offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg1, offset),
-                         offsetAddrAssumeAligned<1>(arg2, offset),
-                         runtime_size - offset);
-    } else if constexpr (AlignOn == Arg::_2) {
-      auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg2.ptr_);
-      return makeAligned(offsetAddrAssumeAligned<1>(arg1, offset),
-                         offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg2, offset),
-                         runtime_size - offset);
-    } else {
-      DeferredStaticAssert("AlignOn must be either Arg::_1 or Arg::_2");
-    }
-  }
-};
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
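
Among the removed composition primitives, HeadTail is the one worth a gloss: for any runtime_size between Size and 2 * Size, one fixed-size operation at the start and one at the end cover the whole buffer, and the overlap absorbs the slack. A framework-free sketch (head_tail_copy is a hypothetical name; dst and src must not overlap):

#include <cstddef>
#include <cstring>

// For Size <= runtime_size <= 2 * Size, copying the first Size bytes
// and the last Size bytes covers the whole range; the two fixed-size
// blocks simply overlap somewhere in the middle.
template <std::size_t Size>
void head_tail_copy(char *dst, const char *src, std::size_t runtime_size) {
  std::memcpy(dst, src, Size); // head
  std::memcpy(dst + runtime_size - Size, src + runtime_size - Size,
              Size); // tail, overlapping the head
}

For instance, head_tail_copy<8>(dst, src, 11) issues exactly two 8-byte copies instead of decomposing 11 bytes into 8 + 2 + 1 operations.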

diff --git a/libc/src/string/memory_utils/backend_aarch64.h b/libc/src/string/memory_utils/backend_aarch64.h
deleted file mode 100644
index 8077a098ff9c0..0000000000000
--- a/libc/src/string/memory_utils/backend_aarch64.h
+++ /dev/null
@@ -1,71 +0,0 @@
-//===-- Elementary operations for aarch64 ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
-
-#if !defined(LLVM_LIBC_ARCH_AARCH64)
-#include "src/string/memory_utils/backend_scalar.h"
-
-#ifdef __ARM_NEON
-#include <arm_neon.h>
-#endif
-
-namespace __llvm_libc {
-
-struct Aarch64Backend : public Scalar64BitBackend {
-  static constexpr bool IS_BACKEND_TYPE = true;
-
-  template <typename T, Temporality TS, Aligned AS,
-            cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline T load(const T *src) {
-    return Scalar64BitBackend::template load<T, TS, AS>(src);
-  }
-};
-
-// Implementation of the SizedOp abstraction for the set operation.
-struct Zva64 {
-  static constexpr size_t SIZE = 64;
-
-  template <typename DstAddrT>
-  static inline void set(DstAddrT dst, ubyte value) {
-#if __SIZEOF_POINTER__ == 4
-    asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory");
-#else
-    asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
-#endif
-  }
-};
-
-inline static bool hasZva() {
-  uint64_t zva_val;
-  asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
-  // DC ZVA is permitted if DZP (bit [4]) is zero.
-  // BS (bits [3:0]) is the log2 of the block size in words.
-  // So the next line checks whether the instruction is permitted and the
-  // block size is 16 words (i.e. 64 bytes).
-  return (zva_val & 0b11111) == 0b00100;
-}
-
-namespace aarch64 {
-using _1 = SizedOp<Aarch64Backend, 1>;
-using _2 = SizedOp<Aarch64Backend, 2>;
-using _3 = SizedOp<Aarch64Backend, 3>;
-using _4 = SizedOp<Aarch64Backend, 4>;
-using _8 = SizedOp<Aarch64Backend, 8>;
-using _16 = SizedOp<Aarch64Backend, 16>;
-using _32 = SizedOp<Aarch64Backend, 32>;
-using _64 = SizedOp<Aarch64Backend, 64>;
-using _128 = SizedOp<Aarch64Backend, 128>;
-} // namespace aarch64
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_ARCH_AARCH64
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
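
The dczid_el0 read in hasZva() above packs two fields into the low five bits. A host-runnable sketch of the decoding, taking the register value as a plain parameter instead of reading it via `mrs` (zva_is_64_bytes is an illustrative name):

#include <cstdint>

// DZP (bit [4]) clear means DC ZVA is permitted; BS (bits [3:0]) is
// the log2 of the block size in 4-byte words, so BS == 4 means
// 16 words, i.e. 64 bytes. For the 64-byte case this is equivalent
// to the (zva_val & 0b11111) == 0b00100 check above.
constexpr bool zva_is_64_bytes(std::uint64_t dczid_el0) {
  return ((dczid_el0 >> 4) & 1) == 0 &&
         (std::uint64_t(4) << (dczid_el0 & 0xF)) == 64;
}

static_assert(zva_is_64_bytes(0b00100), "DZP = 0, BS = 4: 64-byte blocks");
static_assert(!zva_is_64_bytes(0b10100), "DZP set: DC ZVA not permitted");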

diff --git a/libc/src/string/memory_utils/backend_scalar.h b/libc/src/string/memory_utils/backend_scalar.h
deleted file mode 100644
index dba36b159baa6..0000000000000
--- a/libc/src/string/memory_utils/backend_scalar.h
+++ /dev/null
@@ -1,104 +0,0 @@
-//===-- Elementary operations for native scalar types ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
-
-#include "src/__support/CPP/type_traits.h" // ConditionalType, enable_if_t
-#include "src/__support/endian.h"
-
-namespace __llvm_libc {
-
-struct Scalar64BitBackend {
-  static constexpr bool IS_BACKEND_TYPE = true;
-
-  template <typename T>
-  static constexpr bool IsScalarType =
-      cpp::is_same_v<T, uint8_t> || cpp::is_same_v<T, uint16_t> ||
-      cpp::is_same_v<T, uint32_t> || cpp::is_same_v<T, uint64_t>;
-
-  template <typename T, Temporality TS, Aligned AS>
-  static inline T load(const T *src) {
-    static_assert(IsScalarType<T>);
-    static_assert(TS == Temporality::TEMPORAL,
-                  "Scalar load does not support non-temporal access");
-    return *src;
-  }
-
-  template <typename T, Temporality TS, Aligned AS>
-  static inline void store(T *dst, T value) {
-    static_assert(IsScalarType<T>);
-    static_assert(TS == Temporality::TEMPORAL,
-                  "Scalar store does not support non-temporal access");
-    *dst = value;
-  }
-
-  template <typename T> static inline T splat(ubyte value) {
-    static_assert(IsScalarType<T>);
-    return (T(~0ULL) / T(0xFF)) * T(value);
-  }
-
-  template <typename T> static inline uint64_t notEquals(T v1, T v2) {
-    static_assert(IsScalarType<T>);
-    return v1 ^ v2;
-  }
-
-  template <typename T> static inline int32_t threeWayCmp(T v1, T v2) {
-    DeferredStaticAssert("not implemented");
-  }
-
-  // Returns the type to use to consume Size bytes.
-  template <size_t Size>
-  using getNextType = cpp::conditional_t<
-      Size >= 8, uint64_t,
-      cpp::conditional_t<Size >= 4, uint32_t,
-                         cpp::conditional_t<Size >= 2, uint16_t, uint8_t>>>;
-};
-
-template <>
-int32_t inline Scalar64BitBackend::threeWayCmp<uint8_t>(uint8_t a, uint8_t b) {
-  const int16_t la = Endian::to_big_endian(a);
-  const int16_t lb = Endian::to_big_endian(b);
-  return la - lb;
-}
-template <>
-int32_t inline Scalar64BitBackend::threeWayCmp<uint16_t>(uint16_t a,
-                                                         uint16_t b) {
-  const int32_t la = Endian::to_big_endian(a);
-  const int32_t lb = Endian::to_big_endian(b);
-  return la - lb;
-}
-template <>
-int32_t inline Scalar64BitBackend::threeWayCmp<uint32_t>(uint32_t a,
-                                                         uint32_t b) {
-  const uint32_t la = Endian::to_big_endian(a);
-  const uint32_t lb = Endian::to_big_endian(b);
-  return la > lb ? 1 : la < lb ? -1 : 0;
-}
-template <>
-int32_t inline Scalar64BitBackend::threeWayCmp<uint64_t>(uint64_t a,
-                                                         uint64_t b) {
-  const uint64_t la = Endian::to_big_endian(a);
-  const uint64_t lb = Endian::to_big_endian(b);
-  return la > lb ? 1 : la < lb ? -1 : 0;
-}
-
-namespace scalar {
-using _1 = SizedOp<Scalar64BitBackend, 1>;
-using _2 = SizedOp<Scalar64BitBackend, 2>;
-using _3 = SizedOp<Scalar64BitBackend, 3>;
-using _4 = SizedOp<Scalar64BitBackend, 4>;
-using _8 = SizedOp<Scalar64BitBackend, 8>;
-using _16 = SizedOp<Scalar64BitBackend, 16>;
-using _32 = SizedOp<Scalar64BitBackend, 32>;
-using _64 = SizedOp<Scalar64BitBackend, 64>;
-using _128 = SizedOp<Scalar64BitBackend, 128>;
-} // namespace scalar
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
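
The splat above relies on a neat arithmetic identity: for any unsigned type T, T(~0) / T(0xFF) is the repeating 0x01 byte pattern, so multiplying it by a byte value copies that byte into every lane. A quick self-contained check:

#include <cstdint>

// ~0 / 0xFF == 0x0101...01 at every width, so the multiplication
// replicates `value` across the whole word, exactly as
// Scalar64BitBackend::splat does above.
template <typename T> constexpr T splat(std::uint8_t value) {
  return (T(~0ULL) / T(0xFF)) * T(value);
}

static_assert(splat<std::uint16_t>(0xCD) == 0xCDCD, "");
static_assert(splat<std::uint32_t>(0xAB) == 0xABABABABu, "");
static_assert(splat<std::uint64_t>(0x01) == 0x0101010101010101ull, "");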

diff --git a/libc/src/string/memory_utils/backend_x86.h b/libc/src/string/memory_utils/backend_x86.h
deleted file mode 100644
index cfdfcdf90131c..0000000000000
--- a/libc/src/string/memory_utils/backend_x86.h
+++ /dev/null
@@ -1,219 +0,0 @@
-//===-- Elementary operations for x86 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
-
-#if defined(LLVM_LIBC_ARCH_X86)
-#include "src/__support/CPP/type_traits.h" // ConditionalType, enable_if_t
-#include "src/string/memory_utils/backend_scalar.h"
-
-#ifdef __SSE2__
-#include <immintrin.h>
-#endif //  __SSE2__
-
-#if defined(__SSE2__)
-#define HAS_M128 true
-#else
-#define HAS_M128 false
-#endif
-
-#if defined(__AVX2__)
-#define HAS_M256 true
-#else
-#define HAS_M256 false
-#endif
-
-#if defined(__AVX512F__) and defined(__AVX512BW__)
-#define HAS_M512 true
-#else
-#define HAS_M512 false
-#endif
-
-namespace __llvm_libc {
-struct X86Backend : public Scalar64BitBackend {
-  static constexpr bool IS_BACKEND_TYPE = true;
-
-  // Scalar types use base class implementations.
-  template <typename T, Temporality TS, Aligned AS,
-            cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline T load(const T *src) {
-    return Scalar64BitBackend::template load<T, TS, AS>(src);
-  }
-
-  // Scalar types use base class implementations.
-  template <typename T, Temporality TS, Aligned AS,
-            cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline void store(T *dst, T value) {
-    Scalar64BitBackend::template store<T, TS, AS>(dst, value);
-  }
-
-  // Scalar types use base class implementations.
-  template <typename T,
-            cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline uint64_t notEquals(T v1, T v2) {
-    return Scalar64BitBackend::template notEquals<T>(v1, v2);
-  }
-
-  // Scalar types use base class implementations.
-  template <typename T,
-            cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline T splat(ubyte value) {
-    return Scalar64BitBackend::template splat<T>(value);
-  }
-
-  // Scalar types use base class implementations.
-  template <typename T,
-            cpp::enable_if_t<Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline int32_t threeWayCmp(T v1, T v2) {
-    return Scalar64BitBackend::template threeWayCmp<T>(v1, v2);
-  }
-
-  // X86 types are specialized below.
-  template <typename T, Temporality TS, Aligned AS,
-            cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline T load(const T *src);
-
-  // X86 types are specialized below.
-  template <typename T, Temporality TS, Aligned AS,
-            cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline void store(T *dst, T value);
-
-  // X86 types are specialized below.
-  template <typename T,
-            cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline T splat(ubyte value);
-
-  // X86 types are specialized below.
-  template <typename T,
-            cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline uint64_t notEquals(T v1, T v2);
-
-  template <typename T,
-            cpp::enable_if_t<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
-  static inline int32_t threeWayCmp(T v1, T v2) {
-    return char_diff(reinterpret_cast<char *>(&v1),
-                     reinterpret_cast<char *>(&v2), notEquals(v1, v2));
-  }
-
-  // Returns the type to use to consume Size bytes.
-  template <size_t Size>
-  using getNextType = cpp::conditional_t<
-      (HAS_M512 && Size >= 64), __m512i,
-      cpp::conditional_t<
-          (HAS_M256 && Size >= 32), __m256i,
-          cpp::conditional_t<(HAS_M128 && Size >= 16), __m128i,
-                             Scalar64BitBackend::getNextType<Size>>>>;
-
-private:
-  static inline int32_t char_diff(const char *a, const char *b, uint64_t mask) {
-    const size_t diff_index = mask == 0 ? 0 : __builtin_ctzll(mask);
-    const int16_t ca = (unsigned char)a[diff_index];
-    const int16_t cb = (unsigned char)b[diff_index];
-    return ca - cb;
-  }
-};
-
-static inline void repmovsb(void *dst, const void *src, size_t runtime_size) {
-  asm volatile("rep movsb"
-               : "+D"(dst), "+S"(src), "+c"(runtime_size)
-               :
-               : "memory");
-}
-
-#define SPECIALIZE_LOAD(T, OS, AS, INTRINSIC)                                  \
-  template <> inline T X86Backend::load<T, OS, AS>(const T *src) {             \
-    return INTRINSIC(const_cast<T *>(src));                                    \
-  }
-#define SPECIALIZE_STORE(T, OS, AS, INTRINSIC)                                 \
-  template <> inline void X86Backend::store<T, OS, AS>(T * dst, T value) {     \
-    INTRINSIC(dst, value);                                                     \
-  }
-
-#if HAS_M128
-SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_load_si128)
-SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_loadu_si128)
-SPECIALIZE_LOAD(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
-                _mm_stream_load_si128)
-// X86 non-temporal load needs aligned access
-SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_store_si128)
-SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_storeu_si128)
-SPECIALIZE_STORE(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
-                 _mm_stream_si128)
-// X86 non-temporal store needs aligned access
-template <> inline __m128i X86Backend::splat<__m128i>(ubyte value) {
-  return _mm_set1_epi8(__builtin_bit_cast(char, value));
-}
-template <>
-inline uint64_t X86Backend::notEquals<__m128i>(__m128i a, __m128i b) {
-  using T = char __attribute__((__vector_size__(16)));
-  return _mm_movemask_epi8(T(a) != T(b));
-}
-#endif // HAS_M128
-
-#if HAS_M256
-SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::YES, _mm256_load_si256)
-SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::NO, _mm256_loadu_si256)
-SPECIALIZE_LOAD(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
-                _mm256_stream_load_si256)
-// X86 non-temporal load needs aligned access
-SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::YES,
-                 _mm256_store_si256)
-SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::NO,
-                 _mm256_storeu_si256)
-SPECIALIZE_STORE(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
-                 _mm256_stream_si256)
-// X86 non-temporal store needs aligned access
-template <> inline __m256i X86Backend::splat<__m256i>(ubyte value) {
-  return _mm256_set1_epi8(__builtin_bit_cast(char, value));
-}
-template <>
-inline uint64_t X86Backend::notEquals<__m256i>(__m256i a, __m256i b) {
-  using T = char __attribute__((__vector_size__(32)));
-  return _mm256_movemask_epi8(T(a) != T(b));
-}
-#endif // HAS_M256
-
-#if HAS_M512
-SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::YES, _mm512_load_si512)
-SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::NO, _mm512_loadu_si512)
-SPECIALIZE_LOAD(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
-                _mm512_stream_load_si512)
-// X86 non-temporal load needs aligned access
-SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::YES,
-                 _mm512_store_si512)
-SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::NO,
-                 _mm512_storeu_si512)
-SPECIALIZE_STORE(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
-                 _mm512_stream_si512)
-// X86 non-temporal store needs aligned access
-template <> inline __m512i X86Backend::splat<__m512i>(ubyte value) {
-  return _mm512_broadcastb_epi8(_mm_set1_epi8(__builtin_bit_cast(char, value)));
-}
-template <>
-inline uint64_t X86Backend::notEquals<__m512i>(__m512i a, __m512i b) {
-  return _mm512_cmpneq_epi8_mask(a, b);
-}
-#endif // HAS_M512
-
-namespace x86 {
-using _1 = SizedOp<X86Backend, 1>;
-using _2 = SizedOp<X86Backend, 2>;
-using _3 = SizedOp<X86Backend, 3>;
-using _4 = SizedOp<X86Backend, 4>;
-using _8 = SizedOp<X86Backend, 8>;
-using _16 = SizedOp<X86Backend, 16>;
-using _32 = SizedOp<X86Backend, 32>;
-using _64 = SizedOp<X86Backend, 64>;
-using _128 = SizedOp<X86Backend, 128>;
-} // namespace x86
-
-} // namespace __llvm_libc
-
-#endif // defined(LLVM_LIBC_ARCH_X86)
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
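
The char_diff helper above is the bridge between the vector notEquals specializations and memcmp semantics: the movemask result carries one bit per differing byte, __builtin_ctzll (the GCC/Clang builtin also used by the removed code) locates the first set bit, and the widened byte subtraction provides the sign. The same step in plain scalar form, with first_diff_cmp as an illustrative name:

#include <cstddef>
#include <cstdint>
#include <cstdio>

static std::int32_t first_diff_cmp(const char *a, const char *b,
                                   std::uint64_t mask) {
  // The trailing zero count is the index of the lowest set bit, i.e.
  // the first differing byte.
  const std::size_t index = mask == 0 ? 0 : __builtin_ctzll(mask);
  const std::int16_t ca = (unsigned char)a[index];
  const std::int16_t cb = (unsigned char)b[index];
  return ca - cb;
}

int main() {
  const char a[] = "abcdef";
  const char b[] = "abCdef";
  // Byte 2 is the first difference, so a movemask would set bit 2.
  std::printf("%d\n", first_diff_cmp(a, b, std::uint64_t(1) << 2));
  // Prints 32, i.e. 'c' - 'C': positive, as memcmp requires for a > b.
}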

diff --git a/libc/src/string/memory_utils/backends.h b/libc/src/string/memory_utils/backends.h
deleted file mode 100644
index 6d241fa5eb289..0000000000000
--- a/libc/src/string/memory_utils/backends.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Elementary operations to compose memory primitives ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the concept of a Backend.
-// It constitutes the lowest level of the framework and is akin to instruction
-// selection. It defines how to implement aligned/unaligned,
-// temporal/non-temporal native loads and stores for a particular architecture
-// as well as efficient ways to fill and compare types.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
-
-#include "src/string/memory_utils/address.h" // Temporality, Aligned
-#include "src/string/memory_utils/sized_op.h" // SizedOp
-#include <stddef.h>                           // size_t
-#include <stdint.h>                           // uint##_t
-
-namespace __llvm_libc {
-
-// Backends must implement the following interface.
-struct NoBackend {
-  static constexpr bool IS_BACKEND_TYPE = true;
-
-  // Loads a T from `src` honoring Temporality and Alignment.
-  template <typename T, Temporality, Aligned> static T load(const T *src);
-
-  // Stores a T to `dst` honoring Temporality and Alignment.
-  template <typename T, Temporality, Aligned>
-  static void store(T *dst, T value);
-
-  // Returns a T filled with `value` bytes.
-  template <typename T> static T splat(ubyte value);
-
-  // Returns zero iff v1 == v2.
-  template <typename T> static uint64_t notEquals(T v1, T v2);
-
-  // Returns zero iff v1 == v2, a negative number if v1 < v2 and a positive
-  // number otherwise.
-  template <typename T> static int32_t threeWayCmp(T v1, T v2);
-
-  // Returns the type to use to consume Size bytes.
-  // If no type handles Size bytes at once, getNextType aliases to void.
-  template <size_t Size> using getNextType = void;
-};
-
-} // namespace __llvm_libc
-
-// We inline all backend implementations here to simplify the build system.
-// Each file needs to be guarded with the appropriate LLVM_LIBC_ARCH_XXX ifdef.
-#include "src/string/memory_utils/backend_aarch64.h"
-#include "src/string/memory_utils/backend_scalar.h"
-#include "src/string/memory_utils/backend_x86.h"
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
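
To make the NoBackend contract concrete, here is a deliberately tiny conforming backend (a sketch of the required shape, not one of the removed implementations): scalar accesses only, with the Temporality and Aligned parameters accepted but ignored.

#include <cstddef>
#include <cstdint>

enum class Temporality { TEMPORAL, NON_TEMPORAL };
enum class Aligned { NO, YES };

struct TrivialBackend {
  static constexpr bool IS_BACKEND_TYPE = true;

  template <typename T, Temporality, Aligned> static T load(const T *src) {
    return *src; // plain scalar load
  }
  template <typename T, Temporality, Aligned>
  static void store(T *dst, T value) {
    *dst = value; // plain scalar store
  }
  // Fill every byte of T with `value`.
  template <typename T> static T splat(std::uint8_t value) {
    return (T(~0ULL) / T(0xFF)) * T(value);
  }
  // Zero iff v1 == v2.
  template <typename T> static std::uint64_t notEquals(T v1, T v2) {
    return v1 ^ v2;
  }
  // Sign of the numeric difference.
  template <typename T> static std::int32_t threeWayCmp(T v1, T v2) {
    return v1 > v2 ? 1 : v1 < v2 ? -1 : 0;
  }
  // Always consume a single byte, whatever the requested size.
  template <std::size_t> using getNextType = std::uint8_t;
};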

diff --git a/libc/src/string/memory_utils/sized_op.h b/libc/src/string/memory_utils/sized_op.h
deleted file mode 100644
index 2bca50d6c56d1..0000000000000
--- a/libc/src/string/memory_utils/sized_op.h
+++ /dev/null
@@ -1,180 +0,0 @@
-//===-- Sized Operations --------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the SizedOp struct that serves as the middle end of the
-// framework. It implements sized memory operations by breaking them down into
-// simpler types whose availability is described in the Backend. It also
-// provides a way to load and store sized chunks of memory (necessary for the
-// move operation). SizedOps are the building blocks of higher order algorithms
-// like HeadTail, Align or Loop.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
-
-#include <stddef.h> // size_t
-
-#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
-#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE                                    \
-  __has_builtin(__builtin_memcpy_inline)
-#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
-
-#ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
-#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE                                    \
-  __has_builtin(__builtin_memset_inline)
-#endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
-
-namespace __llvm_libc {
-
-template <typename Backend, size_t Size> struct SizedOp {
-  static constexpr size_t SIZE = Size;
-  // Define instantiations of SizedOp as a fixed size operation.
-  // i.e. an operation that is composable by types in algorithm.h
-  static constexpr bool IS_FIXED_SIZE = true;
-
-private:
-  static_assert(Backend::IS_BACKEND_TYPE);
-  static_assert(SIZE > 0);
-  using type = typename Backend::template getNextType<Size>;
-  static constexpr size_t TYPE_SIZE = sizeof(type);
-  static_assert(SIZE >= TYPE_SIZE);
-  static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
-  using NextBlock = SizedOp<Backend, NEXT_SIZE>;
-
-  // Returns whether we can use an aligned operation.
-  // This is possible because the address type carries known compile-time
-  // alignment information.
-  template <typename T, typename AddrT> static constexpr Aligned isAligned() {
-    static_assert(IsAddressType<AddrT>::value);
-    return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
-                                                                 : Aligned::NO;
-  }
-
-  // Loads a value of the current `type` from `src`.
-  // This function is responsible for extracting Temporality and Alignment from
-  // the Address type.
-  template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) {
-    static_assert(IsAddressType<SrcAddrT>::value && SrcAddrT::IS_READ);
-    constexpr auto AS = isAligned<type, SrcAddrT>();
-    constexpr auto TS = SrcAddrT::TEMPORALITY;
-    return Backend::template load<type, TS, AS>(as<const type>(src));
-  }
-
-  // Stores a value of the current `type` to `dst`.
-  // This function is responsible for extracting Temporality and Alignment from
-  // the Address type.
-  template <typename DstAddrT>
-  static inline void nativeStore(type value, DstAddrT dst) {
-    static_assert(IsAddressType<DstAddrT>::value && DstAddrT::IS_WRITE);
-    constexpr auto AS = isAligned<type, DstAddrT>();
-    constexpr auto TS = DstAddrT::TEMPORALITY;
-    return Backend::template store<type, TS, AS>(as<type>(dst), value);
-  }
-
-  // A well aligned POD structure to store Size bytes.
-  // This is used to implement the move operations.
-  struct Value {
-    alignas(alignof(type)) ubyte payload[Size];
-  };
-
-public:
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void copy(DstAddrT dst, SrcAddrT src) {
-    static_assert(IsAddressType<DstAddrT>::value && DstAddrT::IS_WRITE);
-    static_assert(IsAddressType<SrcAddrT>::value && SrcAddrT::IS_READ);
-    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
-                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
-                  SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
-      // delegate optimized copy to compiler.
-      __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size);
-      return;
-    }
-    nativeStore(nativeLoad(src), dst);
-    if constexpr (NEXT_SIZE > 0)
-      NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
-  }
-
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void move(DstAddrT dst, SrcAddrT src) {
-    const auto payload = nativeLoad(src);
-    if constexpr (NEXT_SIZE > 0)
-      NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
-    nativeStore(payload, dst);
-  }
-
-  template <typename DstAddrT>
-  static inline void set(DstAddrT dst, ubyte value) {
-    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE &&
-                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL) {
-      // delegate optimized set to compiler.
-      __builtin_memset_inline(dst.ptr(), static_cast<int>(value), Size);
-      return;
-    }
-    nativeStore(Backend::template splat<type>(value), dst);
-    if constexpr (NEXT_SIZE > 0)
-      NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value);
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
-    const uint64_t current =
-        Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2));
-    if constexpr (NEXT_SIZE > 0) {
-      // In the case where we cannot handle Size with a single operation (e.g.
-      // Size == 3) we can either return early if current is non-zero or
-      // aggregate all the operations through the bitwise or operator.
-      // We chose the latter to reduce branching.
-      return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1),
-                                               offsetAddr<TYPE_SIZE>(src2)));
-    } else {
-      return current;
-    }
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
-    const auto a = nativeLoad(src1);
-    const auto b = nativeLoad(src2);
-    // If we cannot handle Size as a single operation we have two choices:
-    // - Either use Backend's threeWayCmp directly and return it if it is
-    // non-zero.
-    //
-    //   if (int32_t res = Backend::template threeWayCmp<type>(a, b))
-    //     return res;
-    //
-    // - Or use Backend's notEquals first and use threeWayCmp only if
-    // different; the assumption here is that notEquals is faster than
-    // threeWayCmp and that we can save cycles when the Size needs to be
-    // decomposed in many sizes (e.g. Size == 7 => 4 + 2 + 1)
-    //
-    //   if (Backend::template notEquals<type>(a, b))
-    //     return Backend::template threeWayCmp<type>(a, b);
-    //
-    // We chose the former to reduce code bloat and branching.
-    if (int32_t res = Backend::template threeWayCmp<type>(a, b))
-      return res;
-    if constexpr (NEXT_SIZE > 0)
-      return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1),
-                                    offsetAddr<TYPE_SIZE>(src2));
-    return 0;
-  }
-
-  template <typename SrcAddrT> static Value load(SrcAddrT src) {
-    Value output;
-    copy(DstAddr<alignof(type)>(output.payload), src);
-    return output;
-  }
-
-  template <typename DstAddrT> static void store(DstAddrT dst, Value value) {
-    copy(dst, SrcAddr<alignof(type)>(value.payload));
-  }
-};
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
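
The recursion at the heart of SizedOp is getNextType: pick the widest type the backend offers for the remaining size, emit one operation, and recurse on what is left, so Size == 7 decomposes into 4 + 2 + 1 bytes. A standalone sketch of that decomposition for a plain copy (copy_sized and NextType are illustrative names; assumes C++17 for if constexpr):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <type_traits>

// Widest unsigned scalar that fits in the remaining Size, mirroring
// Scalar64BitBackend::getNextType above.
template <std::size_t Size>
using NextType = std::conditional_t<
    (Size >= 8), std::uint64_t,
    std::conditional_t<(Size >= 4), std::uint32_t,
                       std::conditional_t<(Size >= 2), std::uint16_t,
                                          std::uint8_t>>>;

template <std::size_t Size>
void copy_sized(char *dst, const char *src) {
  using T = NextType<Size>;
  T chunk;
  std::memcpy(&chunk, src, sizeof(T)); // one fixed-size load
  std::memcpy(dst, &chunk, sizeof(T)); // one fixed-size store
  if constexpr (Size > sizeof(T))      // recurse on the remainder
    copy_sized<Size - sizeof(T)>(dst + sizeof(T), src + sizeof(T));
}

int main() {
  char dst[8] = {};
  copy_sized<7>(dst, "abcdefg"); // expands to a 4-, a 2- and a 1-byte copy
  std::puts(dst);                // prints abcdefg
}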

diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt
index 4d8e45d8cdce5..8f926273de5d5 100644
--- a/libc/test/src/string/memory_utils/CMakeLists.txt
+++ b/libc/test/src/string/memory_utils/CMakeLists.txt
@@ -3,8 +3,6 @@ add_libc_unittest(
   SUITE
     libc_string_unittests
   SRCS
-    address_test.cpp
-    backend_test.cpp
     elements_test.cpp
     memory_access_test.cpp
     utils_test.cpp
@@ -17,19 +15,3 @@ add_libc_unittest(
     libc.src.__support.CPP.array
     libc.src.__support.CPP.span
 )
-
-if(NOT LLVM_LIBC_FULL_BUILD)
-# Disabling this unittest in fullbuild mode as #include <sstream> pulls in an
-# incomplete pthread implementation from llvm-libc.
-add_libc_unittest(
-  algorithm_test
-  SUITE
-    libc_string_unittests
-  SRCS
-    algorithm_test.cpp
-  DEPENDS
-    libc.src.string.memory_utils.memory_utils
-    libc.src.__support.CPP.array
-    libc.src.__support.CPP.span
-)
-endif()

diff --git a/libc/test/src/string/memory_utils/address_test.cpp b/libc/test/src/string/memory_utils/address_test.cpp
deleted file mode 100644
index fe9361ba573e5..0000000000000
--- a/libc/test/src/string/memory_utils/address_test.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-#include "utils/UnitTest/Test.h"
-#include <src/string/memory_utils/address.h>
-
-namespace __llvm_libc {
-
-TEST(LlvmLibcAddress, AliasAreAddresses) {
-  ASSERT_TRUE(IsAddressType<SrcAddr<1>>::value);
-  ASSERT_TRUE(IsAddressType<DstAddr<1>>::value);
-  ASSERT_TRUE(IsAddressType<NtSrcAddr<1>>::value);
-  ASSERT_TRUE(IsAddressType<NtDstAddr<1>>::value);
-}
-
-TEST(LlvmLibcAddress, AliasHaveRightPermissions) {
-  ASSERT_TRUE(SrcAddr<1>::IS_READ);
-  ASSERT_TRUE(NtSrcAddr<1>::IS_READ);
-  ASSERT_TRUE(DstAddr<1>::IS_WRITE);
-  ASSERT_TRUE(NtDstAddr<1>::IS_WRITE);
-}
-
-TEST(LlvmLibcAddress, AliasHaveRightSemantic) {
-  ASSERT_EQ(SrcAddr<1>::TEMPORALITY, Temporality::TEMPORAL);
-  ASSERT_EQ(DstAddr<1>::TEMPORALITY, Temporality::TEMPORAL);
-  ASSERT_EQ(NtSrcAddr<1>::TEMPORALITY, Temporality::NON_TEMPORAL);
-  ASSERT_EQ(NtDstAddr<1>::TEMPORALITY, Temporality::NON_TEMPORAL);
-}
-
-TEST(LlvmLibcAddress, AliasHaveRightAlignment) {
-  ASSERT_EQ(SrcAddr<1>::ALIGNMENT, size_t(1));
-  ASSERT_EQ(SrcAddr<4>::ALIGNMENT, size_t(4));
-}
-
-TEST(LlvmLibcAddress, NarrowAlignment) {
-  // Address 8-byte aligned, offset by 8.
-  ASSERT_EQ(offsetAddr<8>(SrcAddr<8>(nullptr)).ALIGNMENT, size_t(8));
-  // Address 16-byte aligned, offset by 4.
-  ASSERT_EQ(offsetAddr<4>(SrcAddr<16>(nullptr)).ALIGNMENT, size_t(4));
-  // Address 4-byte aligned, offset by 16.
-  ASSERT_EQ(offsetAddr<16>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(4));
-  // Address 4-byte aligned, offset by 1.
-  ASSERT_EQ(offsetAddr<1>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(1));
-  // Address 4-byte aligned, offset by 2.
-  ASSERT_EQ(offsetAddr<2>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(2));
-  // Address 4-byte aligned, offset by 6.
-  ASSERT_EQ(offsetAddr<6>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(2));
-  // Address 4-byte aligned, offset by 10.
-  ASSERT_EQ(offsetAddr<10>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(2));
-  // Address 8-byte aligned, offset by 6.
-  ASSERT_EQ(offsetAddr<6>(SrcAddr<8>(nullptr)).ALIGNMENT, size_t(2));
-}
-
-TEST(LlvmLibcAddress, OffsetAddr) {
-  ubyte a;
-  SrcAddr<1> addr(&a);
-  ASSERT_EQ((const void *)offsetAddr<4>(addr).ptr(), (const void *)(&a + 4));
-  ASSERT_EQ((const void *)offsetAddr<32>(addr).ptr(), (const void *)(&a + 32));
-}
-
-TEST(LlvmLibcAddress, AssumeAligned) {
-  SrcAddr<16> addr(nullptr);
-  ASSERT_EQ(offsetAddrAssumeAligned<8>(addr, 0).ALIGNMENT, size_t(8));
-  ASSERT_EQ(offsetAddrAssumeAligned<1>(addr, 0).ALIGNMENT, size_t(1));
-  ASSERT_EQ(offsetAddrMultiplesOf<4>(addr, 0).ALIGNMENT, size_t(4));
-  ASSERT_EQ(offsetAddrMultiplesOf<32>(addr, 0).ALIGNMENT, size_t(16));
-}
-
-TEST(LlvmLibcAddress, offsetAddrAssumeAligned) {
-  ubyte a;
-  SrcAddr<1> addr(&a);
-  ASSERT_EQ((const void *)offsetAddrAssumeAligned<1>(addr, 17).ptr(),
-            (const void *)(&a + 17));
-}
-
-TEST(LlvmLibcAddress, offsetAddrMultiplesOf) {
-  ubyte a;
-  SrcAddr<1> addr(&a);
-  ASSERT_EQ((const void *)offsetAddrMultiplesOf<4>(addr, 16).ptr(),
-            (const void *)(&a + 16));
-}
-
-} // namespace __llvm_libc

diff --git a/libc/test/src/string/memory_utils/algorithm_test.cpp b/libc/test/src/string/memory_utils/algorithm_test.cpp
deleted file mode 100644
index d973fbcd5c19a..0000000000000
--- a/libc/test/src/string/memory_utils/algorithm_test.cpp
+++ /dev/null
@@ -1,566 +0,0 @@
-#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE 0
-#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE 0
-
-#include "utils/UnitTest/Test.h"
-#include <src/__support/CPP/array.h>
-#include <src/string/memory_utils/algorithm.h>
-#include <src/string/memory_utils/backends.h>
-
-#include <sstream>
-
-namespace __llvm_libc {
-
-struct alignas(64) Buffer : cpp::array<char, 128> {
-  bool contains(const char *ptr) const {
-    return ptr >= data() && ptr < (data() + size());
-  }
-  size_t getOffset(const char *ptr) const { return ptr - data(); }
-  void fill(char c) {
-    for (auto itr = begin(); itr != end(); ++itr)
-      *itr = c;
-  }
-};
-
-static Buffer buffer1;
-static Buffer buffer2;
-static std::ostringstream LOG;
-
-struct TestBackend {
-  static constexpr bool IS_BACKEND_TYPE = true;
-
-  template <typename T> static void log(const char *Action, const char *ptr) {
-    LOG << Action << "<" << sizeof(T) << "> ";
-    if (buffer1.contains(ptr))
-      LOG << "a[" << buffer1.getOffset(ptr) << "]";
-    else if (buffer2.contains(ptr))
-      LOG << "b[" << buffer2.getOffset(ptr) << "]";
-    LOG << "\n";
-  }
-
-  template <typename T, Temporality TS, Aligned AS>
-  static T load(const T *src) {
-    log<T>((AS == Aligned::YES ? "LdA" : "LdU"),
-           reinterpret_cast<const char *>(src));
-    return Scalar64BitBackend::load<T, TS, AS>(src);
-  }
-
-  template <typename T, Temporality TS, Aligned AS>
-  static void store(T *dst, T value) {
-    log<T>((AS == Aligned::YES ? "StA" : "StU"),
-           reinterpret_cast<const char *>(dst));
-    Scalar64BitBackend::store<T, TS, AS>(dst, value);
-  }
-
-  template <typename T> static inline T splat(ubyte value) {
-    LOG << "Splat<" << sizeof(T) << "> " << (unsigned)value << '\n';
-    return Scalar64BitBackend::splat<T>(value);
-  }
-
-  template <typename T> static inline uint64_t notEquals(T v1, T v2) {
-    LOG << "Neq<" << sizeof(T) << ">\n";
-    return Scalar64BitBackend::notEquals<T>(v1, v2);
-  }
-
-  template <typename T> static inline int32_t threeWayCmp(T v1, T v2) {
-    LOG << "Diff<" << sizeof(T) << ">\n";
-    return Scalar64BitBackend::threeWayCmp<T>(v1, v2);
-  }
-
-  template <size_t Size>
-  using getNextType = Scalar64BitBackend::getNextType<Size>;
-};
-
-struct LlvmLibcAlgorithm : public testing::Test {
-  void SetUp() override {
-    LOG = std::ostringstream();
-    LOG << '\n';
-  }
-
-  void fillEqual() {
-    buffer1.fill('a');
-    buffer2.fill('a');
-  }
-
-  void fillDifferent() {
-    buffer1.fill('a');
-    buffer2.fill('b');
-  }
-
-  const char *getTrace() {
-    trace_ = LOG.str();
-    return trace_.c_str();
-  }
-
-  const char *stripComments(const char *expected) {
-    expected_.clear();
-    std::stringstream ss(expected);
-    std::string line;
-    while (std::getline(ss, line, '\n')) {
-      const auto pos = line.find('#');
-      if (pos == std::string::npos) {
-        expected_ += line;
-      } else {
-        auto log = line.substr(0, pos);
-        while (!log.empty() && std::isspace(log.back()))
-          log.pop_back();
-        expected_ += log;
-      }
-      expected_ += '\n';
-    }
-    return expected_.c_str();
-  }
-
-  template <size_t Align = 1> SrcAddr<Align> buf1(size_t offset = 0) const {
-    return buffer1.data() + offset;
-  }
-  template <size_t Align = 1> SrcAddr<Align> buf2(size_t offset = 0) const {
-    return buffer2.data() + offset;
-  }
-  template <size_t Align = 1> DstAddr<Align> dst(size_t offset = 0) const {
-    return buffer1.data() + offset;
-  }
-  template <size_t Align = 1> SrcAddr<Align> src(size_t offset = 0) const {
-    return buffer2.data() + offset;
-  }
-
-private:
-  std::string trace_;
-  std::string expected_;
-};
-
-using _8 = SizedOp<TestBackend, 8>;
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing fixed sized forward operations
-///////////////////////////////////////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////////////////////////
-// Copy
-
-TEST_F(LlvmLibcAlgorithm, copy_1) {
-  SizedOp<TestBackend, 1>::copy(dst(), src());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> b[0]
-StU<1> a[0]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, copy_15) {
-  SizedOp<TestBackend, 15>::copy(dst(), src());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0]
-LdU<4> b[8]
-StU<4> a[8]
-LdU<2> b[12]
-StU<2> a[12]
-LdU<1> b[14]
-StU<1> a[14]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, copy_16) {
-  SizedOp<TestBackend, 16>::copy(dst(), src());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0]
-LdU<8> b[8]
-StU<8> a[8]
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Move
-
-TEST_F(LlvmLibcAlgorithm, move_1) {
-  SizedOp<TestBackend, 1>::move(dst(), src());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> b[0]
-StU<1> a[0]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, move_15) {
-  SizedOp<TestBackend, 15>::move(dst(), src());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-LdU<4> b[8]
-LdU<2> b[12]
-LdU<1> b[14]
-StU<1> a[14]
-StU<2> a[12]
-StU<4> a[8]
-StU<8> a[0]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, move_16) {
-  SizedOp<TestBackend, 16>::move(dst(), src());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-LdU<8> b[8]
-StU<8> a[8]
-StU<8> a[0]
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// set
-
-TEST_F(LlvmLibcAlgorithm, set_1) {
-  SizedOp<TestBackend, 1>::set(dst(), ubyte{42});
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<1> 42
-StU<1> a[0]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, set_15) {
-  SizedOp<TestBackend, 15>::set(dst(), ubyte{42});
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<8> 42
-StU<8> a[0]
-Splat<4> 42
-StU<4> a[8]
-Splat<2> 42
-StU<2> a[12]
-Splat<1> 42
-StU<1> a[14]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, set_16) {
-  SizedOp<TestBackend, 16>::set(dst(), ubyte{42});
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<8> 42
-StU<8> a[0]
-Splat<8> 42
-StU<8> a[8]
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// different
-
-TEST_F(LlvmLibcAlgorithm, different_1) {
-  fillEqual();
-  SizedOp<TestBackend, 1>::isDifferent(buf1(), buf2());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> a[0]
-LdU<1> b[0]
-Neq<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, different_15) {
-  fillEqual();
-  SizedOp<TestBackend, 15>::isDifferent(buf1(), buf2());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Neq<8>
-LdU<4> a[8]
-LdU<4> b[8]
-Neq<4>
-LdU<2> a[12]
-LdU<2> b[12]
-Neq<2>
-LdU<1> a[14]
-LdU<1> b[14]
-Neq<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, different_15_no_shortcircuit) {
-  fillDifferent();
-  SizedOp<TestBackend, 15>::isDifferent(buf1(), buf2());
-  // If the buffers differ, we still aggregate over all blocks (no short-circuit).
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Neq<8>
-LdU<4> a[8]
-LdU<4> b[8]
-Neq<4>
-LdU<2> a[12]
-LdU<2> b[12]
-Neq<2>
-LdU<1> a[14]
-LdU<1> b[14]
-Neq<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, different_16) {
-  fillEqual();
-  SizedOp<TestBackend, 16>::isDifferent(buf1(), buf2());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Neq<8>
-LdU<8> a[8]
-LdU<8> b[8]
-Neq<8>
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// three_way_cmp
-
-TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_1) {
-  fillEqual();
-  SizedOp<TestBackend, 1>::threeWayCmp(buf1(), buf2());
-  // Buffers compare equal, so the comparison returns 0.
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> a[0]
-LdU<1> b[0]
-Diff<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_15) {
-  fillEqual();
-  SizedOp<TestBackend, 15>::threeWayCmp(buf1(), buf2());
-  // Buffers compare equal, so the comparison returns 0.
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Diff<8>
-LdU<4> a[8]
-LdU<4> b[8]
-Diff<4>
-LdU<2> a[12]
-LdU<2> b[12]
-Diff<2>
-LdU<1> a[14]
-LdU<1> b[14]
-Diff<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, three_way_cmp_neq_15_shortcircuit) {
-  fillDifferent();
-  SizedOp<TestBackend, 16>::threeWayCmp(buf1(), buf2());
-  // If the buffers differ, we stop early (short-circuit).
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Diff<8>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_16) {
-  fillEqual();
-  SizedOp<TestBackend, 16>::threeWayCmp(buf1(), buf2());
-  // Buffers compare equal, so the comparison returns 0.
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[0]
-LdU<8> b[0]
-Diff<8>
-LdU<8> a[8]
-LdU<8> b[8]
-Diff<8>
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing skip operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, skip_and_set) {
-  Skip<11>::Then<SizedOp<TestBackend, 1>>::set(dst(), ubyte{42});
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<1> 42
-StU<1> a[11]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, skip_and_different_1) {
-  Skip<11>::Then<SizedOp<TestBackend, 1>>::isDifferent(buf1(), buf2());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> a[11]
-LdU<1> b[11]
-Neq<1>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, skip_and_three_way_cmp_8) {
-  Skip<11>::Then<SizedOp<TestBackend, 1>>::threeWayCmp(buf1(), buf2());
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<1> a[11]
-LdU<1> b[11]
-Diff<1>
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing tail operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, tail_copy_8) {
-  Tail<_8>::copy(dst(), src(), 16);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[8]
-StU<8> a[8]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, tail_move_8) {
-  Tail<_8>::move(dst(), src(), 16);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[8]
-StU<8> a[8]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, tail_set_8) {
-  Tail<_8>::set(dst(), ubyte{42}, 16);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-Splat<8> 42
-StU<8> a[8]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, tail_different_8) {
-  fillEqual();
-  Tail<_8>::isDifferent(buf1(), buf2(), 16);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[8]
-LdU<8> b[8]
-Neq<8>
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, tail_three_way_cmp_8) {
-  fillEqual();
-  Tail<_8>::threeWayCmp(buf1(), buf2(), 16);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> a[8]
-LdU<8> b[8]
-Diff<8>
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing HeadTail operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, head_tail_copy_8) {
-  HeadTail<_8>::copy(dst(), src(), 16);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0]
-LdU<8> b[8]
-StU<8> a[8]
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing Loop operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, loop_copy_one_iteration_and_tail) {
-  Loop<_8>::copy(dst(), src(), 10);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0] # covers 0-7
-LdU<8> b[2]
-StU<8> a[2] # covers 2-9
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, loop_copy_two_iteration_and_tail) {
-  Loop<_8>::copy(dst(), src(), 17);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0] # covers 0-7
-LdU<8> b[8]
-StU<8> a[8] # covers 8-15
-LdU<8> b[9]
-StU<8> a[9] # covers 9-16
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, loop_with_one_turn_is_inefficient_but_ok) {
-  Loop<_8>::copy(dst(), src(), 8);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0] # first iteration covers 0-7
-LdU<8> b[0] # tail also covers 0-7 but since Loop is supposed to be used
-StU<8> a[0] # with a sufficient number of iterations the tail cost is amortised
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, loop_with_round_number_of_turn) {
-  Loop<_8>::copy(dst(), src(), 24);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StU<8> a[0] # first iteration covers 0-7
-LdU<8> b[8]
-StU<8> a[8] # second iteration covers 8-15
-LdU<8> b[16]
-StU<8> a[16]
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, dst_aligned_loop) {
-  Loop<_8>::copy(dst<16>(), src(), 23);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[0]
-StA<8> a[0] # store is aligned on 16B
-LdU<8> b[8]
-StA<8> a[8] # subsequent stores are aligned
-LdU<8> b[15]
-StU<8> a[15] # Tail is always unaligned
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, aligned_loop) {
-  Loop<_8>::copy(dst<16>(), src<8>(), 23);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdA<8> b[0] # load is aligned on 8B
-StA<8> a[0] # store is aligned on 16B
-LdA<8> b[8] # subsequent loads are aligned
-StA<8> a[8] # subsequent stores are aligned
-LdU<8> b[15] # Tail is always unaligned
-StU<8> a[15] # Tail is always unaligned
-)"));
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//// Testing Align operations
-///////////////////////////////////////////////////////////////////////////////
-
-TEST_F(LlvmLibcAlgorithm, align_dst_copy_8) {
-  Align<_8, Arg::Dst>::Then<Loop<_8>>::copy(dst(2), src(3), 31);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[3]
-StU<8> a[2] # First store covers unaligned bytes
-LdU<8> b[9]
-StA<8> a[8] # First aligned store
-LdU<8> b[17]
-StA<8> a[16] # Subsequent stores are aligned
-LdU<8> b[25]
-StA<8> a[24] # Subsequent stores are aligned
-LdU<8> b[26]
-StU<8> a[25] # Last store covers remaining bytes
-)"));
-}
-
-TEST_F(LlvmLibcAlgorithm, align_src_copy_8) {
-  Align<_8, Arg::Src>::Then<Loop<_8>>::copy(dst(2), src(3), 31);
-  EXPECT_STREQ(getTrace(), stripComments(R"(
-LdU<8> b[3] # First load covers unaligned bytes
-StU<8> a[2]
-LdA<8> b[8] # First aligned load
-StU<8> a[7]
-LdA<8> b[16] # Subsequent loads are aligned
-StU<8> a[15]
-LdA<8> b[24] # Subsequent loads are aligned
-StU<8> a[23]
-LdU<8> b[26] # Last load covers remaining bytes
-StU<8> a[25]
-)"));
-}
-
-} // namespace __llvm_libc

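The copy traces above show the fixed-size strategy: an N-byte operation is decomposed greedily into power-of-two chunks, largest first (15 -> 8+4+2+1, 16 -> 8+8). A self-contained sketch of that decomposition (illustrative; `copy_greedy` is a hypothetical name, not the removed SizedOp code):

    #include <cstddef>
    #include <cstring>

    // Copy Size bytes as power-of-two chunks, largest first, matching the
    // LdU<8>/LdU<4>/LdU<2>/LdU<1> pattern in the copy_15 trace.
    template <size_t Size> void copy_greedy(char *dst, const char *src) {
      size_t offset = 0;
      for (size_t chunk = 8; chunk >= 1; chunk /= 2)
        while (Size - offset >= chunk) {
          memcpy(dst + offset, src + offset, chunk);
          offset += chunk;
        }
    }

For Size == 15 this issues chunks at offsets 0 (8 bytes), 8 (4 bytes), 12 (2 bytes), and 14 (1 byte); for Size == 16, two 8-byte chunks, exactly as the traces record.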
diff  --git a/libc/test/src/string/memory_utils/backend_test.cpp b/libc/test/src/string/memory_utils/backend_test.cpp
deleted file mode 100644
index 72fb7c4cf53b1..0000000000000
--- a/libc/test/src/string/memory_utils/backend_test.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-//===-- Unittests for backends --------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/CPP/array.h"
-#include "src/__support/CPP/bit.h"
-#include "src/__support/CPP/span.h"
-#include "src/__support/architectures.h"
-#include "src/string/memory_utils/backends.h"
-#include "utils/UnitTest/Test.h"
-#include <string.h>
-
-namespace __llvm_libc {
-
-template <size_t Size> using Buffer = cpp::array<char, Size>;
-
-static char GetRandomChar() {
-  // Implementation of C++ minstd_rand seeded with 123456789.
-  // https://en.cppreference.com/w/cpp/numeric/random
-  // "Minimum standard", recommended by Park, Miller, and Stockmeyer in 1993
-  static constexpr const uint64_t a = 48271;
-  static constexpr const uint64_t c = 0;
-  static constexpr const uint64_t m = 2147483647;
-  static uint64_t seed = 123456789;
-  seed = (a * seed + c) % m;
-  return seed;
-}
-
-static void Randomize(cpp::span<char> buffer) {
-  for (auto &current : buffer)
-    current = GetRandomChar();
-}
-
-template <size_t Size> static Buffer<Size> GetRandomBuffer() {
-  Buffer<Size> buffer;
-  Randomize(buffer);
-  return buffer;
-}
-
-template <typename Backend, size_t Size> struct Conf {
-  static_assert(Backend::IS_BACKEND_TYPE);
-  using BufferT = Buffer<Size>;
-  using T = typename Backend::template getNextType<Size>;
-  static_assert(sizeof(T) == Size);
-  static constexpr size_t SIZE = Size;
-
-  static BufferT splat(ubyte value) {
-    return cpp::bit_cast<BufferT>(Backend::template splat<T>(value));
-  }
-
-  static uint64_t notEquals(const BufferT &v1, const BufferT &v2) {
-    return Backend::template notEquals<T>(cpp::bit_cast<T>(v1),
-                                          cpp::bit_cast<T>(v2));
-  }
-
-  static int32_t threeWayCmp(const BufferT &v1, const BufferT &v2) {
-    return Backend::template threeWayCmp<T>(cpp::bit_cast<T>(v1),
-                                            cpp::bit_cast<T>(v2));
-  }
-};
-
-using FunctionTypes = testing::TypeList< //
-#if defined(LLVM_LIBC_ARCH_X86)          //
-    Conf<X86Backend, 1>,                 //
-    Conf<X86Backend, 2>,                 //
-    Conf<X86Backend, 4>,                 //
-    Conf<X86Backend, 8>,                 //
-#if HAS_M128
-    Conf<X86Backend, 16>, //
-#endif
-#if HAS_M256
-    Conf<X86Backend, 32>, //
-#endif
-#if HAS_M512
-    Conf<X86Backend, 64>, //
-#endif
-#endif                           // defined(LLVM_LIBC_ARCH_X86)
-    Conf<Scalar64BitBackend, 1>, //
-    Conf<Scalar64BitBackend, 2>, //
-    Conf<Scalar64BitBackend, 4>, //
-    Conf<Scalar64BitBackend, 8>  //
-    >;
-
-TYPED_TEST(LlvmLibcMemoryBackend, splat, FunctionTypes) {
-  for (auto value : cpp::array<uint8_t, 3>{0u, 1u, 255u}) {
-    alignas(64) const auto stored =
-        ParamType::splat(cpp::bit_cast<ubyte>(value));
-    for (size_t i = 0; i < ParamType::SIZE; ++i)
-      EXPECT_EQ(cpp::bit_cast<uint8_t>(stored[i]), value);
-  }
-}
-
-TYPED_TEST(LlvmLibcMemoryBackend, notEquals, FunctionTypes) {
-  alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>();
-  EXPECT_EQ(ParamType::notEquals(a, a), uint64_t(0));
-  for (size_t i = 0; i < a.size(); ++i) {
-    alignas(64) auto b = a;
-    ++b[i];
-    EXPECT_NE(ParamType::notEquals(a, b), uint64_t(0));
-    EXPECT_NE(ParamType::notEquals(b, a), uint64_t(0));
-  }
-}
-
-TYPED_TEST(LlvmLibcMemoryBackend, threeWayCmp, FunctionTypes) {
-  alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>();
-  EXPECT_EQ(ParamType::threeWayCmp(a, a), 0);
-  for (size_t i = 0; i < a.size(); ++i) {
-    alignas(64) auto b = a;
-    ++b[i];
-    const auto cmp = memcmp(&a, &b, sizeof(a));
-    ASSERT_NE(cmp, 0);
-    if (cmp > 0) {
-      EXPECT_GT(ParamType::threeWayCmp(a, b), 0);
-      EXPECT_LT(ParamType::threeWayCmp(b, a), 0);
-    } else {
-      EXPECT_LT(ParamType::threeWayCmp(a, b), 0);
-      EXPECT_GT(ParamType::threeWayCmp(b, a), 0);
-    }
-  }
-}
-
-template <typename Backend, size_t Size, Temporality TS, Aligned AS>
-struct LoadStoreConf {
-  static_assert(Backend::IS_BACKEND_TYPE);
-  using BufferT = Buffer<Size>;
-  using T = typename Backend::template getNextType<Size>;
-  static_assert(sizeof(T) == Size);
-  static constexpr size_t SIZE = Size;
-
-  static BufferT load(const BufferT &ref) {
-    const auto *ptr = cpp::bit_cast<const T *>(ref.data());
-    const T value = Backend::template load<T, TS, AS>(ptr);
-    return cpp::bit_cast<BufferT>(value);
-  }
-
-  static void store(BufferT &ref, const BufferT value) {
-    auto *ptr = cpp::bit_cast<T *>(ref.data());
-    Backend::template store<T, TS, AS>(ptr, cpp::bit_cast<T>(value));
-  }
-};
-
-using LoadStoreTypes = testing::TypeList<                              //
-#if defined(LLVM_LIBC_ARCH_X86)                                        //
-    LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::NO>,  //
-    LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::YES>, //
-    LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::NO>,  //
-    LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::YES>, //
-    LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::NO>,  //
-    LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::YES>, //
-    LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::NO>,  //
-    LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::YES>, //
-#if HAS_M128
-    LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::NO>,      //
-    LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::YES>,     //
-    LoadStoreConf<X86Backend, 16, Temporality::NON_TEMPORAL, Aligned::YES>, //
-#endif
-#if HAS_M256
-    LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::NO>,      //
-    LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::YES>,     //
-    LoadStoreConf<X86Backend, 32, Temporality::NON_TEMPORAL, Aligned::YES>, //
-#endif
-#if HAS_M512
-    LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::NO>,      //
-    LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::YES>,     //
-    LoadStoreConf<X86Backend, 64, Temporality::NON_TEMPORAL, Aligned::YES>, //
-#endif
-#endif // defined(LLVM_LIBC_ARCH_X86)
-    LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL, Aligned::NO>, //
-    LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL,
-                  Aligned::YES>,                                              //
-    LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL, Aligned::NO>, //
-    LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL,
-                  Aligned::YES>,                                              //
-    LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL, Aligned::NO>, //
-    LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL,
-                  Aligned::YES>,                                              //
-    LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::NO>, //
-    LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::YES> //
-    >;
-
-TYPED_TEST(LlvmLibcMemoryBackend, load, LoadStoreTypes) {
-  alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>();
-  const auto loaded = ParamType::load(expected);
-  for (size_t i = 0; i < ParamType::SIZE; ++i)
-    EXPECT_EQ(loaded[i], expected[i]);
-}
-
-TYPED_TEST(LlvmLibcMemoryBackend, store, LoadStoreTypes) {
-  alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>();
-  alignas(64) typename ParamType::BufferT stored;
-  ParamType::store(stored, expected);
-  for (size_t i = 0; i < ParamType::SIZE; ++i)
-    EXPECT_EQ(stored[i], expected[i]);
-}
-
-} // namespace __llvm_libc

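GetRandomChar in the removed backend test is a hand-rolled minstd_rand. A quick standalone check (illustrative only) that its constants match the standard engine with the same seed:

    #include <cassert>
    #include <cstdint>
    #include <random>

    int main() {
      std::minstd_rand reference(123456789); // same seed as GetRandomChar
      uint64_t seed = 123456789;
      for (int i = 0; i < 1000; ++i) {
        seed = (48271 * seed + 0) % 2147483647; // a, c, m from the test
        assert(seed == reference());            // identical sequences
      }
      return 0;
    }

The test then truncates each 31-bit state value to `char`, which is sufficient for producing arbitrary byte patterns.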
More information about the libc-commits mailing list