[libc-commits] [libc] 67fe3bd - [libc][mem*] Introduce Sized/Backends for new mem framework
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Wed Jun 22 04:21:19 PDT 2022
Author: Guillaume Chatelet
Date: 2022-06-22T11:21:06Z
New Revision: 67fe3bd33ce7698e35a98f165ec6ba6090aeff85
URL: https://github.com/llvm/llvm-project/commit/67fe3bd33ce7698e35a98f165ec6ba6090aeff85
DIFF: https://github.com/llvm/llvm-project/commit/67fe3bd33ce7698e35a98f165ec6ba6090aeff85.diff
LOG: [libc][mem*] Introduce Sized/Backends for new mem framework
This patch is a subpart of D125768, intended to make the review easier.
The `SizedOp` struct represents operations to be performed on a certain number of bytes.
It is responsible for breaking them down into platform types, which are then forwarded to the `Backend`.
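To make the decomposition concrete, here is a minimal standalone sketch (illustrative only, plain C++ rather than the framework's address and backend types; `NextType` and `sized_copy` are hypothetical names): the widest scalar type that fits is consumed first and the remainder is handled recursively, so a 7-byte copy becomes 4 + 2 + 1 bytes.

// Hypothetical sketch of the SizedOp decomposition (not part of this patch).
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

// Maps a byte count to the widest scalar type that fits, mirroring the role
// of a Backend's getNextType.
template <std::size_t Size>
using NextType = std::conditional_t<
    (Size >= 8), std::uint64_t,
    std::conditional_t<(Size >= 4), std::uint32_t,
                       std::conditional_t<(Size >= 2), std::uint16_t,
                                          std::uint8_t>>>;

// Copies Size bytes by chaining fixed-size chunks, e.g. Size == 7 => 4 + 2 + 1.
template <std::size_t Size> void sized_copy(char *dst, const char *src) {
  if constexpr (Size > 0) {
    using T = NextType<Size>;
    T chunk;
    std::memcpy(&chunk, src, sizeof(T)); // stands in for Backend::load
    std::memcpy(dst, &chunk, sizeof(T)); // stands in for Backend::store
    sized_copy<Size - sizeof(T)>(dst + sizeof(T), src + sizeof(T));
  }
}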
The `Backend` struct represents a lower-level abstraction that works only on types (`uint8_t`, `__m128i`, ...).
It is similar to instruction selection.
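For reference, a toy sketch of the contract a `Backend` fulfils (again illustrative only; `ToyScalarBackend` is a made-up name, and the real interface is spelled out by `NoBackend` in backends.h below): type-level load, store, splat and compare primitives that `SizedOp` composes. Keeping these primitives per-type is what lets each architecture specialize them for vector registers (`__m128i`, `__m256i`, ...) without touching the size-decomposition logic.

// Hypothetical sketch of a scalar-only Backend (not part of this patch).
#include <cstdint>

struct ToyScalarBackend {
  static constexpr bool IS_BACKEND_TYPE = true;

  template <typename T> static T load(const T *src) { return *src; }
  template <typename T> static void store(T *dst, T value) { *dst = value; }

  // Fills every byte of T with `value`,
  // e.g. splat<std::uint32_t>(0xAB) == 0xABABABAB.
  template <typename T> static T splat(std::uint8_t value) {
    return T(~0ULL) / T(0xFF) * T(value);
  }

  // Returns zero iff a == b; callers can aggregate results with bitwise or.
  template <typename T> static std::uint64_t notEquals(T a, T b) {
    return a ^ b;
  }
};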
Differential Revision: https://reviews.llvm.org/D126768
Added:
libc/src/string/memory_utils/backend_aarch64.h
libc/src/string/memory_utils/backend_scalar.h
libc/src/string/memory_utils/backend_x86.h
libc/src/string/memory_utils/backends.h
libc/src/string/memory_utils/sized_op.h
libc/test/src/string/memory_utils/backend_test.cpp
Modified:
libc/test/src/string/memory_utils/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/src/string/memory_utils/backend_aarch64.h b/libc/src/string/memory_utils/backend_aarch64.h
new file mode 100644
index 0000000000000..7dc9b33c17b47
--- /dev/null
+++ b/libc/src/string/memory_utils/backend_aarch64.h
@@ -0,0 +1,71 @@
+//===-- Elementary operations for aarch64 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
+
+#if defined(LLVM_LIBC_ARCH_AARCH64)
+#include "src/string/memory_utils/backend_scalar.h"
+
+#ifdef __ARM_NEON
+#include <arm_neon.h>
+#endif
+
+namespace __llvm_libc {
+
+struct Aarch64Backend : public Scalar64BitBackend {
+ static constexpr bool IS_BACKEND_TYPE = true;
+
+ template <typename T, Temporality TS, Aligned AS,
+ cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
+ static inline T load(const T *src) {
+ return Scalar64BitBackend::template load<T, TS, AS>(src);
+ }
+};
+
+// Implementation of the SizedOp abstraction for the set operation.
+struct Zva64 {
+ static constexpr size_t SIZE = 64;
+
+ template <typename DstAddrT>
+ static inline void set(DstAddrT dst, ubyte value) {
+#if __SIZEOF_POINTER__ == 4
+ asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory");
+#else
+ asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
+#endif
+ }
+};
+
+inline static bool hasZva() {
+ uint64_t zva_val;
+ asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
+ // DC ZVA is permitted if DZP, bit [4] is zero.
+ // BS, bits [3:0] is log2 of the block size in words.
+  // So the next line checks whether the instruction is permitted and the
+  // block size is 16 words (i.e. 64 bytes).
+ return (zva_val & 0b11111) == 0b00100;
+}
+
+namespace aarch64 {
+using _1 = SizedOp<Aarch64Backend, 1>;
+using _2 = SizedOp<Aarch64Backend, 2>;
+using _3 = SizedOp<Aarch64Backend, 3>;
+using _4 = SizedOp<Aarch64Backend, 4>;
+using _8 = SizedOp<Aarch64Backend, 8>;
+using _16 = SizedOp<Aarch64Backend, 16>;
+using _32 = SizedOp<Aarch64Backend, 32>;
+using _64 = SizedOp<Aarch64Backend, 64>;
+using _128 = SizedOp<Aarch64Backend, 128>;
+} // namespace aarch64
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_ARCH_AARCH64
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
diff --git a/libc/src/string/memory_utils/backend_scalar.h b/libc/src/string/memory_utils/backend_scalar.h
new file mode 100644
index 0000000000000..00b8ed66d9cf1
--- /dev/null
+++ b/libc/src/string/memory_utils/backend_scalar.h
@@ -0,0 +1,104 @@
+//===-- Elementary operations for native scalar types ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
+
+#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
+#include "src/__support/endian.h"
+
+namespace __llvm_libc {
+
+struct Scalar64BitBackend {
+ static constexpr bool IS_BACKEND_TYPE = true;
+
+ template <typename T>
+ static constexpr bool IsScalarType =
+ cpp::IsSameV<T, uint8_t> || cpp::IsSameV<T, uint16_t> ||
+ cpp::IsSameV<T, uint32_t> || cpp::IsSameV<T, uint64_t>;
+
+ template <typename T, Temporality TS, Aligned AS>
+ static inline T load(const T *src) {
+ static_assert(IsScalarType<T>);
+ static_assert(TS == Temporality::TEMPORAL,
+ "Scalar load does not support non-temporal access");
+ return *src;
+ }
+
+ template <typename T, Temporality TS, Aligned AS>
+ static inline void store(T *dst, T value) {
+ static_assert(IsScalarType<T>);
+ static_assert(TS == Temporality::TEMPORAL,
+ "Scalar store does not support non-temporal access");
+ *dst = value;
+ }
+
+ template <typename T> static inline T splat(ubyte value) {
+ static_assert(IsScalarType<T>);
+ return (T(~0ULL) / T(0xFF)) * T(value);
+ }
+
+ template <typename T> static inline uint64_t notEquals(T v1, T v2) {
+ static_assert(IsScalarType<T>);
+ return v1 ^ v2;
+ }
+
+ template <typename T> static inline int32_t threeWayCmp(T v1, T v2) {
+ DeferredStaticAssert("not implemented");
+ }
+
+ // Returns the type to use to consume Size bytes.
+ template <size_t Size>
+ using getNextType = cpp::ConditionalType<
+ Size >= 8, uint64_t,
+ cpp::ConditionalType<Size >= 4, uint32_t,
+ cpp::ConditionalType<Size >= 2, uint16_t, uint8_t>>>;
+};
+
+template <>
+int32_t inline Scalar64BitBackend::threeWayCmp<uint8_t>(uint8_t a, uint8_t b) {
+ const int16_t la = Endian::to_big_endian(a);
+ const int16_t lb = Endian::to_big_endian(b);
+ return la - lb;
+}
+template <>
+int32_t inline Scalar64BitBackend::threeWayCmp<uint16_t>(uint16_t a,
+ uint16_t b) {
+ const int32_t la = Endian::to_big_endian(a);
+ const int32_t lb = Endian::to_big_endian(b);
+ return la - lb;
+}
+template <>
+int32_t inline Scalar64BitBackend::threeWayCmp<uint32_t>(uint32_t a,
+ uint32_t b) {
+ const uint32_t la = Endian::to_big_endian(a);
+ const uint32_t lb = Endian::to_big_endian(b);
+ return la > lb ? 1 : la < lb ? -1 : 0;
+}
+template <>
+int32_t inline Scalar64BitBackend::threeWayCmp<uint64_t>(uint64_t a,
+ uint64_t b) {
+ const uint64_t la = Endian::to_big_endian(a);
+ const uint64_t lb = Endian::to_big_endian(b);
+ return la > lb ? 1 : la < lb ? -1 : 0;
+}
+
+namespace scalar {
+using _1 = SizedOp<Scalar64BitBackend, 1>;
+using _2 = SizedOp<Scalar64BitBackend, 2>;
+using _3 = SizedOp<Scalar64BitBackend, 3>;
+using _4 = SizedOp<Scalar64BitBackend, 4>;
+using _8 = SizedOp<Scalar64BitBackend, 8>;
+using _16 = SizedOp<Scalar64BitBackend, 16>;
+using _32 = SizedOp<Scalar64BitBackend, 32>;
+using _64 = SizedOp<Scalar64BitBackend, 64>;
+using _128 = SizedOp<Scalar64BitBackend, 128>;
+} // namespace scalar
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
diff --git a/libc/src/string/memory_utils/backend_x86.h b/libc/src/string/memory_utils/backend_x86.h
new file mode 100644
index 0000000000000..aee1d2275e0ba
--- /dev/null
+++ b/libc/src/string/memory_utils/backend_x86.h
@@ -0,0 +1,221 @@
+//===-- Elementary operations for x86 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
+
+#if defined(LLVM_LIBC_ARCH_X86)
+#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
+#include "src/string/memory_utils/backend_scalar.h"
+
+#ifdef __SSE2__
+#include <immintrin.h>
+#endif // __SSE2__
+
+#if defined(__SSE2__)
+#define HAS_M128 true
+#else
+#define HAS_M128 false
+#endif
+
+#if defined(__AVX2__)
+#define HAS_M256 true
+#else
+#define HAS_M256 false
+#endif
+
+#if defined(__AVX512F__) and defined(__AVX512BW__)
+#define HAS_M512 true
+#else
+#define HAS_M512 false
+#endif
+
+namespace __llvm_libc {
+struct X86Backend : public Scalar64BitBackend {
+ static constexpr bool IS_BACKEND_TYPE = true;
+
+ // Scalar types use base class implementations.
+ template <typename T, Temporality TS, Aligned AS,
+ cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
+ static inline T load(const T *src) {
+ return Scalar64BitBackend::template load<T, TS, AS>(src);
+ }
+
+ // Scalar types use base class implementations.
+ template <typename T, Temporality TS, Aligned AS,
+ cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
+ static inline void store(T *dst, T value) {
+ Scalar64BitBackend::template store<T, TS, AS>(dst, value);
+ }
+
+ // Scalar types use base class implementations.
+ template <typename T,
+ cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
+ static inline uint64_t notEquals(T v1, T v2) {
+ return Scalar64BitBackend::template notEquals<T>(v1, v2);
+ }
+
+ // Scalar types use base class implementations.
+ template <typename T,
+ cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
+ static inline T splat(ubyte value) {
+ return Scalar64BitBackend::template splat<T>(value);
+ }
+
+ // Scalar types use base class implementations.
+ template <typename T,
+ cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
+ static inline int32_t threeWayCmp(T v1, T v2) {
+ return Scalar64BitBackend::template threeWayCmp<T>(v1, v2);
+ }
+
+ // X86 types are specialized below.
+ template <
+ typename T, Temporality TS, Aligned AS,
+ cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
+ static inline T load(const T *src);
+
+ // X86 types are specialized below.
+ template <
+ typename T, Temporality TS, Aligned AS,
+ cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
+ static inline void store(T *dst, T value);
+
+ // X86 types are specialized below.
+ template <typename T, cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
+ bool> = true>
+ static inline T splat(ubyte value);
+
+ // X86 types are specialized below.
+ template <typename T, cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
+ bool> = true>
+ static inline uint64_t notEquals(T v1, T v2);
+
+ template <typename T, cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
+ bool> = true>
+ static inline int32_t threeWayCmp(T v1, T v2) {
+    return char_diff(reinterpret_cast<char *>(&v1),
+ reinterpret_cast<char *>(&v2), notEquals(v1, v2));
+ }
+
+ // Returns the type to use to consume Size bytes.
+ template <size_t Size>
+ using getNextType = cpp::ConditionalType<
+ (HAS_M512 && Size >= 64), __m512i,
+ cpp::ConditionalType<
+ (HAS_M256 && Size >= 32), __m256i,
+ cpp::ConditionalType<(HAS_M128 && Size >= 16), __m128i,
+ Scalar64BitBackend::getNextType<Size>>>>;
+
+private:
+  static inline int32_t char_diff(const char *a, const char *b, uint64_t mask) {
+    const size_t diff_index = mask == 0 ? 0 : __builtin_ctzll(mask);
+    const int16_t ca = (unsigned char)a[diff_index];
+    const int16_t cb = (unsigned char)b[diff_index];
+ return ca - cb;
+ }
+};
+
+static inline void repmovsb(void *dst, const void *src, size_t runtime_size) {
+ asm volatile("rep movsb"
+ : "+D"(dst), "+S"(src), "+c"(runtime_size)
+ :
+ : "memory");
+}
+
+#define SPECIALIZE_LOAD(T, OS, AS, INTRISIC) \
+ template <> inline T X86Backend::load<T, OS, AS>(const T *src) { \
+ return INTRISIC(const_cast<T *>(src)); \
+ }
+#define SPECIALIZE_STORE(T, OS, AS, INTRISIC) \
+ template <> inline void X86Backend::store<T, OS, AS>(T * dst, T value) { \
+ INTRISIC(dst, value); \
+ }
+
+#if HAS_M128
+SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_load_si128)
+SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_loadu_si128)
+SPECIALIZE_LOAD(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
+ _mm_stream_load_si128)
+// X86 non-temporal load needs aligned access
+SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_store_si128)
+SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_storeu_si128)
+SPECIALIZE_STORE(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
+ _mm_stream_si128)
+// X86 non-temporal store needs aligned access
+template <> inline __m128i X86Backend::splat<__m128i>(ubyte value) {
+ return _mm_set1_epi8(__builtin_bit_cast(char, value));
+}
+template <>
+inline uint64_t X86Backend::notEquals<__m128i>(__m128i a, __m128i b) {
+ using T = char __attribute__((__vector_size__(16)));
+ return _mm_movemask_epi8(T(a) != T(b));
+}
+#endif // HAS_M128
+
+#if HAS_M256
+SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::YES, _mm256_load_si256)
+SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::NO, _mm256_loadu_si256)
+SPECIALIZE_LOAD(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
+ _mm256_stream_load_si256)
+// X86 non-temporal load needs aligned access
+SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::YES,
+ _mm256_store_si256)
+SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::NO,
+ _mm256_storeu_si256)
+SPECIALIZE_STORE(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
+ _mm256_stream_si256)
+// X86 non-temporal store needs aligned access
+template <> inline __m256i X86Backend::splat<__m256i>(ubyte value) {
+ return _mm256_set1_epi8(__builtin_bit_cast(char, value));
+}
+template <>
+inline uint64_t X86Backend::notEquals<__m256i>(__m256i a, __m256i b) {
+ using T = char __attribute__((__vector_size__(32)));
+ return _mm256_movemask_epi8(T(a) != T(b));
+}
+#endif // HAS_M256
+
+#if HAS_M512
+SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::YES, _mm512_load_si512)
+SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::NO, _mm512_loadu_si512)
+SPECIALIZE_LOAD(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
+ _mm512_stream_load_si512)
+// X86 non-temporal load needs aligned access
+SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::YES,
+ _mm512_store_si512)
+SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::NO,
+ _mm512_storeu_si512)
+SPECIALIZE_STORE(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
+ _mm512_stream_si512)
+// X86 non-temporal store needs aligned access
+template <> inline __m512i X86Backend::splat<__m512i>(ubyte value) {
+ return _mm512_broadcastb_epi8(_mm_set1_epi8(__builtin_bit_cast(char, value)));
+}
+template <>
+inline uint64_t X86Backend::notEquals<__m512i>(__m512i a, __m512i b) {
+ return _mm512_cmpneq_epi8_mask(a, b);
+}
+#endif // HAS_M512
+
+namespace x86 {
+using _1 = SizedOp<X86Backend, 1>;
+using _2 = SizedOp<X86Backend, 2>;
+using _3 = SizedOp<X86Backend, 3>;
+using _4 = SizedOp<X86Backend, 4>;
+using _8 = SizedOp<X86Backend, 8>;
+using _16 = SizedOp<X86Backend, 16>;
+using _32 = SizedOp<X86Backend, 32>;
+using _64 = SizedOp<X86Backend, 64>;
+using _128 = SizedOp<X86Backend, 128>;
+} // namespace x86
+
+} // namespace __llvm_libc
+
+#endif // defined(LLVM_LIBC_ARCH_X86)
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
diff --git a/libc/src/string/memory_utils/backends.h b/libc/src/string/memory_utils/backends.h
new file mode 100644
index 0000000000000..6d241fa5eb289
--- /dev/null
+++ b/libc/src/string/memory_utils/backends.h
@@ -0,0 +1,60 @@
+//===-- Elementary operations to compose memory primitives ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the concept of a Backend.
+// It constitutes the lowest level of the framework and is akin to instruction
+// selection. It defines how to implement aligned/unaligned,
+// temporal/non-temporal native loads and stores for a particular architecture
+// as well as efficient ways to fill and compare types.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
+
+#include "src/string/memory_utils/address.h" // Temporality, Aligned
+#include "src/string/memory_utils/sized_op.h" // SizedOp
+#include <stddef.h> // size_t
+#include <stdint.h> // uint##_t
+
+namespace __llvm_libc {
+
+// Backends must implement the following interface.
+struct NoBackend {
+ static constexpr bool IS_BACKEND_TYPE = true;
+
+ // Loads a T from `src` honoring Temporality and Alignment.
+ template <typename T, Temporality, Aligned> static T load(const T *src);
+
+ // Stores a T to `dst` honoring Temporality and Alignment.
+ template <typename T, Temporality, Aligned>
+ static void store(T *dst, T value);
+
+ // Returns a T filled with `value` bytes.
+ template <typename T> static T splat(ubyte value);
+
+ // Returns zero iff v1 == v2.
+ template <typename T> static uint64_t notEquals(T v1, T v2);
+
+ // Returns zero iff v1 == v2, a negative number if v1 < v2 and a positive
+ // number otherwise.
+ template <typename T> static int32_t threeWayCmp(T v1, T v2);
+
+ // Returns the type to use to consume Size bytes.
+ // If no type handles Size bytes at once
+ template <size_t Size> using getNextType = void;
+};
+
+} // namespace __llvm_libc
+
+// We inline all backend implementations here to simplify the build system.
+// Each file needs to be guarded with the appropriate LLVM_LIBC_ARCH_XXX ifdef.
+#include "src/string/memory_utils/backend_aarch64.h"
+#include "src/string/memory_utils/backend_scalar.h"
+#include "src/string/memory_utils/backend_x86.h"
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
diff --git a/libc/src/string/memory_utils/sized_op.h b/libc/src/string/memory_utils/sized_op.h
new file mode 100644
index 0000000000000..12ace7cc6bdfc
--- /dev/null
+++ b/libc/src/string/memory_utils/sized_op.h
@@ -0,0 +1,177 @@
+//===-- Sized Operations --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SizedOp struct that serves as the middle end of the
+// framework. It implements sized memory operations by breaking them down into
+// simpler types whose availability is described in the Backend. It also
+// provides a way to load and store sized chunks of memory (necessary for the
+// move operation). SizedOp are the building blocks of higher order algorithms
+// like HeadTail, Align or Loop.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
+
+#include <stddef.h> // size_t
+
+#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
+#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE \
+ __has_builtin(__builtin_memcpy_inline)
+#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
+
+#ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
+#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE \
+ __has_builtin(__builtin_memset_inline)
+#endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
+
+namespace __llvm_libc {
+
+template <typename Backend, size_t Size> struct SizedOp {
+ static constexpr size_t SIZE = Size;
+
+private:
+ static_assert(Backend::IS_BACKEND_TYPE);
+ static_assert(SIZE > 0);
+ using type = typename Backend::template getNextType<Size>;
+ static constexpr size_t TYPE_SIZE = sizeof(type);
+ static_assert(SIZE >= TYPE_SIZE);
+ static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
+ using NextBlock = SizedOp<Backend, NEXT_SIZE>;
+
+  // Returns whether we can use an aligned operation.
+  // This is possible because the address type carries known compile-time
+  // alignment information.
+ template <typename T, typename AddrT> static constexpr Aligned isAligned() {
+ static_assert(IsAddressType<AddrT>::Value);
+ return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
+ : Aligned::NO;
+ }
+
+ // Loads a value of the current `type` from `src`.
+ // This function is responsible for extracting Temporality and Alignment from
+ // the Address type.
+ template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) {
+ static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
+ constexpr auto AS = isAligned<type, SrcAddrT>();
+ constexpr auto TS = SrcAddrT::TEMPORALITY;
+ return Backend::template load<type, TS, AS>(as<const type>(src));
+ }
+
+ // Stores a value of the current `type` to `dst`.
+ // This function is responsible for extracting Temporality and Alignment from
+ // the Address type.
+ template <typename DstAddrT>
+ static inline void nativeStore(type value, DstAddrT dst) {
+ static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
+ constexpr auto AS = isAligned<type, DstAddrT>();
+ constexpr auto TS = DstAddrT::TEMPORALITY;
+ return Backend::template store<type, TS, AS>(as<type>(dst), value);
+ }
+
+ // A well aligned POD structure to store Size bytes.
+ // This is used to implement the move operations.
+ struct Value {
+ alignas(alignof(type)) ubyte payload[Size];
+ };
+
+public:
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void copy(DstAddrT dst, SrcAddrT src) {
+ static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
+ static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
+ if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
+ DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
+ SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
+ // delegate optimized copy to compiler.
+ __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size);
+ return;
+ }
+ nativeStore(nativeLoad(src), dst);
+ if constexpr (NEXT_SIZE > 0)
+ NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
+ }
+
+ template <typename DstAddrT, typename SrcAddrT>
+ static inline void move(DstAddrT dst, SrcAddrT src) {
+ const auto payload = nativeLoad(src);
+ if constexpr (NEXT_SIZE > 0)
+ NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
+ nativeStore(payload, dst);
+ }
+
+ template <typename DstAddrT>
+ static inline void set(DstAddrT dst, ubyte value) {
+ if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE &&
+ DstAddrT::TEMPORALITY == Temporality::TEMPORAL) {
+ // delegate optimized set to compiler.
+ __builtin_memset_inline(dst.ptr(), value, Size);
+ return;
+ }
+ nativeStore(Backend::template splat<type>(value), dst);
+ if constexpr (NEXT_SIZE > 0)
+ NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value);
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
+ const uint64_t current =
+ Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2));
+ if constexpr (NEXT_SIZE > 0) {
+      // In the case where we cannot handle Size with a single operation (e.g.
+      // Size == 3) we can either return early if current is non zero or
+      // aggregate all the operations through the bitwise or operator.
+      // We chose the latter to reduce branching.
+ return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1),
+ offsetAddr<TYPE_SIZE>(src2)));
+ } else {
+ return current;
+ }
+ }
+
+ template <typename SrcAddrT1, typename SrcAddrT2>
+ static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
+ const auto a = nativeLoad(src1);
+ const auto b = nativeLoad(src2);
+ // If we cannot handle Size as a single operation we have two choices:
+    //   - Either use Backend's threeWayCmp directly and return it if non
+ // zero.
+ //
+ // if (int32_t res = Backend::template threeWayCmp<type>(a, b))
+ // return res;
+ //
+ // - Or use Backend's notEquals first and use threeWayCmp only if
+    //     different, the assumption here is that notEquals is faster than
+ // threeWayCmp and that we can save cycles when the Size needs to be
+ // decomposed in many sizes (e.g. Size == 7 => 4 + 2 + 1)
+ //
+ // if (Backend::template notEquals<type>(a, b))
+ // return Backend::template threeWayCmp<type>(a, b);
+ //
+ // We chose the former to reduce code bloat and branching.
+ if (int32_t res = Backend::template threeWayCmp<type>(a, b))
+ return res;
+ if constexpr (NEXT_SIZE > 0)
+ return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1),
+ offsetAddr<TYPE_SIZE>(src2));
+ return 0;
+ }
+
+ template <typename SrcAddrT> static Value load(SrcAddrT src) {
+ Value output;
+ copy(DstAddr<alignof(type)>(output.payload), src);
+ return output;
+ }
+
+ template <typename DstAddrT> static void store(DstAddrT dst, Value value) {
+ copy(dst, SrcAddr<alignof(type)>(value.payload));
+ }
+};
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt
index d5d32107636d5..83926e48aff55 100644
--- a/libc/test/src/string/memory_utils/CMakeLists.txt
+++ b/libc/test/src/string/memory_utils/CMakeLists.txt
@@ -4,6 +4,7 @@ add_libc_unittest(
libc_string_unittests
SRCS
address_test.cpp
+ backend_test.cpp
elements_test.cpp
memory_access_test.cpp
utils_test.cpp
diff --git a/libc/test/src/string/memory_utils/backend_test.cpp b/libc/test/src/string/memory_utils/backend_test.cpp
new file mode 100644
index 0000000000000..27418b7c9933e
--- /dev/null
+++ b/libc/test/src/string/memory_utils/backend_test.cpp
@@ -0,0 +1,197 @@
+//===-- Unittests for backends --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/Array.h"
+#include "src/__support/CPP/ArrayRef.h"
+#include "src/__support/CPP/Bit.h"
+#include "src/__support/architectures.h"
+#include "src/string/memory_utils/backends.h"
+#include "utils/UnitTest/Test.h"
+#include <string.h>
+
+namespace __llvm_libc {
+
+template <size_t Size> using Buffer = cpp::Array<char, Size>;
+
+static char GetRandomChar() {
+ // Implementation of C++ minstd_rand seeded with 123456789.
+ // https://en.cppreference.com/w/cpp/numeric/random
+ // "Minimum standard", recommended by Park, Miller, and Stockmeyer in 1993
+ static constexpr const uint64_t a = 48271;
+ static constexpr const uint64_t c = 0;
+ static constexpr const uint64_t m = 2147483647;
+ static uint64_t seed = 123456789;
+ seed = (a * seed + c) % m;
+ return seed;
+}
+
+static void Randomize(cpp::MutableArrayRef<char> buffer) {
+  for (auto &current : buffer)
+ current = GetRandomChar();
+}
+
+template <size_t Size> static Buffer<Size> GetRandomBuffer() {
+ Buffer<Size> buffer;
+ Randomize(buffer);
+ return buffer;
+}
+
+template <typename Backend, size_t Size> struct Conf {
+ static_assert(Backend::IS_BACKEND_TYPE);
+ using BufferT = Buffer<Size>;
+ using T = typename Backend::template getNextType<Size>;
+ static_assert(sizeof(T) == Size);
+ static constexpr size_t SIZE = Size;
+
+ static BufferT splat(ubyte value) {
+ return bit_cast<BufferT>(Backend::template splat<T>(value));
+ }
+
+ static uint64_t notEquals(const BufferT &v1, const BufferT &v2) {
+ return Backend::template notEquals<T>(bit_cast<T>(v1), bit_cast<T>(v2));
+ }
+
+ static int32_t threeWayCmp(const BufferT &v1, const BufferT &v2) {
+ return Backend::template threeWayCmp<T>(bit_cast<T>(v1), bit_cast<T>(v2));
+ }
+};
+
+using FunctionTypes = testing::TypeList< //
+#if defined(LLVM_LIBC_ARCH_X86) //
+ Conf<X86Backend, 1>, //
+ Conf<X86Backend, 2>, //
+ Conf<X86Backend, 4>, //
+ Conf<X86Backend, 8>, //
+#if HAS_M128
+ Conf<X86Backend, 16>, //
+#endif
+#if HAS_M256
+ Conf<X86Backend, 32>, //
+#endif
+#if HAS_M512
+ Conf<X86Backend, 64>, //
+#endif
+#endif // defined(LLVM_LIBC_ARCH_X86)
+ Conf<Scalar64BitBackend, 1>, //
+ Conf<Scalar64BitBackend, 2>, //
+ Conf<Scalar64BitBackend, 4>, //
+ Conf<Scalar64BitBackend, 8> //
+ >;
+
+TYPED_TEST(LlvmLibcMemoryBackend, splat, FunctionTypes) {
+ for (auto value : cpp::Array<uint8_t, 3>{0u, 1u, 255u}) {
+ alignas(64) const auto stored = ParamType::splat(bit_cast<ubyte>(value));
+ for (size_t i = 0; i < ParamType::SIZE; ++i)
+ EXPECT_EQ(bit_cast<uint8_t>(stored[i]), value);
+ }
+}
+
+TYPED_TEST(LlvmLibcMemoryBackend, notEquals, FunctionTypes) {
+ alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>();
+ EXPECT_EQ(ParamType::notEquals(a, a), 0UL);
+ for (size_t i = 0; i < a.size(); ++i) {
+ alignas(64) auto b = a;
+ ++b[i];
+ EXPECT_NE(ParamType::notEquals(a, b), 0UL);
+ EXPECT_NE(ParamType::notEquals(b, a), 0UL);
+ }
+}
+
+TYPED_TEST(LlvmLibcMemoryBackend, threeWayCmp, FunctionTypes) {
+ alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>();
+ EXPECT_EQ(ParamType::threeWayCmp(a, a), 0);
+ for (size_t i = 0; i < a.size(); ++i) {
+ alignas(64) auto b = a;
+ ++b[i];
+ const auto cmp = memcmp(&a, &b, sizeof(a));
+ ASSERT_NE(cmp, 0);
+ if (cmp > 0) {
+ EXPECT_GT(ParamType::threeWayCmp(a, b), 0);
+ EXPECT_LT(ParamType::threeWayCmp(b, a), 0);
+ } else {
+ EXPECT_LT(ParamType::threeWayCmp(a, b), 0);
+ EXPECT_GT(ParamType::threeWayCmp(b, a), 0);
+ }
+ }
+}
+
+template <typename Backend, size_t Size, Temporality TS, Aligned AS>
+struct LoadStoreConf {
+ static_assert(Backend::IS_BACKEND_TYPE);
+ using BufferT = Buffer<Size>;
+ using T = typename Backend::template getNextType<Size>;
+ static_assert(sizeof(T) == Size);
+ static constexpr size_t SIZE = Size;
+
+ static BufferT load(const BufferT &ref) {
+ const auto *ptr = bit_cast<const T *>(ref.data());
+ const T value = Backend::template load<T, TS, AS>(ptr);
+ return bit_cast<BufferT>(value);
+ }
+
+ static void store(BufferT &ref, const BufferT value) {
+ auto *ptr = bit_cast<T *>(ref.data());
+ Backend::template store<T, TS, AS>(ptr, bit_cast<T>(value));
+ }
+};
+
+using LoadStoreTypes = testing::TypeList< //
+#if defined(LLVM_LIBC_ARCH_X86) //
+ LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::YES>, //
+ LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::YES>, //
+ LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::YES>, //
+ LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::YES>, //
+#if HAS_M128
+ LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::YES>, //
+ LoadStoreConf<X86Backend, 16, Temporality::NON_TEMPORAL, Aligned::YES>, //
+#endif
+#if HAS_M256
+ LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::YES>, //
+ LoadStoreConf<X86Backend, 32, Temporality::NON_TEMPORAL, Aligned::YES>, //
+#endif
+#if HAS_M512
+ LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::YES>, //
+ LoadStoreConf<X86Backend, 64, Temporality::NON_TEMPORAL, Aligned::YES>, //
+#endif
+#endif // defined(LLVM_LIBC_ARCH_X86)
+ LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL,
+ Aligned::YES>, //
+ LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL,
+ Aligned::YES>, //
+ LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL,
+ Aligned::YES>, //
+ LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::NO>, //
+ LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::YES> //
+ >;
+
+TYPED_TEST(LlvmLibcMemoryBackend, load, LoadStoreTypes) {
+ alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>();
+ const auto loaded = ParamType::load(expected);
+ for (size_t i = 0; i < ParamType::SIZE; ++i)
+ EXPECT_EQ(loaded[i], expected[i]);
+}
+
+TYPED_TEST(LlvmLibcMemoryBackend, store, LoadStoreTypes) {
+ alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>();
+ alignas(64) typename ParamType::BufferT stored;
+ ParamType::store(stored, expected);
+ for (size_t i = 0; i < ParamType::SIZE; ++i)
+ EXPECT_EQ(stored[i], expected[i]);
+}
+
+} // namespace __llvm_libc