[libc-commits] [libc] [libc] Update the memory helper functions for simd types (PR #160174)
Joseph Huber via libc-commits
libc-commits at lists.llvm.org
Fri Sep 26 05:32:38 PDT 2025
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/160174
>From 442156ba7ad8d59cacbda90f05762b971df0f00a Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 22 Sep 2025 13:46:49 -0500
Subject: [PATCH] [libc] Update the memory helper functions for simd types
Summary:
This unifies the interface into a small set of `load` and `store`
functions that optionally accept a mask and/or indices, covering masked
loads and stores as well as masked gathers and scatters.
I had to rename these from `load` and `store` because they conflict with
the other version in `op_generic`. I might just work around that with a
trait instead.
inline
---
libc/src/__support/CPP/simd.h | 70 ++++++++++++++-----
.../memory_utils/generic/inline_strlen.h | 5 +-
libc/test/src/__support/CPP/simd_test.cpp | 62 ++++++++++++++++
3 files changed, 119 insertions(+), 18 deletions(-)
diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h
index d2a5b17fa4b9f..422d2f4c8433d 100644
--- a/libc/src/__support/CPP/simd.h
+++ b/libc/src/__support/CPP/simd.h
@@ -287,34 +287,72 @@ LIBC_INLINE constexpr static T hmax(simd<T, N> v) {
}
// Accessor helpers.
-template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T load_unaligned(const void *ptr) {
+template <typename T>
+LIBC_INLINE T constexpr static load(const void *ptr, bool aligned = false) {
+ if (aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
T tmp;
- __builtin_memcpy(&tmp, ptr, sizeof(T));
+ __builtin_memcpy_inline(
+ &tmp, reinterpret_cast<const simd_element_type_t<T> *>(ptr), sizeof(T));
return tmp;
}
template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T load_aligned(const void *ptr) {
- return load_unaligned<T>(__builtin_assume_aligned(ptr, alignof(T)));
+LIBC_INLINE constexpr static void store(T v, void *ptr, bool aligned = false) {
+ if (aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
+ __builtin_memcpy_inline(ptr, &v, sizeof(T));
}
template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T store_unaligned(T v, void *ptr) {
- __builtin_memcpy(ptr, &v, sizeof(T));
+LIBC_INLINE constexpr static T
+load_masked(simd<bool, simd_size_v<T>> mask, const void *ptr,
+ T passthru = internal::poison<T>(), bool aligned = false) {
+ if (aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
+ return __builtin_masked_load(
+ mask, reinterpret_cast<const simd_element_type_t<T> *>(ptr), passthru);
}
template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T store_aligned(T v, void *ptr) {
- store_unaligned<T>(v, __builtin_assume_aligned(ptr, alignof(T)));
+LIBC_INLINE constexpr static void store_masked(simd<bool, simd_size_v<T>> mask,
+ T v, void *ptr,
+ bool aligned = false) {
+ if (aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
+ __builtin_masked_store(mask, v,
+ reinterpret_cast<simd_element_type_t<T> *>(ptr));
+}
+template <typename T, typename Idx, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static T gather(simd<bool, simd_size_v<T>> mask, Idx idx,
+ const void *base, bool aligned = false) {
+ if (aligned)
+ base = __builtin_assume_aligned(base, alignof(T));
+ return __builtin_masked_gather(
+ mask, idx, reinterpret_cast<const simd_element_type_t<T> *>(base));
+}
+template <typename T, typename Idx, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static void scatter(simd<bool, simd_size_v<T>> mask,
+ Idx idx, T v, void *base,
+ bool aligned = false) {
+ if (aligned)
+ base = __builtin_assume_aligned(base, alignof(T));
+ __builtin_masked_scatter(mask, idx, v,
+ reinterpret_cast<simd_element_type_t<T> *>(base));
}
template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T
-masked_load(simd<bool, simd_size_v<T>> m, void *ptr,
- T passthru = internal::poison<simd_element_type<T>>()) {
- return __builtin_masked_load(m, ptr, passthru);
+LIBC_INLINE constexpr static T
+expand(simd<bool, simd_size_v<T>> mask, const void *ptr,
+ T passthru = internal::poison<T>(), bool aligned = false) {
+ if (aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
+ return __builtin_masked_expand_load(
+ mask, reinterpret_cast<const simd_element_type_t<T> *>(ptr), passthru);
}
template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T masked_store(simd<bool, simd_size_v<T>> m, T v, void *ptr) {
- __builtin_masked_store(
- m, v, static_cast<T *>(__builtin_assume_aligned(ptr, alignof(T))));
+LIBC_INLINE constexpr static void compress(simd<bool, simd_size_v<T>> mask, T v,
+ void *ptr, bool aligned = false) {
+ if (aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
+ __builtin_masked_compress_store(
+ mask, v, reinterpret_cast<simd_element_type_t<T> *>(ptr));
}
// Construction helpers.
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index 5e553e301d4da..d7435afb03719 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -32,14 +32,15 @@ string_length(const char *src) {
const cpp::simd<char> *aligned = reinterpret_cast<const cpp::simd<char> *>(
__builtin_align_down(src, alignment));
- cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(aligned);
+ cpp::simd<char> chars = cpp::load<cpp::simd<char>>(aligned, /*aligned=*/true);
cpp::simd_mask<char> mask = chars == null_byte;
size_t offset = src - reinterpret_cast<const char *>(aligned);
if (cpp::any_of(shift_mask(mask, offset)))
return cpp::find_first_set(shift_mask(mask, offset));
for (;;) {
- cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(++aligned);
+ cpp::simd<char> chars = cpp::load<cpp::simd<char>>(++aligned,
+ /*aligned=*/true);
cpp::simd_mask<char> mask = chars == null_byte;
if (cpp::any_of(mask))
return (reinterpret_cast<const char *>(aligned) - src) +
diff --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp
index c8f34df8ab028..8bead8461d649 100644
--- a/libc/test/src/__support/CPP/simd_test.cpp
+++ b/libc/test/src/__support/CPP/simd_test.cpp
@@ -86,3 +86,65 @@ TEST(LlvmLibcSIMDTest, SplitConcat) {
cpp::simd<char, 8> n = cpp::concat(c, c, c, c, c, c, c, c);
EXPECT_TRUE(cpp::all_of(n == ~0));
}
+
+TEST(LlvmLibcSIMDTest, LoadStore) {
+ constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
+ alignas(alignof(cpp::simd<int>)) int buf[SIZE];
+
+ cpp::simd<int> v1 = cpp::splat(1);
+ cpp::store(v1, buf);
+ cpp::simd<int> v2 = cpp::load<cpp::simd<int>>(buf);
+
+ EXPECT_TRUE(cpp::all_of(v1 == 1));
+ EXPECT_TRUE(cpp::all_of(v2 == 1));
+
+ cpp::simd<int> v3 = cpp::splat(2);
+ cpp::store(v3, buf, /*aligned=*/true);
+ cpp::simd<int> v4 = cpp::load<cpp::simd<int>>(buf, /*aligned=*/true);
+
+ EXPECT_TRUE(cpp::all_of(v3 == 2));
+ EXPECT_TRUE(cpp::all_of(v4 == 2));
+}
+
+TEST(LlvmLibcSIMDTest, MaskedLoadStore) {
+ constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
+ alignas(alignof(cpp::simd<int>)) int buf[SIZE] = {0};
+
+ cpp::simd<int> mask = cpp::iota(0) % 2 == 0;
+ cpp::simd<int> v1 = cpp::splat(1);
+
+ cpp::store_masked<cpp::simd<int>>(mask, v1, buf);
+ cpp::simd<int> v2 = cpp::load_masked<cpp::simd<int>>(mask, buf);
+
+ EXPECT_TRUE(cpp::all_of((v2 == 1) == mask));
+}
+
+TEST(LlvmLibcSIMDTest, GatherScatter) {
+ constexpr int SIZE = cpp::simd_size_v<cpp::simd<int>>;
+ alignas(alignof(cpp::simd<int>)) int buf[SIZE];
+
+ cpp::simd<int> mask = cpp::iota(1);
+ cpp::simd<int> idx = cpp::iota(0);
+ cpp::simd<int> v1 = cpp::splat(1);
+
+ cpp::scatter<cpp::simd<int>>(mask, idx, v1, buf);
+ cpp::simd<int> v2 = cpp::gather<cpp::simd<int>>(mask, idx, buf);
+
+ EXPECT_TRUE(cpp::all_of(v1 == 1));
+ EXPECT_TRUE(cpp::all_of(v2 == 1));
+}
+
+TEST(LlvmLibcSIMDTest, MaskedCompressExpand) {
+ constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
+ alignas(alignof(cpp::simd<int>)) int buf[SIZE] = {0};
+
+ cpp::simd<int> mask_expand = cpp::iota(0) % 2 == 0;
+ cpp::simd<int> mask_compress = 1;
+
+ cpp::simd<int> v1 = cpp::iota(0);
+
+ cpp::compress<cpp::simd<int>>(mask_compress, v1, buf);
+ cpp::simd<int> v2 = cpp::expand<cpp::simd<int>>(mask_expand, buf);
+
+ EXPECT_TRUE(cpp::all_of(!mask_expand || v2 <= SIZE / 2));
+}
More information about the libc-commits mailing list