[libc-commits] [libc] [libc] Update the memory helper functions for simd types (PR #160174)

Joseph Huber via libc-commits libc-commits at lists.llvm.org
Fri Sep 26 05:32:38 PDT 2025


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/160174

>From 442156ba7ad8d59cacbda90f05762b971df0f00a Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 22 Sep 2025 13:46:49 -0500
Subject: [PATCH] [libc] Update the memory helper functions for simd types

Summary:
This unifies the interface into a set of `load` and `store` functions
that optionally accept a mask, plus indices for masked gathers and
scatters.

I had to rename these from `load` and `store` because those names
conflict with the other version in `op_generic`. I might just work
around that with a trait instead.

inline
---
 libc/src/__support/CPP/simd.h                 | 70 ++++++++++++++-----
 .../memory_utils/generic/inline_strlen.h      |  5 +-
 libc/test/src/__support/CPP/simd_test.cpp     | 62 ++++++++++++++++
 3 files changed, 119 insertions(+), 18 deletions(-)

diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h
index d2a5b17fa4b9f..422d2f4c8433d 100644
--- a/libc/src/__support/CPP/simd.h
+++ b/libc/src/__support/CPP/simd.h
@@ -287,34 +287,72 @@ LIBC_INLINE constexpr static T hmax(simd<T, N> v) {
 }
 
 // Accessor helpers.
-template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T load_unaligned(const void *ptr) {
+template <typename T>
+LIBC_INLINE T constexpr static load(const void *ptr, bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
   T tmp;
-  __builtin_memcpy(&tmp, ptr, sizeof(T));
+  __builtin_memcpy_inline(
+      &tmp, reinterpret_cast<const simd_element_type_t<T> *>(ptr), sizeof(T));
   return tmp;
 }
 template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T load_aligned(const void *ptr) {
-  return load_unaligned<T>(__builtin_assume_aligned(ptr, alignof(T)));
+LIBC_INLINE constexpr static void store(T v, void *ptr, bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  __builtin_memcpy_inline(ptr, &v, sizeof(T));
 }
 template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T store_unaligned(T v, void *ptr) {
-  __builtin_memcpy(ptr, &v, sizeof(T));
+LIBC_INLINE constexpr static T
+load_masked(simd<bool, simd_size_v<T>> mask, const void *ptr,
+            T passthru = internal::poison<T>(), bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  return __builtin_masked_load(
+      mask, reinterpret_cast<const simd_element_type_t<T> *>(ptr), passthru);
 }
 template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T store_aligned(T v, void *ptr) {
-  store_unaligned<T>(v, __builtin_assume_aligned(ptr, alignof(T)));
+LIBC_INLINE constexpr static void store_masked(simd<bool, simd_size_v<T>> mask,
+                                               T v, void *ptr,
+                                               bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  __builtin_masked_store(mask, v,
+                         reinterpret_cast<simd_element_type_t<T> *>(ptr));
+}
+template <typename T, typename Idx, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static T gather(simd<bool, simd_size_v<T>> mask, Idx idx,
+                                      const void *base, bool aligned = false) {
+  if (aligned)
+    base = __builtin_assume_aligned(base, alignof(T));
+  return __builtin_masked_gather(
+      mask, idx, reinterpret_cast<const simd_element_type_t<T> *>(base));
+}
+template <typename T, typename Idx, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static void scatter(simd<bool, simd_size_v<T>> mask,
+                                          Idx idx, T v, void *base,
+                                          bool aligned = false) {
+  if (aligned)
+    base = __builtin_assume_aligned(base, alignof(T));
+  __builtin_masked_scatter(mask, idx, v,
+                           reinterpret_cast<simd_element_type_t<T> *>(base));
 }
 template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T
-masked_load(simd<bool, simd_size_v<T>> m, void *ptr,
-            T passthru = internal::poison<simd_element_type<T>>()) {
-  return __builtin_masked_load(m, ptr, passthru);
+LIBC_INLINE constexpr static T
+expand(simd<bool, simd_size_v<T>> mask, const void *ptr,
+       T passthru = internal::poison<T>(), bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  return __builtin_masked_expand_load(
+      mask, reinterpret_cast<const simd_element_type_t<T> *>(ptr), passthru);
 }
 template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T masked_store(simd<bool, simd_size_v<T>> m, T v, void *ptr) {
-  __builtin_masked_store(
-      m, v, static_cast<T *>(__builtin_assume_aligned(ptr, alignof(T))));
+LIBC_INLINE constexpr static void compress(simd<bool, simd_size_v<T>> mask, T v,
+                                           void *ptr, bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  __builtin_masked_compress_store(
+      mask, v, reinterpret_cast<simd_element_type_t<T> *>(ptr));
 }
 
 // Construction helpers.
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index 5e553e301d4da..d7435afb03719 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -32,14 +32,15 @@ string_length(const char *src) {
   const cpp::simd<char> *aligned = reinterpret_cast<const cpp::simd<char> *>(
       __builtin_align_down(src, alignment));
 
-  cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(aligned);
+  cpp::simd<char> chars = cpp::load<cpp::simd<char>>(aligned, /*aligned=*/true);
   cpp::simd_mask<char> mask = chars == null_byte;
   size_t offset = src - reinterpret_cast<const char *>(aligned);
   if (cpp::any_of(shift_mask(mask, offset)))
     return cpp::find_first_set(shift_mask(mask, offset));
 
   for (;;) {
-    cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(++aligned);
+    cpp::simd<char> chars = cpp::load<cpp::simd<char>>(++aligned,
+                                                       /*aligned=*/true);
     cpp::simd_mask<char> mask = chars == null_byte;
     if (cpp::any_of(mask))
       return (reinterpret_cast<const char *>(aligned) - src) +
diff --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp
index c8f34df8ab028..8bead8461d649 100644
--- a/libc/test/src/__support/CPP/simd_test.cpp
+++ b/libc/test/src/__support/CPP/simd_test.cpp
@@ -86,3 +86,65 @@ TEST(LlvmLibcSIMDTest, SplitConcat) {
   cpp::simd<char, 8> n = cpp::concat(c, c, c, c, c, c, c, c);
   EXPECT_TRUE(cpp::all_of(n == ~0));
 }
+
+TEST(LlvmLibcSIMDTest, LoadStore) {
+  constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
+  alignas(alignof(cpp::simd<int>)) int buf[SIZE];
+
+  cpp::simd<int> v1 = cpp::splat(1);
+  cpp::store(v1, buf);
+  cpp::simd<int> v2 = cpp::load<cpp::simd<int>>(buf);
+
+  EXPECT_TRUE(cpp::all_of(v1 == 1));
+  EXPECT_TRUE(cpp::all_of(v2 == 1));
+
+  cpp::simd<int> v3 = cpp::splat(2);
+  cpp::store(v3, buf, /*aligned=*/true);
+  cpp::simd<int> v4 = cpp::load<cpp::simd<int>>(buf, /*aligned=*/true);
+
+  EXPECT_TRUE(cpp::all_of(v3 == 2));
+  EXPECT_TRUE(cpp::all_of(v4 == 2));
+}
+
+TEST(LlvmLibcSIMDTest, MaskedLoadStore) {
+  constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
+  alignas(alignof(cpp::simd<int>)) int buf[SIZE] = {0};
+
+  cpp::simd<int> mask = cpp::iota(0) % 2 == 0;
+  cpp::simd<int> v1 = cpp::splat(1);
+
+  cpp::store_masked<cpp::simd<int>>(mask, v1, buf);
+  cpp::simd<int> v2 = cpp::load_masked<cpp::simd<int>>(mask, buf);
+
+  EXPECT_TRUE(cpp::all_of((v2 == 1) == mask));
+}
+
+TEST(LlvmLibcSIMDTest, GatherScatter) {
+  constexpr int SIZE = cpp::simd_size_v<cpp::simd<int>>;
+  alignas(alignof(cpp::simd<int>)) int buf[SIZE];
+
+  cpp::simd<int> mask = cpp::iota(1);
+  cpp::simd<int> idx = cpp::iota(0);
+  cpp::simd<int> v1 = cpp::splat(1);
+
+  cpp::scatter<cpp::simd<int>>(mask, idx, v1, buf);
+  cpp::simd<int> v2 = cpp::gather<cpp::simd<int>>(mask, idx, buf);
+
+  EXPECT_TRUE(cpp::all_of(v1 == 1));
+  EXPECT_TRUE(cpp::all_of(v2 == 1));
+}
+
+TEST(LlvmLibcSIMDTest, MaskedCompressExpand) {
+  constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
+  alignas(alignof(cpp::simd<int>)) int buf[SIZE] = {0};
+
+  cpp::simd<int> mask_expand = cpp::iota(0) % 2 == 0;
+  cpp::simd<int> mask_compress = 1;
+
+  cpp::simd<int> v1 = cpp::iota(0);
+
+  cpp::compress<cpp::simd<int>>(mask_compress, v1, buf);
+  cpp::simd<int> v2 = cpp::expand<cpp::simd<int>>(mask_expand, buf);
+
+  EXPECT_TRUE(cpp::all_of(!mask_expand || v2 <= SIZE / 2));
+}



More information about the libc-commits mailing list