[libc-commits] [libc] 0b87d27 - [libc] Implement vector 'split' and 'concat' routines (#157537)

Wed Sep 10 15:51:52 PDT 2025

Author: Joseph Huber
Date: 2025-09-10T17:51:48-05:00
New Revision: 0b87d27097dc71916dd041e5be87fefd26006867

URL: https://github.com/llvm/llvm-project/commit/0b87d27097dc71916dd041e5be87fefd26006867
DIFF: https://github.com/llvm/llvm-project/commit/0b87d27097dc71916dd041e5be87fefd26006867.diff

LOG: [libc] Implement vector 'split' and 'concat' routines (#157537)

Summary:
This adds 'split' and 'concat' helper routines for changing the size of
an existing vector. This includes a simple tuple type to do the
splitting. The tuple doesn't support structured bindings yet.

The concat function is more limited than what would be ideal, but the
shufflevector builtin requires both of its input vectors to be the same
size and I didn't think it was worth wrangling with that just yet.

Added: 
    

Modified: 
    libc/src/__support/CPP/CMakeLists.txt
    libc/src/__support/CPP/simd.h
    libc/test/src/__support/CPP/simd_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt
index d9b86b4fd2973..a9cb67df0b427 100644

--- a/libc/src/__support/CPP/CMakeLists.txt
+++ b/libc/src/__support/CPP/CMakeLists.txt
@@ -224,4 +224,7 @@ add_header_library(
   simd
   HDRS
     simd.h
+  DEPENDS
+    .utility
+    .tuple
 )

diff  --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h
index 54fe70a6e9830..3c7e65acc3c0a 100644
--- a/libc/src/__support/CPP/simd.h
+++ b/libc/src/__support/CPP/simd.h
@@ -16,7 +16,9 @@
 #include "hdr/stdint_proxy.h"
 #include "src/__support/CPP/algorithm.h"
 #include "src/__support/CPP/limits.h"
+#include "src/__support/CPP/tuple.h"
 #include "src/__support/CPP/type_traits.h"
+#include "src/__support/CPP/utility/integer_sequence.h"
 #include "src/__support/macros/attributes.h"
 #include "src/__support/macros/config.h"
 
@@ -32,9 +34,6 @@ namespace cpp {
 
 namespace internal {
 
-template <typename T>
-using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT);
-
 #if defined(LIBC_TARGET_CPU_HAS_AVX512F)
 template <typename T>
 LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T);
@@ -48,9 +47,6 @@ LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T);
 template <typename T> LIBC_INLINE constexpr size_t native_vector_size = 1;
 #endif
 
-template <typename T> LIBC_INLINE constexpr T poison() {
-  return __builtin_nondeterministic_value(T());
-}
 } // namespace internal
 
 // Type aliases.
@@ -61,6 +57,74 @@ using simd = T [[clang::ext_vector_type(N)]];
 template <typename T>
 using simd_mask = simd<bool, internal::native_vector_size<T>>;
 
+namespace internal {
+
+template <typename T>
+using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT);
+
+template <typename T> LIBC_INLINE constexpr T poison() {
+  return __builtin_nondeterministic_value(T());
+}
+
+template <typename T, size_t N, size_t OriginalSize, size_t... Indices>
+LIBC_INLINE constexpr static cpp::simd<T, sizeof...(Indices)>
+extend(cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
+  return __builtin_shufflevector(
+      x, x, (Indices < OriginalSize ? static_cast<int>(Indices) : -1)...);
+}
+
+template <typename T, size_t N, size_t TargetSize, size_t OriginalSize>
+LIBC_INLINE constexpr static auto extend(cpp::simd<T, N> x) {
+  // Recursively resize an input vector to the target size, at most doubling
+  // its size on each step because of a shufflevector limitation.
+  if constexpr (N == TargetSize)
+    return x;
+  else if constexpr (TargetSize <= 2 * N)
+    return extend<T, N, TargetSize>(x, cpp::make_index_sequence<TargetSize>{});
+  else
+    return extend<T, 2 * N, TargetSize, OriginalSize>(
+        extend<T, N, 2 * N>(x, cpp::make_index_sequence<2 * N>{}));
+}
+
+template <typename T, size_t N, size_t M, size_t... Indices>
+LIBC_INLINE constexpr static cpp::simd<T, N + M>
+concat(cpp::simd<T, N> x, cpp::simd<T, M> y, cpp::index_sequence<Indices...>) {
+  constexpr size_t Size = cpp::max(N, M);
+  auto remap = [](size_t idx) -> int {
+    if (idx < N)
+      return static_cast<int>(idx);
+    if (idx < N + M)
+      return static_cast<int>((idx - N) + Size);
+    return -1;
+  };
+
+  // Extend the input vectors until they are the same size, then shuffle in
+  // only the lanes that correspond to the original values.
+  auto x_ext = extend<T, N, Size, N>(x);
+  auto y_ext = extend<T, M, Size, M>(y);
+  return __builtin_shufflevector(x_ext, y_ext, remap(Indices)...);
+}
+
+template <typename T, size_t N, size_t Count, size_t Offset, size_t... Indices>
+LIBC_INLINE constexpr static cpp::simd<T, Count>
+slice(cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
+  return __builtin_shufflevector(x, x, (Offset + Indices)...);
+}
+
+template <typename T, size_t N, size_t Offset, size_t Head, size_t... Tail>
+LIBC_INLINE constexpr static auto split(cpp::simd<T, N> x) {
+  // Recursively splits the input vector by walking the variadic template list,
+  // increasing our current head each call.
+  auto result = cpp::make_tuple(
+      slice<T, N, Head, Offset>(x, cpp::make_index_sequence<Head>{}));
+  if constexpr (sizeof...(Tail) > 0)
+    return cpp::tuple_cat(result, split<T, N, Offset + Head, Tail...>(x));
+  else
+    return result;
+}
+
+} // namespace internal
+
 // Type trait helpers.
 template <typename T>
 struct simd_size : cpp::integral_constant<size_t, __builtin_vectorelements(T)> {
@@ -273,6 +337,25 @@ LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x,
   return m ? x : y;
 }
 
+// Shuffling helpers.
+template <typename T, size_t N, size_t M>
+LIBC_INLINE constexpr static auto concat(cpp::simd<T, N> x, cpp::simd<T, M> y) {
+  return internal::concat(x, y, make_index_sequence<N + M>{});
+}
+template <typename T, size_t N, size_t M, typename... Rest>
+LIBC_INLINE constexpr static auto concat(cpp::simd<T, N> x, cpp::simd<T, M> y,
+                                         Rest... rest) {
+  auto xy = concat(x, y);
+  if constexpr (sizeof...(Rest))
+    return concat(xy, rest...);
+  else
+    return xy;
+}
+template <size_t... Sizes, typename T, size_t N> auto split(cpp::simd<T, N> x) {
+  static_assert((... + Sizes) == N, "split sizes must sum to vector size");
+  return internal::split<T, N, 0, Sizes...>(x);
+}
+
 // TODO: where expressions, scalar overloads, ABI types.
 
 } // namespace cpp

diff  --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp
index 600bf65057b21..b4f5685e3b1d1 100644
--- a/libc/test/src/__support/CPP/simd_test.cpp
+++ b/libc/test/src/__support/CPP/simd_test.cpp
@@ -68,3 +68,19 @@ TEST(LlvmLibcSIMDTest, MaskOperations) {
   EXPECT_EQ(cpp::find_first_set(mask), 0);
   EXPECT_EQ(cpp::find_last_set(mask), 2);
 }
+
+TEST(LlvmLibcSIMDTest, SplitConcat) {
+  cpp::simd<char, 8> v{1, 1, 2, 2, 3, 3, 4, 4};
+  auto [v1, v2, v3, v4] = cpp::split<2, 2, 2, 2>(v);
+  EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(v1 == 1)));
+  EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(v2 == 2)));
+  EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(v3 == 3)));
+  EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(v4 == 4)));
+
+  cpp::simd<char, 8> m = cpp::concat(v1, v2, v3, v4);
+  EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(m == v)));
+
+  cpp::simd<char, 1> c(~0);
+  cpp::simd<char, 8> n = cpp::concat(c, c, c, c, c, c, c, c);
+  EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(n == ~0)));
+}