[libc-commits] [libc] [libc][mathvec] Add loop over scalar for unary FP32 (PR #199273)

Thu Jun 11 05:06:59 PDT 2026

================
@@ -0,0 +1,34 @@
+//===-- Implementation header for SIMD acosf ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATHVEC_ACOSF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATHVEC_ACOSF_H
+
+#include "src/__support/CPP/simd.h"
+#define LIBC_MATH (LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
+#include "src/__support/math/acosf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace mathvec {
+
+template <size_t N>
+LIBC_INLINE cpp::simd<float, N> acosf(cpp::simd<float, N> x) {
+  cpp::simd<float, N> result;
----------------
kaladron wrote:

@lntue What do you think of this? I don't love the pointer arithmetic in each function. I'd like us to fix up cpp::simd to give us the same functionality as C++26's std::simd. Something like this:

```c++
#include "src/__support/CPP/type_traits.h"
#include "src/__support/CPP/utility/integer_sequence.h"
#include "src/__support/macros/attributes.h"

namespace LIBC_NAMESPACE_DECL {
namespace cpp {

template <typename T, size_t N>
class simd {
public:
  using storage_type = T [[clang::ext_vector_type(N)]];
  using value_type = T;
  
  LIBC_INLINE constexpr static size_t size() { return N; }

private:
  storage_type value;

public:
  // 1. Default constructor
  LIBC_INLINE constexpr simd() = default;

  // 2. Implicit conversion from primitive vector type (for compatibility)
  LIBC_INLINE constexpr simd(storage_type v) : value(v) {}

  // 3. Broadcast constructor
  LIBC_INLINE constexpr explicit simd(T v) {
    value = storage_type(v); // Clang splat
  }

  // 4. C++26 Generator Constructor
  template <typename G>
  LIBC_INLINE constexpr explicit simd(G gen) {
    init(gen, cpp::make_index_sequence<N>{});
  }

  // Element access
  LIBC_INLINE constexpr T operator[](size_t i) const { return value[i]; }
  LIBC_INLINE constexpr T& operator[](size_t i) { return value[i]; }

  // Implicit conversion back to primitive vector type
  // This allows cpp::simd to be passed directly to Clang builtins (e.g. __builtin_elementwise_sin)
  LIBC_INLINE constexpr operator storage_type() const { return value; }
  LIBC_INLINE constexpr operator storage_type&() { return value; }

private:
  // Helper to expand the generator at compile time
  template <typename G, size_t... Indices>
  LIBC_INLINE constexpr void init(G gen, cpp::index_sequence<Indices...>) {
    // We construct the vector element-by-element.
    // Clang's ext_vector_type allows initialization from a list of elements.
    value = storage_type{gen(cpp::integral_constant<size_t, Indices>{})...};
  }
};

// Layout guarantees: the wrapper must be exactly as large as the raw Clang
// vector it holds, and must remain trivially copyable.
static_assert(sizeof(simd<float, 4>) == sizeof(simd<float, 4>::storage_type),
              "cpp::simd must not add size overhead over the raw vector");
static_assert(cpp::is_trivially_copyable_v<simd<float, 4>>,
              "cpp::simd must be trivially copyable");

} // namespace cpp
} // namespace LIBC_NAMESPACE_DECL
```

That should let the functions look like:

```c++
// Lane-wise acosf: builds the result through the simd generator constructor,
// which invokes the lambda once per lane with an integral_constant carrying
// that lane's index.
template <size_t N>
LIBC_INLINE cpp::simd<float, N> acosf(cpp::simd<float, N> x) {
  // `auto i` makes this a generic lambda (the `<auto i>` spelling is not
  // valid C++). The index is read from the parameter's type so the code does
  // not depend on integral_constant offering an implicit conversion to size_t.
  return cpp::simd<float, N>([&](auto i) {
    return math::acosf(x[decltype(i)::value]);
  });
}
```

This should come with no overhead in the final binary. It's a deeper change, though (and we should provide the std::simd-style `cpp::simd` in a separate PR first).

With another wrapper like so:

```c++
namespace LIBC_NAMESPACE_DECL {
namespace mathvec {

// Generic Unary Vector Wrapper
//
// Applies the scalar function `ScalarFunc` to every lane of `x` and returns
// the results as a new vector. `T` and `N` are deduced from the argument, so
// callers only spell out the scalar function: unary_vectorize<f>(x).
template <auto ScalarFunc, typename T, size_t N>
LIBC_INLINE constexpr cpp::simd<T, N> unary_vectorize(cpp::simd<T, N> x) {
  // Generic lambda (`auto i`; the `<auto i>` spelling is not valid C++). It
  // is handed to the simd generator constructor, and the lane index is taken
  // from the argument's type rather than from an implicit conversion.
  return cpp::simd<T, N>([&](auto i) {
    return ScalarFunc(x[decltype(i)::value]);
  });
}

// Generic Binary Vector Wrapper (for future use)
//
// Applies the scalar function `ScalarFunc` to corresponding lanes of `x` and
// `y` and returns the results as a new vector. `T` and `N` are deduced from
// the arguments.
template <auto ScalarFunc, typename T, size_t N>
LIBC_INLINE constexpr cpp::simd<T, N> binary_vectorize(cpp::simd<T, N> x,
                                                       cpp::simd<T, N> y) {
  // Generic lambda (`auto i`; the `<auto i>` spelling is not valid C++). It
  // is handed to the simd generator constructor, and the lane index is taken
  // from the argument's type rather than from an implicit conversion.
  return cpp::simd<T, N>([&](auto i) {
    constexpr size_t lane = decltype(i)::value;
    return ScalarFunc(x[lane], y[lane]);
  });
}

} // namespace mathvec
} // namespace LIBC_NAMESPACE_DECL
```

The math functions become:

```c++
#include "src/__support/CPP/simd.h"
#include "src/__support/math/acosf.h"
#include "src/__support/mathvec/vector_wrapper.h"

namespace LIBC_NAMESPACE_DECL {
namespace mathvec {

// Vector acosf: forwards to the generic unary wrapper with the scalar
// math::acosf as the per-lane function.
template <size_t N>
LIBC_INLINE cpp::simd<float, N> acosf(cpp::simd<float, N> x) {
  cpp::simd<float, N> lanes = unary_vectorize<math::acosf>(x);
  return lanes;
}

} // namespace mathvec
} // namespace LIBC_NAMESPACE_DECL
```

WDYT?

https://github.com/llvm/llvm-project/pull/199273