[clang] [X86][AVX10.2] Add comments for the avx10_2convertintrin.h file (PR #120766)
Phoebe Wang via cfe-commits
cfe-commits at lists.llvm.org
Sat Dec 21 06:40:22 PST 2024
================
@@ -24,567 +24,3243 @@
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
__min_vector_width__(256)))
+/// Convert two 128-bit vectors, \a __A and \a __B, containing packed
+/// single-precision (32-bit) floating-point elements to a 128-bit vector
+/// containing FP16 elements.
+///
+/// \code{.operation}
+/// FOR i := 0 to 7
+///   IF i < 4
+///     dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i])
+///   ELSE
+///     dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4])
+///   FI
+/// ENDFOR
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction.
+///
+/// \param __A
+/// A 128-bit vector of [4 x float].
+/// \param __B
+/// A 128-bit vector of [4 x float].
+/// \returns
+/// A 128-bit vector of [8 x fp16]. Lower elements correspond to the
+/// (converted) elements from \a __B; higher order elements correspond to the
+/// (converted) elements from \a __A.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A,
                                                               __m128 __B) {
  // Unmasked form: pass a mask with all eight bits set so that (presumably)
  // every result element comes from the conversion.
  const __mmask8 __all_lanes = (__mmask8)(-1);
  // Pass-through operand required by the masked builtin; with every mask bit
  // set it is presumably never selected, so a zero vector is supplied.
  const __v8hf __passthru = (__v8hf)_mm_setzero_ph();
  return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
      (__v4sf)__A, (__v4sf)__B, __passthru, __all_lanes);
}
+/// Convert two 128-bit vectors, \a __A and \a __B, containing packed
+/// single-precision (32-bit) floating-point elements to a 128-bit vector
+/// containing FP16 elements. Merging mask \a __U is used to determine whether
+/// a given element should be taken from \a __W instead.
+///
+/// \code{.operation}
+/// FOR i := 0 to 7
+/// IF mask[i]
+/// dst.fp16[i] := __W[i]
----------------
phoebewang wrote:
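Switch it with the ELSE branch: with merge masking, a set mask bit selects the converted value, and the element from `__W` is used only when the bit is clear.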
https://github.com/llvm/llvm-project/pull/120766
More information about the cfe-commits
mailing list