[clang] [X86][AVX10.2] Add comments for the avx10_2convertintrin.h file (PR #120766)

Sat Dec 21 06:40:22 PST 2024

================
@@ -24,567 +24,3243 @@
   __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
                  __min_vector_width__(256)))
 
+/// Convert two 128-bit vectors, \a __A and \a __B, containing packed
+/// single-precision (32-bit) floating-point elements to a 128-bit vector
+/// containing FP16 elements.
+///
+/// \code{.operation}
+/// FOR i := 0 to 7
+/// 	IF i < 4
+/// 		dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i])
+/// 	ELSE
+/// 		dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4])
+/// 	FI
+/// ENDFOR
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction.
+///
+/// \param __A
+///    A 128-bit vector of [4 x float].
+/// \param __B
+///    A 128-bit vector of [4 x float].
+/// \returns
+///    A 128-bit vector of [8 x fp16]. Lower elements correspond to the
+///    (converted) elements from \a __B; higher order elements correspond to the
+///    (converted) elements from \a __A.
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A,
                                                                __m128 __B) {
   return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
       (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1));
 }
 
+/// Convert two 128-bit vectors, \a __A and \a __B, containing packed
+/// single-precision (32-bit) floating-point elements to a 128-bit vector
+/// containing FP16 elements. Merging mask \a __U is used to determine if given
+/// element should be taken from \a __W instead.
+///
+/// \code{.operation}
+/// FOR i := 0 to 7
+/// 	IF mask[i]
+/// 		dst.fp16[i] := __W[i]
----------------
phoebewang wrote:

Switch it with the ELSE branch.

https://github.com/llvm/llvm-project/pull/120766