[clang] df08b34 - [NFC] Cleanup miscellaneous header items
Warren Ristow via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 26 14:38:19 PDT 2022
Author: Warren Ristow
Date: 2022-04-26T14:36:49-07:00
New Revision: df08b3493869540bad5d4b040dae814e078b411d
URL: https://github.com/llvm/llvm-project/commit/df08b3493869540bad5d4b040dae814e078b411d
DIFF: https://github.com/llvm/llvm-project/commit/df08b3493869540bad5d4b040dae814e078b411d.diff
LOG: [NFC] Cleanup miscellaneous header items
- Explain the use of the _MM_SHUFFLE and _MM_SHUFFLE2 macros
- Update some doxygen parameter descriptions to match the implementations
- Add "see also" doxygen tags to some intrinsics
- Minor clang-format changes
Reviewers: RKSimon
Differential Revision: https://reviews.llvm.org/D124469
Added:
Modified:
clang/lib/Headers/__wmmintrin_pclmul.h
clang/lib/Headers/avxintrin.h
clang/lib/Headers/bmiintrin.h
clang/lib/Headers/emmintrin.h
clang/lib/Headers/smmintrin.h
clang/lib/Headers/xmmintrin.h
Removed:
################################################################################
diff --git a/clang/lib/Headers/__wmmintrin_pclmul.h b/clang/lib/Headers/__wmmintrin_pclmul.h
index fef4b93dbb433..c9a6d50bdc89d 100644
--- a/clang/lib/Headers/__wmmintrin_pclmul.h
+++ b/clang/lib/Headers/__wmmintrin_pclmul.h
@@ -22,23 +22,23 @@
/// \headerfile <x86intrin.h>
///
/// \code
-/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);
+/// __m128i _mm_clmulepi64_si128(__m128i X, __m128i Y, const int I);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCLMULQDQ </c> instruction.
///
-/// \param __X
+/// \param X
/// A 128-bit vector of [2 x i64] containing one of the source operands.
-/// \param __Y
+/// \param Y
/// A 128-bit vector of [2 x i64] containing one of the source operands.
-/// \param __I
+/// \param I
/// An immediate value specifying which 64-bit values to select from the
-/// operands. Bit 0 is used to select a value from operand \a __X, and bit
-/// 4 is used to select a value from operand \a __Y: \n
-/// Bit[0]=0 indicates that bits[63:0] of operand \a __X are used. \n
-/// Bit[0]=1 indicates that bits[127:64] of operand \a __X are used. \n
-/// Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used. \n
-/// Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
+/// operands. Bit 0 is used to select a value from operand \a X, and bit
+/// 4 is used to select a value from operand \a Y: \n
+/// Bit[0]=0 indicates that bits[63:0] of operand \a X are used. \n
+/// Bit[0]=1 indicates that bits[127:64] of operand \a X are used. \n
+/// Bit[4]=0 indicates that bits[63:0] of operand \a Y are used. \n
+/// Bit[4]=1 indicates that bits[127:64] of operand \a Y are used.
/// \returns The 128-bit integer vector containing the result of the carry-less
/// multiplication of the selected 64-bit values.
#define _mm_clmulepi64_si128(X, Y, I) \
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index df2d1a2690d8e..a8f953c260c2a 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -1504,7 +1504,10 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 00: Bits [31:0] and [159:128] are copied from the selected operand. \n
/// 01: Bits [63:32] and [191:160] are copied from the selected operand. \n
/// 10: Bits [95:64] and [223:192] are copied from the selected operand. \n
-/// 11: Bits [127:96] and [255:224] are copied from the selected operand.
+/// 11: Bits [127:96] and [255:224] are copied from the selected operand. \n
+/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
+/// <c>[b6, b4, b2, b0]</c>.
/// \returns A 256-bit vector of [8 x float] containing the shuffled values.
#define _mm256_shuffle_ps(a, b, mask) \
((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
@@ -1953,12 +1956,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// int _mm256_extract_epi32(__m256i X, const int N);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
/// instruction.
///
-/// \param __a
+/// \param X
/// A 256-bit vector of [8 x i32].
-/// \param __imm
+/// \param N
/// An immediate integer operand with bits [2:0] determining which vector
/// element is extracted and returned.
/// \returns A 32-bit integer containing the extracted 32 bits of extended
@@ -1971,12 +1978,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// int _mm256_extract_epi16(__m256i X, const int N);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
/// instruction.
///
-/// \param __a
+/// \param X
/// A 256-bit integer vector of [16 x i16].
-/// \param __imm
+/// \param N
/// An immediate integer operand with bits [3:0] determining which vector
/// element is extracted and returned.
/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
@@ -1990,12 +2001,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// int _mm256_extract_epi8(__m256i X, const int N);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
/// instruction.
///
-/// \param __a
+/// \param X
/// A 256-bit integer vector of [32 x i8].
-/// \param __imm
+/// \param N
/// An immediate integer operand with bits [4:0] determining which vector
/// element is extracted and returned.
/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
@@ -2010,12 +2025,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// long long _mm256_extract_epi64(__m256i X, const int N);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
/// instruction.
///
-/// \param __a
+/// \param X
/// A 256-bit integer vector of [4 x i64].
-/// \param __imm
+/// \param N
/// An immediate integer operand with bits [1:0] determining which vector
/// element is extracted and returned.
/// \returns A 64-bit integer containing the extracted 64 bits of extended
@@ -2030,18 +2049,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// __m256i _mm256_insert_epi32(__m256i X, int I, const int N);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
/// instruction.
///
-/// \param __a
+/// \param X
/// A vector of [8 x i32] to be used by the insert operation.
-/// \param __b
+/// \param I
/// An integer value. The replacement value for the insert operation.
-/// \param __imm
+/// \param N
/// An immediate integer specifying the index of the vector element to be
/// replaced.
-/// \returns A copy of vector \a __a, after replacing its element indexed by
-/// \a __imm with \a __b.
+/// \returns A copy of vector \a X, after replacing its element indexed by
+/// \a N with \a I.
#define _mm256_insert_epi32(X, I, N) \
((__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \
(int)(I), (int)(N)))
@@ -2053,18 +2076,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// __m256i _mm256_insert_epi16(__m256i X, int I, const int N);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
/// instruction.
///
-/// \param __a
+/// \param X
/// A vector of [16 x i16] to be used by the insert operation.
-/// \param __b
+/// \param I
/// An i16 integer value. The replacement value for the insert operation.
-/// \param __imm
+/// \param N
/// An immediate integer specifying the index of the vector element to be
/// replaced.
-/// \returns A copy of vector \a __a, after replacing its element indexed by
-/// \a __imm with \a __b.
+/// \returns A copy of vector \a X, after replacing its element indexed by
+/// \a N with \a I.
#define _mm256_insert_epi16(X, I, N) \
((__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \
(int)(I), (int)(N)))
@@ -2075,18 +2102,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// __m256i _mm256_insert_epi8(__m256i X, int I, const int N);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
/// instruction.
///
-/// \param __a
+/// \param X
/// A vector of [32 x i8] to be used by the insert operation.
-/// \param __b
+/// \param I
/// An i8 integer value. The replacement value for the insert operation.
-/// \param __imm
+/// \param N
/// An immediate integer specifying the index of the vector element to be
/// replaced.
-/// \returns A copy of vector \a __a, after replacing its element indexed by
-/// \a __imm with \a __b.
+/// \returns A copy of vector \a X, after replacing its element indexed by
+/// \a N with \a I.
#define _mm256_insert_epi8(X, I, N) \
((__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \
(int)(I), (int)(N)))
@@ -2098,18 +2129,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// __m256i _mm256_insert_epi64(__m256i X, long long I, const int N);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
/// instruction.
///
-/// \param __a
+/// \param X
/// A vector of [4 x i64] to be used by the insert operation.
-/// \param __b
+/// \param I
/// A 64-bit integer value. The replacement value for the insert operation.
-/// \param __imm
+/// \param N
/// An immediate integer specifying the index of the vector element to be
/// replaced.
-/// \returns A copy of vector \a __a, after replacing its element indexed by
-/// \a __imm with \a __b.
+/// \returns A copy of vector \a X, after replacing its element indexed by
+/// \a N with \a I.
#define _mm256_insert_epi64(X, I, N) \
((__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \
(long long)(I), (int)(N)))
diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h
index f583c215f919e..0db8ddfa0cbf3 100644
--- a/clang/lib/Headers/bmiintrin.h
+++ b/clang/lib/Headers/bmiintrin.h
@@ -47,6 +47,7 @@ __tzcnt_u16(unsigned short __X)
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of trailing zero
/// bits in the operand.
+/// \see _mm_tzcnt_32
static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
@@ -63,6 +64,7 @@ __tzcnt_u32(unsigned int __X)
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns A 32-bit integer containing the number of trailing zero bits in
/// the operand.
+/// \see __tzcnt_u32
static __inline__ int __RELAXED_FN_ATTRS
_mm_tzcnt_32(unsigned int __X)
{
@@ -83,6 +85,7 @@ _mm_tzcnt_32(unsigned int __X)
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
/// bits in the operand.
+/// \see _mm_tzcnt_64
static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
@@ -99,6 +102,7 @@ __tzcnt_u64(unsigned long long __X)
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns A 64-bit integer containing the number of trailing zero bits in
/// the operand.
+/// \see __tzcnt_u64
static __inline__ long long __RELAXED_FN_ATTRS
_mm_tzcnt_64(unsigned long long __X)
{
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 2078c7f0c11a4..c1e2915b6cb27 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -4126,21 +4126,25 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// int _mm_extract_epi16(__m128i a, const int imm);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
///
-/// \param __a
+/// \param a
/// A 128-bit integer vector.
-/// \param __imm
-/// An immediate value. Bits [2:0] selects values from \a __a to be assigned
+/// \param imm
+/// An immediate value. Bits [2:0] select values from \a a to be assigned
/// to bits[15:0] of the result. \n
-/// 000: assign values from bits [15:0] of \a __a. \n
-/// 001: assign values from bits [31:16] of \a __a. \n
-/// 010: assign values from bits [47:32] of \a __a. \n
-/// 011: assign values from bits [63:48] of \a __a. \n
-/// 100: assign values from bits [79:64] of \a __a. \n
-/// 101: assign values from bits [95:80] of \a __a. \n
-/// 110: assign values from bits [111:96] of \a __a. \n
-/// 111: assign values from bits [127:112] of \a __a.
+/// 000: assign values from bits [15:0] of \a a. \n
+/// 001: assign values from bits [31:16] of \a a. \n
+/// 010: assign values from bits [47:32] of \a a. \n
+/// 011: assign values from bits [63:48] of \a a. \n
+/// 100: assign values from bits [79:64] of \a a. \n
+/// 101: assign values from bits [95:80] of \a a. \n
+/// 110: assign values from bits [111:96] of \a a. \n
+/// 111: assign values from bits [127:112] of \a a.
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi16(a, imm) \
@@ -4154,18 +4158,22 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
///
/// \headerfile <x86intrin.h>
///
+/// \code
+/// __m128i _mm_insert_epi16(__m128i a, int b, const int imm);
+/// \endcode
+///
/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
///
-/// \param __a
+/// \param a
/// A 128-bit integer vector of [8 x i16]. This vector is copied to the
/// result and then one of the eight elements in the result is replaced by
-/// the lower 16 bits of \a __b.
-/// \param __b
+/// the lower 16 bits of \a b.
+/// \param b
/// An integer. The lower 16 bits of this parameter are written to the
-/// result beginning at an offset specified by \a __imm.
-/// \param __imm
+/// result beginning at an offset specified by \a imm.
+/// \param imm
/// An immediate value specifying the bit offset in the result at which the
-/// lower 16 bits of \a __b are written.
+/// lower 16 bits of \a b are written.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi16(a, b, imm) \
((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
@@ -4213,7 +4221,10 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
/// 00: assign values from bits [31:0] of \a a. \n
/// 01: assign values from bits [63:32] of \a a. \n
/// 10: assign values from bits [95:64] of \a a. \n
-/// 11: assign values from bits [127:96] of \a a.
+/// 11: assign values from bits [127:96] of \a a. \n
+/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
+/// <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shuffle_epi32(a, imm) \
((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))
@@ -4244,6 +4255,9 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
/// 01: assign values from bits [31:16] of \a a. \n
/// 10: assign values from bits [47:32] of \a a. \n
/// 11: assign values from bits [63:48] of \a a. \n
+/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
+/// <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflelo_epi16(a, imm) \
((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))
@@ -4274,6 +4288,9 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
/// 01: assign values from bits [95:80] of \a a. \n
/// 10: assign values from bits [111:96] of \a a. \n
/// 11: assign values from bits [127:112] of \a a. \n
+/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
+/// <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflehi_epi16(a, imm) \
((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))
@@ -4617,6 +4634,9 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) {
/// Bit[0] = 1: upper element of \a a copied to lower element of result. \n
/// Bit[1] = 0: lower element of \a b copied to upper element of result. \n
/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
+/// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro.
+/// <c>_MM_SHUFFLE2(b1, b0)</c> can create a 2-bit mask of the form
+/// <c>[b1, b0]</c>.
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
#define _mm_shuffle_pd(a, b, i) \
((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 52b2f6f15bbc2..46fb7bcd4e091 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1213,8 +1213,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1,
/// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.
///
/// \param __V
-/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-
-/// extended to 16-bit values.
+/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
+/// sign-extended to 16-bit values.
/// \returns A 128-bit vector of [8 x i16] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) {
/* This function always performs a signed extension, but __v16qi is a char
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 1612d3d2773d5..4aa70d6e55a69 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2086,7 +2086,7 @@ _mm_storer_ps(float *__p, __m128 __a)
/// \headerfile <x86intrin.h>
///
/// \code
-/// void _mm_prefetch(const void * a, const int sel);
+/// void _mm_prefetch(const void *a, const int sel);
/// \endcode
///
/// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction.
@@ -2360,7 +2360,10 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
/// 00: assigned from bits [15:0] of \a a. \n
/// 01: assigned from bits [31:16] of \a a. \n
/// 10: assigned from bits [47:32] of \a a. \n
-/// 11: assigned from bits [63:48] of \a a.
+/// 11: assigned from bits [63:48] of \a a. \n
+/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
+/// <c>[b6, b4, b2, b0]</c>.
/// \returns A 64-bit integer vector containing the shuffled values.
#define _mm_shuffle_pi16(a, n) \
((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
@@ -2602,7 +2605,10 @@ void _mm_setcsr(unsigned int __i);
/// 00: Bits [31:0] copied from the specified operand. \n
/// 01: Bits [63:32] copied from the specified operand. \n
/// 10: Bits [95:64] copied from the specified operand. \n
-/// 11: Bits [127:96] copied from the specified operand.
+/// 11: Bits [127:96] copied from the specified operand. \n
+/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
+/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
+/// <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit vector of [4 x float] containing the shuffled values.
#define _mm_shuffle_ps(a, b, mask) \
((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
More information about the cfe-commits
mailing list