[clang] 4190d99 - [X86] Add parentheses around casts in some of the X86 intrinsic headers.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Fri Aug 13 09:46:42 PDT 2021
Author: Craig Topper
Date: 2021-08-13T09:36:16-07:00
New Revision: 4190d99dfcab45cd67c32030d363f391c35570f2
URL: https://github.com/llvm/llvm-project/commit/4190d99dfcab45cd67c32030d363f391c35570f2
DIFF: https://github.com/llvm/llvm-project/commit/4190d99dfcab45cd67c32030d363f391c35570f2.diff
LOG: [X86] Add parentheses around casts in some of the X86 intrinsic headers.
This covers the SSE and AVX/AVX2 headers. AVX512 has a lot more macros
due to rounding mode.
Fixes part of PR51324.
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D107843
Added:
Modified:
clang/lib/Headers/__wmmintrin_aes.h
clang/lib/Headers/avx2intrin.h
clang/lib/Headers/avxintrin.h
clang/lib/Headers/emmintrin.h
clang/lib/Headers/smmintrin.h
clang/lib/Headers/tmmintrin.h
clang/lib/Headers/xmmintrin.h
clang/test/CodeGen/X86/sse41-builtins.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/__wmmintrin_aes.h b/clang/lib/Headers/__wmmintrin_aes.h
index f540319c7fd2b..3010b38711e67 100644
--- a/clang/lib/Headers/__wmmintrin_aes.h
+++ b/clang/lib/Headers/__wmmintrin_aes.h
@@ -133,7 +133,7 @@ _mm_aesimc_si128(__m128i __V)
/// An 8-bit round constant used to generate the AES encryption key.
/// \returns A 128-bit round key for AES encryption.
#define _mm_aeskeygenassist_si128(C, R) \
- (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))
+ ((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R)))
#undef __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index cc16720949ea3..5064c87c2bb19 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -20,8 +20,8 @@
/* SSE4 Multiple Packed Sums of Absolute Difference. */
#define _mm256_mpsadbw_epu8(X, Y, M) \
- (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
- (__v32qi)(__m256i)(Y), (int)(M))
+ ((__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(M)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_abs_epi8(__m256i __a)
@@ -114,8 +114,8 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
}
#define _mm256_alignr_epi8(a, b, n) \
- (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), (n))
+ ((__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), (n)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_and_si256(__m256i __a, __m256i __b)
@@ -149,8 +149,8 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
}
#define _mm256_blend_epi16(V1, V2, M) \
- (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \
- (__v16hi)(__m256i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \
+ (__v16hi)(__m256i)(V2), (int)(M)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cmpeq_epi8(__m256i __a, __m256i __b)
@@ -467,13 +467,13 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
}
#define _mm256_shuffle_epi32(a, imm) \
- (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm)))
#define _mm256_shufflehi_epi16(a, imm) \
- (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm)))
#define _mm256_shufflelo_epi16(a, imm) \
- (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sign_epi8(__m256i __a, __m256i __b)
@@ -494,10 +494,10 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
}
#define _mm256_slli_si256(a, imm) \
- (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)))
#define _mm256_bslli_epi128(a, imm) \
- (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_slli_epi16(__m256i __a, int __count)
@@ -560,10 +560,10 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
}
#define _mm256_srli_si256(a, imm) \
- (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)))
#define _mm256_bsrli_epi128(a, imm) \
- (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_srli_epi16(__m256i __a, int __count)
@@ -743,12 +743,12 @@ _mm256_broadcastsi128_si256(__m128i __X)
#define _mm_broadcastsi128_si256(X) _mm256_broadcastsi128_si256(X)
#define _mm_blend_epi32(V1, V2, M) \
- (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \
- (__v4si)(__m128i)(V2), (int)(M))
+ ((__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \
+ (__v4si)(__m128i)(V2), (int)(M)))
#define _mm256_blend_epi32(V1, V2, M) \
- (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \
- (__v8si)(__m256i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \
+ (__v8si)(__m256i)(V2), (int)(M)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcastb_epi8(__m128i __X)
@@ -806,7 +806,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
}
#define _mm256_permute4x64_pd(V, M) \
- (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))
+ ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M)))
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
@@ -815,17 +815,17 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
}
#define _mm256_permute4x64_epi64(V, M) \
- (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))
+ ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M)))
#define _mm256_permute2x128_si256(V1, V2, M) \
- (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M)))
#define _mm256_extracti128_si256(V, M) \
- (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))
+ ((__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M)))
#define _mm256_inserti128_si256(V1, V2, M) \
- (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \
- (__v2di)(__m128i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \
+ (__v2di)(__m128i)(V2), (int)(M)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskload_epi32(int const *__X, __m256i __M)
@@ -936,211 +936,211 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
}
#define _mm_mask_i32gather_pd(a, m, i, mask, s) \
- (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2df)(__m128d)(mask), (s))
+ ((__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2df)(__m128d)(mask), (s)))
#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \
- (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4df)(__m256d)(mask), (s))
+ ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4df)(__m256d)(mask), (s)))
#define _mm_mask_i64gather_pd(a, m, i, mask, s) \
- (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \
- (double const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2df)(__m128d)(mask), (s))
+ ((__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \
+ (double const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2df)(__m128d)(mask), (s)))
#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \
- (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \
- (double const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4df)(__m256d)(mask), (s))
+ ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \
+ (double const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4df)(__m256d)(mask), (s)))
#define _mm_mask_i32gather_ps(a, m, i, mask, s) \
- (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \
- (float const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4sf)(__m128)(mask), (s))
+ ((__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \
+ (float const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4sf)(__m128)(mask), (s)))
#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \
- (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \
- (float const *)(m), \
- (__v8si)(__m256i)(i), \
- (__v8sf)(__m256)(mask), (s))
+ ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \
+ (float const *)(m), \
+ (__v8si)(__m256i)(i), \
+ (__v8sf)(__m256)(mask), (s)))
#define _mm_mask_i64gather_ps(a, m, i, mask, s) \
- (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \
- (float const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v4sf)(__m128)(mask), (s))
+ ((__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \
+ (float const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v4sf)(__m128)(mask), (s)))
#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \
- (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \
- (float const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4sf)(__m128)(mask), (s))
+ ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \
+ (float const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4sf)(__m128)(mask), (s)))
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \
- (int const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4si)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \
+ (int const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4si)(__m128i)(mask), (s)))
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \
- (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
- (int const *)(m), \
- (__v8si)(__m256i)(i), \
- (__v8si)(__m256i)(mask), (s))
+ ((__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
+ (int const *)(m), \
+ (__v8si)(__m256i)(i), \
+ (__v8si)(__m256i)(mask), (s)))
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \
- (int const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v4si)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \
+ (int const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v4si)(__m128i)(mask), (s)))
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \
- (int const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4si)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \
+ (int const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4si)(__m128i)(mask), (s)))
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2di)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2di)(__m128i)(mask), (s)))
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \
- (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4di)(__m256i)(mask), (s))
+ ((__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4di)(__m256i)(mask), (s)))
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \
- (long long const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2di)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \
+ (long long const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2di)(__m128i)(mask), (s)))
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \
- (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \
- (long long const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4di)(__m256i)(mask), (s))
+ ((__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \
+ (long long const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4di)(__m256i)(mask), (s)))
#define _mm_i32gather_pd(m, i, s) \
- (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
- _mm_setzero_pd()), \
- (s))
+ ((__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
+ _mm_setzero_pd()), \
+ (s)))
#define _mm256_i32gather_pd(m, i, s) \
- (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
- _mm256_setzero_pd(), \
- _CMP_EQ_OQ), \
- (s))
+ ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
+ _mm256_setzero_pd(), \
+ _CMP_EQ_OQ), \
+ (s)))
#define _mm_i64gather_pd(m, i, s) \
- (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \
- (double const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
- _mm_setzero_pd()), \
- (s))
+ ((__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \
+ (double const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
+ _mm_setzero_pd()), \
+ (s)))
#define _mm256_i64gather_pd(m, i, s) \
- (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \
- (double const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
- _mm256_setzero_pd(), \
- _CMP_EQ_OQ), \
- (s))
+ ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \
+ (double const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
+ _mm256_setzero_pd(), \
+ _CMP_EQ_OQ), \
+ (s)))
#define _mm_i32gather_ps(m, i, s) \
- (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \
- (float const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
- _mm_setzero_ps()), \
- (s))
+ ((__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \
+ (float const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
+ _mm_setzero_ps()), \
+ (s)))
#define _mm256_i32gather_ps(m, i, s) \
- (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \
- (float const *)(m), \
- (__v8si)(__m256i)(i), \
- (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \
- _mm256_setzero_ps(), \
- _CMP_EQ_OQ), \
- (s))
+ ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \
+ (float const *)(m), \
+ (__v8si)(__m256i)(i), \
+ (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \
+ _mm256_setzero_ps(), \
+ _CMP_EQ_OQ), \
+ (s)))
#define _mm_i64gather_ps(m, i, s) \
- (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \
- (float const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
- _mm_setzero_ps()), \
- (s))
+ ((__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \
+ (float const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
+ _mm_setzero_ps()), \
+ (s)))
#define _mm256_i64gather_ps(m, i, s) \
- (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \
- (float const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
- _mm_setzero_ps()), \
- (s))
+ ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \
+ (float const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
+ _mm_setzero_ps()), \
+ (s)))
#define _mm_i32gather_epi32(m, i, s) \
- (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \
- (int const *)(m), (__v4si)(__m128i)(i), \
- (__v4si)_mm_set1_epi32(-1), (s))
+ ((__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v4si)(__m128i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)))
#define _mm256_i32gather_epi32(m, i, s) \
- (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \
- (int const *)(m), (__v8si)(__m256i)(i), \
- (__v8si)_mm256_set1_epi32(-1), (s))
+ ((__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \
+ (int const *)(m), (__v8si)(__m256i)(i), \
+ (__v8si)_mm256_set1_epi32(-1), (s)))
#define _mm_i64gather_epi32(m, i, s) \
- (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \
- (int const *)(m), (__v2di)(__m128i)(i), \
- (__v4si)_mm_set1_epi32(-1), (s))
+ ((__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v2di)(__m128i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)))
#define _mm256_i64gather_epi32(m, i, s) \
- (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \
- (int const *)(m), (__v4di)(__m256i)(i), \
- (__v4si)_mm_set1_epi32(-1), (s))
+ ((__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v4di)(__m256i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)))
#define _mm_i32gather_epi64(m, i, s) \
- (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2di)_mm_set1_epi64x(-1), (s))
+ ((__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2di)_mm_set1_epi64x(-1), (s)))
#define _mm256_i32gather_epi64(m, i, s) \
- (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4di)_mm256_set1_epi64x(-1), (s))
+ ((__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4di)_mm256_set1_epi64x(-1), (s)))
#define _mm_i64gather_epi64(m, i, s) \
- (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \
- (long long const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2di)_mm_set1_epi64x(-1), (s))
+ ((__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \
+ (long long const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2di)_mm_set1_epi64x(-1), (s)))
#define _mm256_i64gather_epi64(m, i, s) \
- (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \
- (long long const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4di)_mm256_set1_epi64x(-1), (s))
+ ((__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \
+ (long long const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4di)_mm256_set1_epi64x(-1), (s)))
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 382b6215751ec..7f4e9761f1e2c 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -400,7 +400,7 @@ _mm256_rcp_ps(__m256 __a)
/// 11: Truncated.
/// \returns A 256-bit vector of [4 x double] containing the rounded values.
#define _mm256_round_pd(V, M) \
- (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))
+ ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)))
/// Rounds the values stored in a 256-bit vector of [8 x float] as
/// specified by the byte operand. The source values are rounded to integer
@@ -432,7 +432,7 @@ _mm256_rcp_ps(__m256 __a)
/// 11: Truncated.
/// \returns A 256-bit vector of [8 x float] containing the rounded values.
#define _mm256_round_ps(V, M) \
- (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))
+ ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)))
/// Rounds up the values stored in a 256-bit vector of [4 x double]. The
/// source values are rounded up to integer values and returned as 64-bit
@@ -989,7 +989,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_permute_pd(A, C) \
- (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))
+ ((__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C)))
/// Copies the values in a 256-bit vector of [4 x double] as specified by
/// the immediate integer operand.
@@ -1029,7 +1029,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute_pd(A, C) \
- (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))
+ ((__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C)))
/// Copies the values in a 128-bit vector of [4 x float] as specified by
/// the immediate integer operand.
@@ -1085,7 +1085,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_permute_ps(A, C) \
- (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))
+ ((__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C)))
/// Copies the values in a 256-bit vector of [8 x float] as specified by
/// the immediate integer operand.
@@ -1177,7 +1177,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute_ps(A, C) \
- (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))
+ ((__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C)))
/// Permutes 128-bit data values stored in two 256-bit vectors of
/// [4 x double], as specified by the immediate integer operand.
@@ -1217,8 +1217,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// destination.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute2f128_pd(V1, V2, M) \
- (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
- (__v4df)(__m256d)(V2), (int)(M))
+ ((__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
+ (__v4df)(__m256d)(V2), (int)(M)))
/// Permutes 128-bit data values stored in two 256-bit vectors of
/// [8 x float], as specified by the immediate integer operand.
@@ -1258,8 +1258,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// destination.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute2f128_ps(V1, V2, M) \
- (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
- (__v8sf)(__m256)(V2), (int)(M))
+ ((__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (int)(M)))
/// Permutes 128-bit data values stored in two 256-bit integer vectors,
/// as specified by the immediate integer operand.
@@ -1298,8 +1298,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// destination.
/// \returns A 256-bit integer vector containing the copied values.
#define _mm256_permute2f128_si256(V1, V2, M) \
- (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
- (__v8si)(__m256i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
+ (__v8si)(__m256i)(V2), (int)(M)))
/* Vector Blend */
/// Merges 64-bit double-precision data values stored in either of the
@@ -1327,8 +1327,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// operand \a V2 is copied to the same position in the destination.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_blend_pd(V1, V2, M) \
- (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \
- (__v4df)(__m256d)(V2), (int)(M))
+ ((__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \
+ (__v4df)(__m256d)(V2), (int)(M)))
/// Merges 32-bit single-precision data values stored in either of the
/// two 256-bit vectors of [8 x float], as specified by the immediate
@@ -1355,8 +1355,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// operand \a V2 is copied to the same position in the destination.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_blend_ps(V1, V2, M) \
- (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \
- (__v8sf)(__m256)(V2), (int)(M))
+ ((__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (int)(M)))
/// Merges 64-bit double-precision data values stored in either of the
/// two 256-bit vectors of [4 x double], as specified by the 256-bit vector
@@ -1453,8 +1453,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// two parallel dot product computations.
/// \returns A 256-bit vector of [8 x float] containing the two dot products.
#define _mm256_dp_ps(V1, V2, M) \
- (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
- (__v8sf)(__m256)(V2), (M))
+ ((__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (M)))
/* Vector shuffle */
/// Selects 8 float values from the 256-bit operands of [8 x float], as
@@ -1507,8 +1507,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 11: Bits [127:96] and [255:224] are copied from the selected operand.
/// \returns A 256-bit vector of [8 x float] containing the shuffled values.
#define _mm256_shuffle_ps(a, b, mask) \
- (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), (int)(mask))
+ ((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (int)(mask)))
/// Selects four double-precision values from the 256-bit operands of
/// [4 x double], as specified by the immediate value operand.
@@ -1553,8 +1553,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// destination.
/// \returns A 256-bit vector of [4 x double] containing the shuffled values.
#define _mm256_shuffle_pd(a, b, mask) \
- (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
- (__v4df)(__m256d)(b), (int)(mask))
+ ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (int)(mask)))
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
@@ -1647,8 +1647,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c) \
- (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
- (__v2df)(__m128d)(b), (c))
+ ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (c)))
/// Compares each of the corresponding values of two 128-bit vectors of
/// [4 x float], using the operation specified by the immediate integer
@@ -1707,8 +1707,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ps(a, b, c) \
- (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
- (__v4sf)(__m128)(b), (c))
+ ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (c)))
/// Compares each of the corresponding double-precision values of two
/// 256-bit vectors of [4 x double], using the operation specified by the
@@ -1767,8 +1767,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 256-bit vector of [4 x double] containing the comparison results.
#define _mm256_cmp_pd(a, b, c) \
- (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
- (__v4df)(__m256d)(b), (c))
+ ((__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (c)))
/// Compares each of the corresponding values of two 256-bit vectors of
/// [8 x float], using the operation specified by the immediate integer
@@ -1827,8 +1827,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 256-bit vector of [8 x float] containing the comparison results.
#define _mm256_cmp_ps(a, b, c) \
- (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), (c))
+ ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (c)))
/// Compares each of the corresponding scalar double-precision values of
/// two 128-bit vectors of [2 x double], using the operation specified by the
@@ -1886,8 +1886,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c) \
- (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
- (__v2df)(__m128d)(b), (c))
+ ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (c)))
/// Compares each of the corresponding scalar values of two 128-bit
/// vectors of [4 x float], using the operation specified by the immediate
@@ -1945,8 +1945,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ss(a, b, c) \
- (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
- (__v4sf)(__m128)(b), (c))
+ ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (c)))
/// Takes a [8 x i32] vector and returns the vector element value
/// indexed by the immediate constant operand.
@@ -1964,7 +1964,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A 32-bit integer containing the extracted 32 bits of extended
/// packed data.
#define _mm256_extract_epi32(X, N) \
- (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))
+ ((int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N)))
/// Takes a [16 x i16] vector and returns the vector element value
/// indexed by the immediate constant operand.
@@ -1982,8 +1982,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
/// packed data.
#define _mm256_extract_epi16(X, N) \
- (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \
- (int)(N))
+ ((int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \
+ (int)(N)))
/// Takes a [32 x i8] vector and returns the vector element value
/// indexed by the immediate constant operand.
@@ -2001,8 +2001,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
/// packed data.
#define _mm256_extract_epi8(X, N) \
- (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \
- (int)(N))
+ ((int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \
+ (int)(N)))
#ifdef __x86_64__
/// Takes a [4 x i64] vector and returns the vector element value
@@ -2021,7 +2021,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A 64-bit integer containing the extracted 64 bits of extended
/// packed data.
#define _mm256_extract_epi64(X, N) \
- (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))
+ ((long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N)))
#endif
/// Takes a [8 x i32] vector and replaces the vector element value
@@ -2043,8 +2043,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
#define _mm256_insert_epi32(X, I, N) \
- (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \
- (int)(I), (int)(N))
+ ((__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \
+ (int)(I), (int)(N)))
/// Takes a [16 x i16] vector and replaces the vector element value
@@ -2066,8 +2066,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
#define _mm256_insert_epi16(X, I, N) \
- (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \
- (int)(I), (int)(N))
+ ((__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \
+ (int)(I), (int)(N)))
/// Takes a [32 x i8] vector and replaces the vector element value
/// indexed by the immediate constant operand with a new value. Returns the
@@ -2088,8 +2088,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
#define _mm256_insert_epi8(X, I, N) \
- (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \
- (int)(I), (int)(N))
+ ((__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \
+ (int)(I), (int)(N)))
#ifdef __x86_64__
/// Takes a [4 x i64] vector and replaces the vector element value
@@ -2111,8 +2111,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
#define _mm256_insert_epi64(X, I, N) \
- (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \
- (long long)(I), (int)(N))
+ ((__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \
+ (long long)(I), (int)(N)))
#endif
/* Conversion */
@@ -4592,8 +4592,8 @@ _mm256_zextsi128_si256(__m128i __a)
/// result.
/// \returns A 256-bit vector of [8 x float] containing the interleaved values.
#define _mm256_insertf128_ps(V1, V2, M) \
- (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \
- (__v4sf)(__m128)(V2), (int)(M))
+ ((__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \
+ (__v4sf)(__m128)(V2), (int)(M)))
/// Constructs a new 256-bit vector of [4 x double] by first duplicating
/// a 256-bit vector of [4 x double] given in the first parameter, and then
@@ -4630,8 +4630,8 @@ _mm256_zextsi128_si256(__m128i __a)
/// result.
/// \returns A 256-bit vector of [4 x double] containing the interleaved values.
#define _mm256_insertf128_pd(V1, V2, M) \
- (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \
- (__v2df)(__m128d)(V2), (int)(M))
+ ((__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \
+ (__v2df)(__m128d)(V2), (int)(M)))
/// Constructs a new 256-bit integer vector by first duplicating a
/// 256-bit integer vector given in the first parameter, and then replacing
@@ -4668,8 +4668,8 @@ _mm256_zextsi128_si256(__m128i __a)
/// result.
/// \returns A 256-bit integer vector containing the interleaved values.
#define _mm256_insertf128_si256(V1, V2, M) \
- (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \
- (__v4si)(__m128i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \
+ (__v4si)(__m128i)(V2), (int)(M)))
/*
Vector extract.
@@ -4698,7 +4698,7 @@ _mm256_zextsi128_si256(__m128i __a)
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [4 x float] containing the extracted bits.
#define _mm256_extractf128_ps(V, M) \
- (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))
+ ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M)))
/// Extracts either the upper or the lower 128 bits from a 256-bit vector
/// of [4 x double], as determined by the immediate integer parameter, and
@@ -4722,7 +4722,7 @@ _mm256_zextsi128_si256(__m128i __a)
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [2 x double] containing the extracted bits.
#define _mm256_extractf128_pd(V, M) \
- (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))
+ ((__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M)))
/// Extracts either the upper or the lower 128 bits from a 256-bit
/// integer vector, as determined by the immediate integer parameter, and
@@ -4746,7 +4746,7 @@ _mm256_zextsi128_si256(__m128i __a)
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit integer vector containing the extracted bits.
#define _mm256_extractf128_si256(V, M) \
- (__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))
+ ((__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M)))
/* SIMD load ops (unaligned) */
/// Loads two 128-bit floating-point vectors of [4 x float] from
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index bb759721faeb3..b79a4f17f9c7f 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2818,10 +2818,10 @@ _mm_xor_si128(__m128i __a, __m128i __b)
/// \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
#define _mm_slli_si128(a, imm) \
- (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
#define _mm_bslli_si128(a, imm) \
- (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
/// Left-shifts each 16-bit value in the 128-bit integer vector operand
/// by the specified number of bits. Low-order bits are cleared.
@@ -3035,10 +3035,10 @@ _mm_sra_epi32(__m128i __a, __m128i __count)
/// \a a.
/// \returns A 128-bit integer vector containing the right-shifted value.
#define _mm_srli_si128(a, imm) \
- (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
#define _mm_bsrli_si128(a, imm) \
- (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
/// Right-shifts each of 16-bit values in the 128-bit integer vector
/// operand by the specified number of bits. High-order bits are cleared.
@@ -4356,8 +4356,8 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi16(a, imm) \
- (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
- (int)(imm))
+ ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
+ (int)(imm)))
/// Constructs a 128-bit integer vector by first making a copy of the
/// 128-bit integer vector parameter, and then inserting the lower 16 bits
@@ -4380,8 +4380,8 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
/// lower 16 bits of \a __b are written.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi16(a, b, imm) \
- (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
- (int)(imm))
+ ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
+ (int)(imm)))
/// Copies the values of the most significant bits from each 8-bit
/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
@@ -4430,7 +4430,7 @@ _mm_movemask_epi8(__m128i __a)
/// 11: assign values from bits [127:96] of \a a.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shuffle_epi32(a, imm) \
- (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))
/// Constructs a 128-bit integer vector by shuffling four lower 16-bit
/// elements of a 128-bit integer vector of [8 x i16], using the immediate
@@ -4460,7 +4460,7 @@ _mm_movemask_epi8(__m128i __a)
/// 11: assign values from bits [63:48] of \a a. \n
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflelo_epi16(a, imm) \
- (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))
/// Constructs a 128-bit integer vector by shuffling four upper 16-bit
/// elements of a 128-bit integer vector of [8 x i16], using the immediate
@@ -4490,7 +4490,7 @@ _mm_movemask_epi8(__m128i __a)
/// 11: assign values from bits [127:112] of \a a. \n
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflehi_epi16(a, imm) \
- (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))
/// Unpacks the high-order (index 8-15) values from two 128-bit vectors
/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
@@ -4844,8 +4844,8 @@ _mm_movemask_pd(__m128d __a)
/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
#define _mm_shuffle_pd(a, b, i) \
- (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
- (int)(i))
+ ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
+ (int)(i)))
/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
/// floating-point vector of [4 x float].
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 025830a74280e..3ee58c9d79370 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -231,7 +231,7 @@
/// 11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_round_ps(X, M) \
- (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))
+ ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)))
/// Copies three upper elements of the first 128-bit vector operand to
/// the corresponding three upper elements of the 128-bit result vector of
@@ -272,8 +272,8 @@
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
/// values.
#define _mm_round_ss(X, Y, M) \
- (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (M))
+ ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)))
/// Rounds each element of the 128-bit vector of [2 x double] to an
/// integer value according to the rounding control specified by the second
@@ -306,7 +306,7 @@
/// 11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_round_pd(X, M) \
- (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))
+ ((__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)))
/// Copies the upper element of the first 128-bit vector operand to the
/// corresponding upper element of the 128-bit result vector of [2 x double].
@@ -347,8 +347,8 @@
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
/// values.
#define _mm_round_sd(X, Y, M) \
- (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (M))
+ ((__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)))
/* SSE4 Packed Blending Intrinsics. */
/// Returns a 128-bit vector of [2 x double] where the values are
@@ -376,8 +376,8 @@
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_blend_pd(V1, V2, M) \
- (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \
- (__v2df)(__m128d)(V2), (int)(M))
+ ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \
+ (__v2df)(__m128d)(V2), (int)(M)))
/// Returns a 128-bit vector of [4 x float] where the values are selected
/// from either the first or second operand as specified by the third
@@ -404,8 +404,8 @@
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_blend_ps(V1, V2, M) \
- (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \
- (__v4sf)(__m128)(V2), (int)(M))
+ ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \
+ (__v4sf)(__m128)(V2), (int)(M)))
/// Returns a 128-bit vector of [2 x double] where the values are
/// selected from either the first or second operand as specified by the
@@ -513,8 +513,8 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [8 x i16] containing the copied values.
#define _mm_blend_epi16(V1, V2, M) \
- (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \
- (__v8hi)(__m128i)(V2), (int)(M))
+ ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \
+ (__v8hi)(__m128i)(V2), (int)(M)))
/* SSE4 Dword Multiply Instructions. */
/// Multiples corresponding elements of two 128-bit vectors of [4 x i32]
@@ -590,8 +590,8 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/// in the corresponding element; otherwise that element is set to zero.
/// \returns A 128-bit vector of [4 x float] containing the dot product.
#define _mm_dp_ps(X, Y, M) \
- (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (M))
+ ((__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)))
/// Computes the dot product of the two 128-bit vectors of [2 x double]
/// and returns it in the elements of the 128-bit result vector of
@@ -625,8 +625,8 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/// each [2 x double] vector. If a bit is set, the dot product is returned in
/// the corresponding element; otherwise that element is set to zero.
#define _mm_dp_pd(X, Y, M) \
- (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (M))
+ ((__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)))
/* SSE4 Streaming Load Hint Instruction. */
/// Loads integer values from a 128-bit aligned memory location to a
@@ -925,8 +925,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 1111: Bits [127:120] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi8(X, I, N) \
- (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \
- (int)(I), (int)(N))
+ ((__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \
+ (int)(I), (int)(N)))
/// Constructs a 128-bit vector of [4 x i32] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the 32-bit
@@ -957,8 +957,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 11: Bits [127:96] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi32(X, I, N) \
- (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \
- (int)(I), (int)(N))
+ ((__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \
+ (int)(I), (int)(N)))
#ifdef __x86_64__
/// Constructs a 128-bit vector of [2 x i64] by first making a copy of
@@ -988,8 +988,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 1: Bits [127:64] of the result are used for insertion. \n
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi64(X, I, N) \
- (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \
- (long long)(I), (int)(N))
+ ((__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \
+ (long long)(I), (int)(N)))
#endif /* __x86_64__ */
/* Extract int from packed integer array at index. This returns the element
@@ -1031,8 +1031,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 128-bit integer vector parameter and the remaining bits are assigned
/// zeros.
#define _mm_extract_epi8(X, N) \
- (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \
- (int)(N))
+ ((int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \
+ (int)(N)))
/// Extracts a 32-bit element from the 128-bit integer vector of
/// [4 x i32], using the immediate value parameter \a N as a selector.
@@ -1057,7 +1057,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// \returns An integer, whose lower 32 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi32(X, N) \
- (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))
+ ((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N)))
#ifdef __x86_64__
/// Extracts a 64-bit element from the 128-bit integer vector of
@@ -1080,7 +1080,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 1: Bits [127:64] are returned. \n
/// \returns A 64-bit integer.
#define _mm_extract_epi64(X, N) \
- (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))
+ ((long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N)))
#endif /* __x86_64 */
/* SSE4 128-bit Packed Integer Comparisons. */
@@ -1514,8 +1514,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
/// \returns A 128-bit integer vector containing the sums of the sets of
/// absolute differences between both operands.
#define _mm_mpsadbw_epu8(X, Y, M) \
- (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
- (__v16qi)(__m128i)(Y), (M))
+ ((__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (M)))
/// Finds the minimum unsigned 16-bit element in the input 128-bit
/// vector of [8 x u16] and returns it and along with its index.
@@ -1624,8 +1624,8 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns a 128-bit integer vector representing the result mask of
/// the comparison.
#define _mm_cmpistrm(A, B, M) \
- (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -1678,8 +1678,8 @@ _mm_minpos_epu16(__m128i __V)
/// 1: The index of the most significant set bit. \n
/// \returns Returns an integer representing the result index of the comparison.
#define _mm_cmpistri(A, B, M) \
- (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -1738,9 +1738,9 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns a 128-bit integer vector representing the result mask of
/// the comparison.
#define _mm_cmpestrm(A, LA, B, LB, M) \
- (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -1797,9 +1797,9 @@ _mm_minpos_epu16(__m128i __V)
/// 1: The index of the most significant set bit. \n
/// \returns Returns an integer representing the result index of the comparison.
#define _mm_cmpestri(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
/// Uses the immediate operand \a M to perform a comparison of string
@@ -1849,8 +1849,8 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the bit mask is zero and the length of the string in
/// \a B is the maximum; otherwise, returns 0.
#define _mm_cmpistra(A, B, M) \
- (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -1898,8 +1898,8 @@ _mm_minpos_epu16(__m128i __V)
/// to the size of \a A or \a B.
/// \returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.
#define _mm_cmpistrc(A, B, M) \
- (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -1946,8 +1946,8 @@ _mm_minpos_epu16(__m128i __V)
/// to the size of \a A or \a B. \n
/// \returns Returns bit 0 of the resulting bit mask.
#define _mm_cmpistro(A, B, M) \
- (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -1996,8 +1996,8 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the length of the string in \a A is less than the
/// maximum, otherwise, returns 0.
#define _mm_cmpistrs(A, B, M) \
- (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -2046,8 +2046,8 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the length of the string in \a B is less than the
/// maximum, otherwise, returns 0.
#define _mm_cmpistrz(A, B, M) \
- (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2100,9 +2100,9 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the bit mask is zero and the length of the string in
/// \a B is the maximum, otherwise, returns 0.
#define _mm_cmpestra(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2154,9 +2154,9 @@ _mm_minpos_epu16(__m128i __V)
/// to the size of \a A or \a B. \n
/// \returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.
#define _mm_cmpestrc(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2207,9 +2207,9 @@ _mm_minpos_epu16(__m128i __V)
/// to the size of \a A or \a B.
/// \returns Returns bit 0 of the resulting bit mask.
#define _mm_cmpestro(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2262,9 +2262,9 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the length of the string in \a A is less than the
/// maximum, otherwise, returns 0.
#define _mm_cmpestrs(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2316,9 +2316,9 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the length of the string in \a B is less than the
/// maximum, otherwise, returns 0.
#define _mm_cmpestrz(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/* SSE4.2 Compare Packed Data -- Greater Than. */
/// Compares each of the corresponding 64-bit values of the 128-bit
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index 35533e115c7d2..dbd959d0a62cb 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -145,8 +145,8 @@ _mm_abs_epi32(__m128i __a)
/// \returns A 128-bit integer vector containing the concatenated right-shifted
/// value.
#define _mm_alignr_epi8(a, b, n) \
- (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (n))
+ ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), (n)))
/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
@@ -168,7 +168,7 @@ _mm_abs_epi32(__m128i __a)
/// \returns A 64-bit integer vector containing the concatenated right-shifted
/// value.
#define _mm_alignr_pi8(a, b, n) \
- (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))
+ ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16].
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index f4686691c7ed5..620453c97783c 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2181,7 +2181,7 @@ void _mm_sfence(void);
/// 3: Bits [63:48] are copied to the destination.
/// \returns A 16-bit integer containing the extracted 16 bits of packed data.
#define _mm_extract_pi16(a, n) \
- (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)
+ ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n))
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2212,7 +2212,7 @@ void _mm_sfence(void);
/// \returns A 64-bit integer vector containing the copied packed data from the
/// operands.
#define _mm_insert_pi16(a, d, n) \
- (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)
+ ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n))
/// Compares each of the corresponding packed 16-bit integer values of
/// the 64-bit integer vectors, and writes the greater value to the
@@ -2359,7 +2359,7 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
/// 11: assigned from bits [63:48] of \a a.
/// \returns A 64-bit integer vector containing the shuffled values.
#define _mm_shuffle_pi16(a, n) \
- (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))
+ ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
/// Conditionally copies the values from each 8-bit element in the first
/// 64-bit integer vector operand to the specified memory location, as
@@ -2601,8 +2601,8 @@ void _mm_setcsr(unsigned int __i);
/// 11: Bits [127:96] copied from the specified operand.
/// \returns A 128-bit vector of [4 x float] containing the shuffled values.
#define _mm_shuffle_ps(a, b, mask) \
- (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
- (int)(mask))
+ ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
+ (int)(mask)))
/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 1e38e3c3355a9..b68598c3a7da3 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -393,3 +393,11 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
// CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_testz_si128(x, y);
}
+
+// Make sure brackets work after macro intrinsics.
+float pr51324(__m128 a) {
+ // CHECK-LABEL: pr51324
+ // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 0)
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ return _mm_round_ps(a, 0)[0];
+}
More information about the cfe-commits
mailing list