r272119 - [AVX512] Reformat macro intrinsics, ensure arguments have proper typecasts, ensure result is typecast back to the generic types.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 7 23:08:07 PDT 2016
Author: ctopper
Date: Wed Jun 8 01:08:07 2016
New Revision: 272119
URL: http://llvm.org/viewvc/llvm-project?rev=272119&view=rev
Log:
[AVX512] Reformat macro intrinsics, ensure arguments have proper typecasts, ensure result is typecast back to the generic types.
Modified:
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=272119&r1=272118&r2=272119&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed Jun 8 01:08:07 2016
@@ -852,76 +852,79 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U,
(__mmask16)(U), (int)(R)); })
#define _mm_reduce_ss(A, B, C) __extension__ ({ \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, \
- (int)(C),_MM_FROUND_CUR_DIRECTION); })
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
+ (int)(C), _MM_FROUND_CUR_DIRECTION); })
#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128 )(A), (__v4sf)(__m128 )(B), \
- (__v4sf)(__m128 )(W), \
- (__mmask8)(U), \
- (int)(C),_MM_FROUND_CUR_DIRECTION); })
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(C), _MM_FROUND_CUR_DIRECTION); })
#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), \
- (int)(C),_MM_FROUND_CUR_DIRECTION); })
-
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION); })
+
#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, \
- (int)(C),(int)(R)); })
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
+ (int)(C), (int)(R)); })
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128 )(A), (__v4sf)(__m128 )(B), \
- (__v4sf)(__m128 )(W), \
- (__mmask8)(U), \
- (int)(C),(int)(R)); })
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(C), (int)(R)); })
#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), \
- (int)(C),(int)(R)); })
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(C), (int)(R)); })
#define _mm_reduce_sd(A, B, C) __extension__ ({ \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, \
- (int)(C),_MM_FROUND_CUR_DIRECTION); })
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(C), \
+ _MM_FROUND_CUR_DIRECTION); })
#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128 )(A), (__v2df)(__m128)(B), \
- (__v2df)(__m128 )(W), \
- (__mmask8)(U), \
- (int)(C),_MM_FROUND_CUR_DIRECTION); })
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(C), _MM_FROUND_CUR_DIRECTION); })
#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), \
- (int)(C),_MM_FROUND_CUR_DIRECTION); })
-
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION); })
+
#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, \
- (int)(C),(int)(R)); })
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(C), (int)(R)); })
#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128 )(A), (__v2df)(__m128)(B), \
- (__v2df)(__m128 )(W), \
- (__mmask8)(U), \
- (int)(C),(int)(R)); })
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(C), (int)(R)); })
#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), \
- (int)(C),(int)(R)); })
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(C), (int)(R)); })
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_movepi32_mask (__m512i __A)
@@ -1159,29 +1162,23 @@ _mm512_maskz_broadcast_i64x2 (__mmask8 _
(__v2di)_mm_setzero_di(), \
(__mmask8)(U)); })
-#define _mm512_insertf32x8( __A, __B, __imm) __extension__ ({ \
-__builtin_ia32_insertf32x8_mask ((__v16sf)( __A),\
- (__v8sf)( __B),\
- ( __imm),\
- (__v16sf) _mm512_setzero_ps (),\
- (__mmask16) -1);\
-})
-
-#define _mm512_mask_insertf32x8( __W, __U, __A, __B, __imm) __extension__ ({ \
-__builtin_ia32_insertf32x8_mask ((__v16sf)( __A),\
- (__v8sf)( __B),\
- ( __imm),\
- (__v16sf)( __W),\
- (__mmask16)( __U));\
-})
-
-#define _mm512_maskz_insertf32x8( __U, __A, __B, __imm) __extension__ ({ \
-__builtin_ia32_insertf32x8_mask ((__v16sf)( __A),\
- (__v8sf)( __B),\
- ( __imm),\
- (__v16sf) _mm512_setzero_ps (),\
- (__mmask16)( __U));\
-})
+#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
+ (__v8sf)(__m256)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
+ (__v8sf)(__m256)(B), (int)(imm), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
+ (__v8sf)(__m256)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U)); })
#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
(__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
@@ -1245,8 +1242,7 @@ __builtin_ia32_insertf32x8_mask ((__v16s
#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
(__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
- (int)(imm), \
- (__mmask16)(U)); })
+ (int)(imm), (__mmask16)(U)); })
#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
(__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=272119&r1=272118&r2=272119&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Wed Jun 8 01:08:07 2016
@@ -867,28 +867,23 @@ _mm512_maskz_sub_epi32 (__mmask16 __U, _
(__mmask16) __U);
}
-#define _mm512_mask_max_round_pd( __W, __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxpd512_mask ((__v8df)( __A),\
- (__v8df)( __B),\
- (__v8df)( __W),\
- (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_maskz_max_round_pd( __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxpd512_mask ((__v8df)( __A),\
- (__v8df)( __B),\
- (__v8df)\
- _mm512_setzero_pd (),\
- (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_max_round_pd( __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxpd512_mask ((__v8df)( __A),\
- (__v8df)( __B),\
- (__v8df)\
- _mm512_undefined_pd (),\
- (__mmask8) -1,( __R));\
-})
+#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A, __m512d __B)
@@ -922,28 +917,23 @@ _mm512_maskz_max_pd (__mmask8 __U, __m51
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_mask_max_round_ps( __W, __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxps512_mask ((__v16sf)( __A),\
- (__v16sf)( __B),\
- (__v16sf)( __W),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_max_round_ps( __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxps512_mask ((__v16sf)( __A),\
- (__v16sf)( __B),\
- (__v16sf)\
- _mm512_setzero_ps (),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_max_round_ps( __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxps512_mask ((__v16sf)( __A),\
- (__v16sf)( __B),\
- (__v16sf)\
- _mm512_undefined_ps (),\
- (__mmask16) -1,( __R));\
-})
+#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A, __m512 __B)
@@ -1162,28 +1152,23 @@ _mm512_maskz_max_epu64 (__mmask8 __M, __
__M);
}
-#define _mm512_mask_min_round_pd( __W, __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minpd512_mask ((__v8df)( __A),\
- (__v8df)( __B),\
- (__v8df)( __W),\
- (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_maskz_min_round_pd( __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minpd512_mask ((__v8df)( __A),\
- (__v8df)( __B),\
- (__v8df)\
- _mm512_setzero_pd (),\
- (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_min_round_pd( __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minpd512_mask ((__v8df)( __A),\
- (__v8df)( __B),\
- (__v8df)\
- _mm512_undefined_pd (),\
- (__mmask8) -1,( __R));\
-})
+#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A, __m512d __B)
@@ -1206,28 +1191,23 @@ _mm512_mask_min_pd (__m512d __W, __mmask
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_mask_min_round_ps( __W, __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minps512_mask ((__v16sf)( __A),\
- (__v16sf)( __B),\
- (__v16sf)( __W),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_min_round_ps( __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minps512_mask ((__v16sf)( __A),\
- (__v16sf)( __B),\
- (__v16sf)\
- _mm512_setzero_ps (),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_min_round_ps( __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minps512_mask ((__v16sf)( __A),\
- (__v16sf)( __B),\
- (__v16sf)\
- _mm512_undefined_ps (),\
- (__mmask16) -1,( __R));\
-})
+#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
@@ -1537,25 +1517,20 @@ _mm512_mask_mullo_epi32 (__m512i __W, __
(__v16si) __W, __M);
}
-#define _mm512_mask_sqrt_round_pd( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\
- (__v8df)( __W),\
- (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_maskz_sqrt_round_pd( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\
- (__v8df)\
- _mm512_setzero_pd (),\
- (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_sqrt_round_pd( __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\
- (__v8df)\
- _mm512_undefined_pd (),\
- (__mmask8) -1,( __R));\
-})
+#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d __a)
@@ -1585,25 +1560,20 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m5
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_mask_sqrt_round_ps( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtps512_mask ((__v16sf)( __A),\
- (__v16sf)( __W),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_sqrt_round_ps( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtps512_mask ((__v16sf)( __A),\
- (__v16sf)\
- _mm512_setzero_ps (),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_sqrt_round_ps( __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtps512_mask ((__v16sf)( __A),\
- (__v16sf)\
- _mm512_undefined_ps (),\
- (__mmask16) -1,( __R));\
-})
+#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 __a)
@@ -2566,23 +2536,20 @@ _mm512_maskz_div_ps(__mmask16 __U, __m51
(__mmask16)(A), \
_MM_FROUND_CUR_DIRECTION); })
-#define _mm512_mask_roundscale_round_ps( __A, __B, __C, __imm, __R) __extension__ ({ \
- (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __C), (int)__imm,\
- (__v16sf)( __A),\
- (__mmask16)( __B),(int) __R);\
-})
-
-#define _mm512_maskz_roundscale_round_ps( __A, __B, __imm,__R) __extension__ ({ \
- (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __B), (int)__imm,\
- (__v16sf)_mm512_setzero_ps (),\
- (__mmask16)( __A),(int) __R);\
-})
-
-#define _mm512_roundscale_round_ps( __A, __imm, __R) __extension__ ({ \
- (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __A),(int) __imm,\
- (__v16sf) _mm512_undefined_ps (),\
- (__mmask16) -1,(int) __R);\
-})
+#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
+ (__v16sf)(__m512)(A), (__mmask16)(B), \
+ (int)(R)); })
+
+#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(A), (int)(R)); })
+
+#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
(__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
@@ -2600,23 +2567,20 @@ _mm512_maskz_div_ps(__mmask16 __U, __m51
(__mmask8)(A), \
_MM_FROUND_CUR_DIRECTION); })
-#define _mm512_mask_roundscale_round_pd( __A, __B, __C, __imm ,__R) __extension__ ({ \
- (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __C),(int)__imm,\
- (__v8df)( __A),\
- (__mmask8)( __B),(int)__R);\
-})
-
-#define _mm512_maskz_roundscale_round_pd( __A, __B, __imm, __R) __extension__ ({ \
- (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __B),(int)__imm,\
- (__v8df)_mm512_setzero_pd (),\
- (__mmask8)( __A),(int) __R);\
-})
-
-#define _mm512_roundscale_round_pd( __A, __imm , __R) __extension__ ({ \
- (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __A),(int) __imm,\
- (__v8df)_mm512_undefined_pd (),\
- (__mmask8) -1,(int) __R);\
-})
+#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
+ (__v8df)(__m512d)(A), (__mmask8)(B), \
+ (int)(R)); })
+
+#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(A), (int)(R)); })
+
+#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
(__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
@@ -3639,25 +3603,20 @@ _mm512_mask_blend_epi32(__mmask16 __U, _
/* Conversion */
-#define _mm512_cvtt_roundps_epu32( __A, __R) __extension__ ({ \
-__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\
- (__v16si)\
- _mm512_undefined_epi32 (),\
- (__mmask16) -1,( __R));\
-})
-
-#define _mm512_mask_cvtt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\
- (__v16si)( __W),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvtt_roundps_epu32( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\
- (__v16si)\
- _mm512_setzero_si512 (),\
- (__mmask16)( __U),( __R));\
-})
+#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_undefined_epi32(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)); })
static __inline __m512i __DEFAULT_FN_ATTRS
@@ -3693,36 +3652,30 @@ _mm512_maskz_cvttps_epu32 (__mmask16 __U
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundepi32_ps( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvtdq2ps512_mask ((__v16si)( __A),\
- (__v16sf)( __W),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundepi32_ps( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvtdq2ps512_mask ((__v16si)( __A),\
- (__v16sf)\
- _mm512_setzero_ps (),\
- (__mmask16)( __U),( __R));\
-})
+#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
(__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundepu32_ps( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvtudq2ps512_mask ((__v16si)( __A),\
- (__v16sf)( __W),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundepu32_ps( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvtudq2ps512_mask ((__v16si)( __A),\
- (__v16sf)\
- _mm512_setzero_ps (),\
- (__mmask16)( __U),( __R));\
-})
+#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepu32_ps (__m512i __A)
@@ -3835,8 +3788,8 @@ _mm512_maskz_cvtepu32_pd (__mmask8 __U,
#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
(__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
- (__v8sf)(W), \
- (__mmask8)(U), (int)(R)); })
+ (__v8sf)(__m256)(W), (__mmask8)(U), \
+ (int)(R)); })
#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
(__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
@@ -3870,27 +3823,20 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvt_roundps_ph( __A, __I) __extension__ ({ \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\
- (__I),\
- (__v16hi)_mm256_undefined_si256 (),\
- (__mmask16) -1);\
-})
-
-#define _mm512_mask_cvt_roundps_ph( __U, __W, __A, __I) __extension__ ({ \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\
- (__I),\
- (__v16hi)( __U),\
- (__mmask16)( __W));\
-})
-
-#define _mm512_maskz_cvt_roundps_ph( __W, __A, __I) __extension__ ({ \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\
- (__I),\
- (__v16hi)\
- _mm256_setzero_si256 (),\
- (__mmask16)( __W));\
-})
+#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)_mm256_undefined_si256(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)(__m256i)(U), \
+ (__mmask16)(W)); })
+
+#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(W)); })
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
(__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
@@ -3907,27 +3853,22 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m
(__v16hi)_mm256_setzero_si256(), \
(__mmask16)(W)); })
-#define _mm512_cvt_roundph_ps( __A, __R) __extension__ ({ \
-__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\
- (__v16sf)\
- _mm512_undefined_ps (),\
- (__mmask16) -1,( __R));\
-})
-
-#define _mm512_mask_cvt_roundph_ps( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\
- (__v16sf)( __W),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundph_ps( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\
- (__v16sf)\
- _mm512_setzero_ps (),\
- (__mmask16)( __U),( __R));\
-})
-
-
+#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
{
@@ -3963,10 +3904,10 @@ _mm512_maskz_cvtph_ps (__mmask16 __U, __
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
(__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
- (__v8si)(W), \
+ (__v8si)(__m256i)(W), \
(__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvtt_roundpd_epi32( U, A, R) __extension__ ({ \
+#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
(__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
(__v8si)_mm256_setzero_si256(), \
(__mmask8)(U), (int)(R)); })
@@ -4003,12 +3944,12 @@ _mm512_maskz_cvttpd_epi32 (__mmask8 __U,
(__v16si)_mm512_setzero_si512(), \
(__mmask16)-1, (int)(R)); })
-#define _mm512_mask_cvtt_roundps_epi32( W, U, A, R) __extension__ ({ \
+#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
(__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
- (__v16si)(W), \
+ (__v16si)(__m512i)(W), \
(__mmask16)(U), (int)(R)); })
-#define _mm512_maskz_cvtt_roundps_epi32( U, A, R) __extension__ ({ \
+#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
(__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
(__v16si)_mm512_setzero_si512(), \
(__mmask16)(U), (int)(R)); })
@@ -4045,18 +3986,15 @@ _mm512_maskz_cvttps_epi32 (__mmask16 __U
(__v16si)_mm512_setzero_si512(), \
(__mmask16)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundps_epi32( __W, __U, __A, __R) __extension__ ({ \
- (__m512i)__builtin_ia32_cvtps2dq512_mask ((__v16sf)( __A),\
- (__v16si)( __W),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundps_epi32( __U, __A, __R) __extension__ ({ \
- (__m512i)__builtin_ia32_cvtps2dq512_mask ((__v16sf)( __A),\
- (__v16si)\
- _mm512_setzero_si512 (),\
- (__mmask16)( __U),( __R));\
-})
+#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi32 (__m512 __A)
@@ -4091,9 +4029,9 @@ _mm512_maskz_cvtps_epi32 (__mmask16 __U,
(__v8si)_mm256_setzero_si256(), \
(__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundpd_epi32( W, U, A, R) __extension__ ({ \
+#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
(__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
- (__v8si)(W), \
+ (__v8si)(__m256i)(W), \
(__mmask8)(U), (int)(R)); })
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
@@ -4135,18 +4073,15 @@ _mm512_maskz_cvtpd_epi32 (__mmask8 __U,
(__v16si)_mm512_setzero_si512(), \
(__mmask16)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \
- (__m512i)__builtin_ia32_cvtps2udq512_mask ((__v16sf)( __A),\
- (__v16si)( __W),\
- (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundps_epu32( __U, __A, __R) __extension__ ({ \
- (__m512i)__builtin_ia32_cvtps2udq512_mask ((__v16sf)( __A),\
- (__v16si)\
- _mm512_setzero_si512 (),\
- (__mmask16)( __U),( __R));\
-})
+#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epu32 ( __m512 __A)
@@ -4182,10 +4117,10 @@ _mm512_maskz_cvtps_epu32 ( __mmask16 __U
(__v8si)_mm256_setzero_si256(), \
(__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundpd_epu32( W, U, A, R) __extension__ ({ \
+#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
(__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
(__v8si)(W), \
- (__mmask8) (U), (int)(R)); })
+ (__mmask8)(U), (int)(R)); })
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
(__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
More information about the cfe-commits
mailing list