r272119 - [AVX512] Reformat macro intrinsics, ensure arguments have proper typecasts, and ensure the result is cast back to the generic types.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 7 23:08:07 PDT 2016


Author: ctopper
Date: Wed Jun  8 01:08:07 2016
New Revision: 272119

URL: http://llvm.org/viewvc/llvm-project?rev=272119&view=rev
Log:
[AVX512] Reformat macro intrinsics, ensure arguments have proper typecasts, and ensure the result is cast back to the generic types.

Modified:
    cfe/trunk/lib/Headers/avx512dqintrin.h
    cfe/trunk/lib/Headers/avx512fintrin.h

Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=272119&r1=272118&r2=272119&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed Jun  8 01:08:07 2016
@@ -852,76 +852,79 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U,
                                           (__mmask16)(U), (int)(R)); })
 
 #define _mm_reduce_ss(A, B, C) __extension__ ({              \
-  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
-                                          (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)-1, \
-                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+                                       (__v4sf)(__m128)(B), \
+                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
+                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })
 
 #define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({   \
-  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128 )(A), (__v4sf)(__m128 )(B), \
-                                          (__v4sf)(__m128 )(W), \
-                                          (__mmask8)(U), \
-                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+                                       (__v4sf)(__m128)(B), \
+                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
+                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })
 
 #define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({       \
-  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \
-                                          (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)(U), \
-                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
-               
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+                                       (__v4sf)(__m128)(B), \
+                                       (__v4sf)_mm_setzero_ps(), \
+                                       (__mmask8)(U), (int)(C), \
+                                       _MM_FROUND_CUR_DIRECTION); })
+
 #define _mm_reduce_round_ss(A, B, C, R) __extension__ ({              \
-  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \
-                                          (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)-1, \
-                                          (int)(C),(int)(R)); })
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+                                       (__v4sf)(__m128)(B), \
+                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
+                                       (int)(C), (int)(R)); })
 
 #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({   \
-  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128 )(A), (__v4sf)(__m128 )(B), \
-                                          (__v4sf)(__m128 )(W), \
-                                          (__mmask8)(U), \
-                                          (int)(C),(int)(R)); })
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+                                       (__v4sf)(__m128)(B), \
+                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
+                                       (int)(C), (int)(R)); })
 
 #define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({       \
-  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \
-                                          (__v4sf)_mm_setzero_ps(), \
-                                          (__mmask8)(U), \
-                                          (int)(C),(int)(R)); })
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+                                       (__v4sf)(__m128)(B), \
+                                       (__v4sf)_mm_setzero_ps(), \
+                                       (__mmask8)(U), (int)(C), (int)(R)); })
 
 #define _mm_reduce_sd(A, B, C) __extension__ ({              \
-  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
-                                          (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)-1, \
-                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+                                        (__v2df)(__m128d)(B), \
+                                        (__v2df)_mm_setzero_pd(), \
+                                        (__mmask8)-1, (int)(C), \
+                                        _MM_FROUND_CUR_DIRECTION); })
 
 #define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({   \
-  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128 )(A), (__v2df)(__m128)(B), \
-                                          (__v2df)(__m128 )(W), \
-                                          (__mmask8)(U), \
-                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+                                        (__v2df)(__m128d)(B), \
+                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
+                                        (int)(C), _MM_FROUND_CUR_DIRECTION); })
 
 #define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({       \
-  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
-                                          (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)(U), \
-                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
-               
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+                                        (__v2df)(__m128d)(B), \
+                                        (__v2df)_mm_setzero_pd(), \
+                                        (__mmask8)(U), (int)(C), \
+                                        _MM_FROUND_CUR_DIRECTION); })
+
 #define _mm_reduce_round_sd(A, B, C, R) __extension__ ({              \
-  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
-                                          (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)-1, \
-                                          (int)(C),(int)(R)); })
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+                                        (__v2df)(__m128d)(B), \
+                                        (__v2df)_mm_setzero_pd(), \
+                                        (__mmask8)-1, (int)(C), (int)(R)); })
 
 #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({   \
-  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128 )(A), (__v2df)(__m128)(B), \
-                                          (__v2df)(__m128 )(W), \
-                                          (__mmask8)(U), \
-                                          (int)(C),(int)(R)); })
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+                                        (__v2df)(__m128d)(B), \
+                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
+                                        (int)(C), (int)(R)); })
 
 #define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({       \
-  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
-                                          (__v2df)_mm_setzero_pd(), \
-                                          (__mmask8)(U), \
-                                          (int)(C),(int)(R)); })
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+                                        (__v2df)(__m128d)(B), \
+                                        (__v2df)_mm_setzero_pd(), \
+                                        (__mmask8)(U), (int)(C), (int)(R)); })
                      
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_movepi32_mask (__m512i __A)
@@ -1159,29 +1162,23 @@ _mm512_maskz_broadcast_i64x2 (__mmask8 _
                                                 (__v2di)_mm_setzero_di(), \
                                                 (__mmask8)(U)); })
 
-#define _mm512_insertf32x8( __A, __B, __imm) __extension__ ({ \
-__builtin_ia32_insertf32x8_mask ((__v16sf)( __A),\
-                (__v8sf)( __B),\
-                ( __imm),\
-                (__v16sf) _mm512_setzero_ps (),\
-                (__mmask16) -1);\
-})
-
-#define _mm512_mask_insertf32x8( __W, __U, __A, __B, __imm) __extension__ ({ \
-__builtin_ia32_insertf32x8_mask ((__v16sf)( __A),\
-                (__v8sf)( __B),\
-                ( __imm),\
-                (__v16sf)( __W),\
-                (__mmask16)( __U));\
-})
-
-#define _mm512_maskz_insertf32x8( __U, __A, __B, __imm) __extension__ ({ \
-__builtin_ia32_insertf32x8_mask ((__v16sf)( __A),\
-                (__v8sf)( __B),\
-                ( __imm),\
-                (__v16sf) _mm512_setzero_ps (),\
-                (__mmask16)( __U));\
-})
+#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
+  (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
+                                          (__v8sf)(__m256)(B), (int)(imm), \
+                                          (__v16sf)_mm512_setzero_ps(), \
+                                          (__mmask16)-1); })
+
+#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
+  (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
+                                          (__v8sf)(__m256)(B), (int)(imm), \
+                                          (__v16sf)(__m512)(W), \
+                                          (__mmask16)(U)); })
+
+#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
+  (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
+                                          (__v8sf)(__m256)(B), (int)(imm), \
+                                          (__v16sf)_mm512_setzero_ps(), \
+                                          (__mmask16)(U)); })
 
 #define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
   (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
@@ -1245,8 +1242,7 @@ __builtin_ia32_insertf32x8_mask ((__v16s
 
 #define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
   (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
-                                              (int)(imm), \
-                                              (__mmask16)(U)); })
+                                              (int)(imm), (__mmask16)(U)); })
 
 #define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
   (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=272119&r1=272118&r2=272119&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Wed Jun  8 01:08:07 2016
@@ -867,28 +867,23 @@ _mm512_maskz_sub_epi32 (__mmask16 __U, _
              (__mmask16) __U);
 }
 
-#define _mm512_mask_max_round_pd( __W, __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxpd512_mask ((__v8df)( __A),\
-            (__v8df)( __B),\
-            (__v8df)( __W),\
-            (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_maskz_max_round_pd( __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxpd512_mask ((__v8df)( __A),\
-            (__v8df)( __B),\
-            (__v8df)\
-            _mm512_setzero_pd (),\
-            (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_max_round_pd( __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxpd512_mask ((__v8df)( __A),\
-            (__v8df)( __B),\
-            (__v8df)\
-            _mm512_undefined_pd (),\
-            (__mmask8) -1,( __R));\
-})
+#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
+                                        (__v8df)(__m512d)(B), \
+                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
+                                        (int)(R)); })
+
+#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
+                                        (__v8df)(__m512d)(B), \
+                                        (__v8df)_mm512_setzero_pd(), \
+                                        (__mmask8)(U), (int)(R)); })
+
+#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
+                                        (__v8df)(__m512d)(B), \
+                                        (__v8df)_mm512_undefined_pd(), \
+                                        (__mmask8)-1, (int)(R)); })
 
 static  __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_max_pd(__m512d __A, __m512d __B)
@@ -922,28 +917,23 @@ _mm512_maskz_max_pd (__mmask8 __U, __m51
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-#define _mm512_mask_max_round_ps( __W, __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxps512_mask ((__v16sf)( __A),\
-           (__v16sf)( __B),\
-           (__v16sf)( __W),\
-           (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_max_round_ps( __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxps512_mask ((__v16sf)( __A),\
-           (__v16sf)( __B),\
-           (__v16sf)\
-           _mm512_setzero_ps (),\
-           (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_max_round_ps( __A, __B, __R) __extension__ ({ \
-__builtin_ia32_maxps512_mask ((__v16sf)( __A),\
-           (__v16sf)( __B),\
-           (__v16sf)\
-           _mm512_undefined_ps (),\
-           (__mmask16) -1,( __R));\
-})
+#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
+  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
+                                       (__v16sf)(__m512)(B), \
+                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
+                                       (int)(R)); })
+
+#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
+  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
+                                       (__v16sf)(__m512)(B), \
+                                       (__v16sf)_mm512_setzero_ps(), \
+                                       (__mmask16)(U), (int)(R)); })
+
+#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
+  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
+                                       (__v16sf)(__m512)(B), \
+                                       (__v16sf)_mm512_undefined_ps(), \
+                                       (__mmask16)-1, (int)(R)); })
 
 static  __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_max_ps(__m512 __A, __m512 __B)
@@ -1162,28 +1152,23 @@ _mm512_maskz_max_epu64 (__mmask8 __M, __
                    __M);
 }
 
-#define _mm512_mask_min_round_pd( __W, __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minpd512_mask ((__v8df)( __A),\
-            (__v8df)( __B),\
-            (__v8df)( __W),\
-            (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_maskz_min_round_pd( __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minpd512_mask ((__v8df)( __A),\
-            (__v8df)( __B),\
-            (__v8df)\
-            _mm512_setzero_pd (),\
-            (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_min_round_pd( __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minpd512_mask ((__v8df)( __A),\
-            (__v8df)( __B),\
-            (__v8df)\
-            _mm512_undefined_pd (),\
-            (__mmask8) -1,( __R));\
-})
+#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
+                                        (__v8df)(__m512d)(B), \
+                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
+                                        (int)(R)); })
+
+#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
+                                        (__v8df)(__m512d)(B), \
+                                        (__v8df)_mm512_setzero_pd(), \
+                                        (__mmask8)(U), (int)(R)); })
+
+#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
+                                        (__v8df)(__m512d)(B), \
+                                        (__v8df)_mm512_undefined_pd(), \
+                                        (__mmask8)-1, (int)(R)); })
 
 static  __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_min_pd(__m512d __A, __m512d __B)
@@ -1206,28 +1191,23 @@ _mm512_mask_min_pd (__m512d __W, __mmask
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-#define _mm512_mask_min_round_ps( __W, __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minps512_mask ((__v16sf)( __A),\
-           (__v16sf)( __B),\
-           (__v16sf)( __W),\
-           (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_min_round_ps( __U, __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minps512_mask ((__v16sf)( __A),\
-           (__v16sf)( __B),\
-           (__v16sf)\
-           _mm512_setzero_ps (),\
-           (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_min_round_ps( __A, __B, __R) __extension__ ({ \
-__builtin_ia32_minps512_mask ((__v16sf)( __A),\
-           (__v16sf)( __B),\
-           (__v16sf)\
-           _mm512_undefined_ps (),\
-           (__mmask16) -1,( __R));\
-})
+#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
+  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
+                                       (__v16sf)(__m512)(B), \
+                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
+                                       (int)(R)); })
+
+#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
+  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
+                                       (__v16sf)(__m512)(B), \
+                                       (__v16sf)_mm512_setzero_ps(), \
+                                       (__mmask16)(U), (int)(R)); })
+
+#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
+  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
+                                       (__v16sf)(__m512)(B), \
+                                       (__v16sf)_mm512_undefined_ps(), \
+                                       (__mmask16)-1, (int)(R)); })
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
@@ -1537,25 +1517,20 @@ _mm512_mask_mullo_epi32 (__m512i __W, __
               (__v16si) __W, __M);
 }
 
-#define _mm512_mask_sqrt_round_pd( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\
-             (__v8df)( __W),\
-             (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_maskz_sqrt_round_pd( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\
-             (__v8df)\
-             _mm512_setzero_pd (),\
-             (__mmask8)( __U),( __R));\
-})
-
-#define _mm512_sqrt_round_pd( __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\
-             (__v8df)\
-             _mm512_undefined_pd (),\
-             (__mmask8) -1,( __R));\
-})
+#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
+                                         (int)(R)); })
+
+#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)_mm512_setzero_pd(), \
+                                         (__mmask8)(U), (int)(R)); })
+
+#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)_mm512_undefined_pd(), \
+                                         (__mmask8)-1, (int)(R)); })
 
 static  __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_sqrt_pd(__m512d __a)
@@ -1585,25 +1560,20 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m5
                    _MM_FROUND_CUR_DIRECTION);
 }
 
-#define _mm512_mask_sqrt_round_ps( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtps512_mask ((__v16sf)( __A),\
-            (__v16sf)( __W),\
-            (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_sqrt_round_ps( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtps512_mask ((__v16sf)( __A),\
-            (__v16sf)\
-            _mm512_setzero_ps (),\
-            (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_sqrt_round_ps( __A, __R) __extension__ ({ \
-__builtin_ia32_sqrtps512_mask ((__v16sf)( __A),\
-            (__v16sf)\
-            _mm512_undefined_ps (),\
-            (__mmask16) -1,( __R));\
-})
+#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
+                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
+                                        (int)(R)); })
+
+#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
+                                        (__v16sf)_mm512_setzero_ps(), \
+                                        (__mmask16)(U), (int)(R)); })
+
+#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
+                                        (__v16sf)_mm512_undefined_ps(), \
+                                        (__mmask16)-1, (int)(R)); })
 
 static  __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_sqrt_ps(__m512 __a)
@@ -2566,23 +2536,20 @@ _mm512_maskz_div_ps(__mmask16 __U, __m51
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION); })
 
-#define _mm512_mask_roundscale_round_ps( __A, __B, __C, __imm, __R) __extension__ ({ \
-  (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __C), (int)__imm,\
-                                          (__v16sf)( __A),\
-                                          (__mmask16)( __B),(int) __R);\
-})
-
-#define _mm512_maskz_roundscale_round_ps( __A, __B, __imm,__R) __extension__ ({ \
-  (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __B), (int)__imm,\
-                                          (__v16sf)_mm512_setzero_ps (),\
-                                          (__mmask16)( __A),(int) __R);\
-})
-
-#define _mm512_roundscale_round_ps( __A, __imm, __R) __extension__ ({ \
-  (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __A),(int) __imm,\
-                                          (__v16sf) _mm512_undefined_ps (),\
-                                          (__mmask16) -1,(int) __R);\
-})
+#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
+  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
+                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
+                                         (int)(R)); })
+
+#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
+  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
+                                         (__v16sf)_mm512_setzero_ps(), \
+                                         (__mmask16)(A), (int)(R)); })
+
+#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
+  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
+                                         (__v16sf)_mm512_undefined_ps(), \
+                                         (__mmask16)-1, (int)(R)); })
 
 #define _mm512_roundscale_pd(A, B) __extension__ ({ \
   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
@@ -2600,23 +2567,20 @@ _mm512_maskz_div_ps(__mmask16 __U, __m51
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION); })
 
-#define _mm512_mask_roundscale_round_pd( __A, __B, __C, __imm ,__R) __extension__ ({ \
-  (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __C),(int)__imm,\
-                                           (__v8df)( __A),\
-                                           (__mmask8)( __B),(int)__R);\
-})
-
-#define _mm512_maskz_roundscale_round_pd( __A, __B, __imm, __R) __extension__ ({ \
-  (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __B),(int)__imm,\
-                                            (__v8df)_mm512_setzero_pd (),\
-                                            (__mmask8)( __A),(int) __R);\
-})
-
-#define _mm512_roundscale_round_pd( __A, __imm , __R) __extension__ ({ \
-  (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __A),(int) __imm,\
-                                            (__v8df)_mm512_undefined_pd (),\
-                                            (__mmask8) -1,(int) __R);\
-})
+#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
+                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
+                                          (int)(R)); })
+
+#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
+                                          (__v8df)_mm512_setzero_pd(), \
+                                          (__mmask8)(A), (int)(R)); })
+
+#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
+                                          (__v8df)_mm512_undefined_pd(), \
+                                          (__mmask8)-1, (int)(R)); })
 
 #define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
@@ -3639,25 +3603,20 @@ _mm512_mask_blend_epi32(__mmask16 __U, _
 
 /* Conversion */
 
-#define _mm512_cvtt_roundps_epu32( __A, __R) __extension__ ({ \
-__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\
-                 (__v16si)\
-                 _mm512_undefined_epi32 (),\
-                 (__mmask16) -1,( __R));\
-})
-
-#define _mm512_mask_cvtt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\
-                 (__v16si)( __W),\
-                 (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvtt_roundps_epu32( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\
-                 (__v16si)\
-                 _mm512_setzero_si512 (),\
-                 (__mmask16)( __U),( __R));\
-})
+#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+                                             (__v16si)_mm512_undefined_epi32(), \
+                                             (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+                                             (__v16si)(__m512i)(W), \
+                                             (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+                                             (__v16si)_mm512_setzero_si512(), \
+                                             (__mmask16)(U), (int)(R)); })
 
 
 static __inline __m512i __DEFAULT_FN_ATTRS
@@ -3693,36 +3652,30 @@ _mm512_maskz_cvttps_epu32 (__mmask16 __U
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)); })
 
-#define _mm512_mask_cvt_roundepi32_ps( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvtdq2ps512_mask ((__v16si)( __A),\
-              (__v16sf)( __W),\
-              (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundepi32_ps( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvtdq2ps512_mask ((__v16si)( __A),\
-              (__v16sf)\
-              _mm512_setzero_ps (),\
-              (__mmask16)( __U),( __R));\
-})
+#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
+                                          (__v16sf)(__m512)(W), \
+                                          (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
+                                          (__v16sf)_mm512_setzero_ps(), \
+                                          (__mmask16)(U), (int)(R)); })
 
 #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)); })
 
-#define _mm512_mask_cvt_roundepu32_ps( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvtudq2ps512_mask ((__v16si)( __A),\
-               (__v16sf)( __W),\
-               (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundepu32_ps( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_cvtudq2ps512_mask ((__v16si)( __A),\
-               (__v16sf)\
-               _mm512_setzero_ps (),\
-               (__mmask16)( __U),( __R));\
-})
+#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
+                                           (__v16sf)(__m512)(W), \
+                                           (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
+                                           (__v16sf)_mm512_setzero_ps(), \
+                                           (__mmask16)(U), (int)(R)); })
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_cvtepu32_ps (__m512i __A)
@@ -3835,8 +3788,8 @@ _mm512_maskz_cvtepu32_pd (__mmask8 __U,
 
 #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
-                                          (__v8sf)(W), \
-                                          (__mmask8)(U), (int)(R)); })
+                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
+                                          (int)(R)); })
 
 #define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
@@ -3870,27 +3823,20 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-#define _mm512_cvt_roundps_ph( __A, __I) __extension__ ({ \
-  (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\
-                                             (__I),\
-                                             (__v16hi)_mm256_undefined_si256 (),\
-                                             (__mmask16) -1);\
-})
-
-#define _mm512_mask_cvt_roundps_ph( __U, __W, __A, __I) __extension__ ({ \
-  (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\
-                                             (__I),\
-                                             (__v16hi)( __U),\
-                                             (__mmask16)( __W));\
-})
-
-#define _mm512_maskz_cvt_roundps_ph( __W, __A, __I) __extension__ ({ \
-  (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\
-                                             (__I),\
-                                             (__v16hi)\
-                                             _mm256_setzero_si256 (),\
-                                             (__mmask16)( __W));\
-})
+#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+                                            (__v16hi)_mm256_undefined_si256(), \
+                                            (__mmask16)-1); })
+
+#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+                                            (__v16hi)(__m256i)(U), \
+                                            (__mmask16)(W)); })
+
+#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+                                            (__v16hi)_mm256_setzero_si256(), \
+                                            (__mmask16)(W)); })
 
 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
@@ -3907,27 +3853,22 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)); })
 
-#define _mm512_cvt_roundph_ps( __A, __R) __extension__ ({ \
-__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\
-               (__v16sf)\
-               _mm512_undefined_ps (),\
-               (__mmask16) -1,( __R));\
-})
-
-#define _mm512_mask_cvt_roundph_ps( __W, __U, __A, __R) __extension__ ({ \
-__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\
-               (__v16sf)( __W),\
-               (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundph_ps( __U, __A, __R) __extension__ ({ \
-__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\
-               (__v16sf)\
-               _mm512_setzero_ps (),\
-               (__mmask16)( __U),( __R));\
-})
-                     
-                     
+#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+                                           (__v16sf)_mm512_undefined_ps(), \
+                                           (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+                                           (__v16sf)(__m512)(W), \
+                                           (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+                                           (__v16sf)_mm512_setzero_ps(), \
+                                           (__mmask16)(U), (int)(R)); })
+
+
 static  __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_cvtph_ps(__m256i __A)
 {
@@ -3963,10 +3904,10 @@ _mm512_maskz_cvtph_ps (__mmask16 __U, __
 
 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
-                                            (__v8si)(W), \
+                                            (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)); })
 
-#define _mm512_maskz_cvtt_roundpd_epi32( U, A, R) __extension__ ({ \
+#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)); })
@@ -4003,12 +3944,12 @@ _mm512_maskz_cvttpd_epi32 (__mmask8 __U,
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)); })
 
-#define _mm512_mask_cvtt_roundps_epi32( W, U, A, R) __extension__ ({ \
+#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
-                                            (__v16si)(W), \
+                                            (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)); })
 
-#define _mm512_maskz_cvtt_roundps_epi32( U, A, R) __extension__ ({ \
+#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)); })
@@ -4045,18 +3986,15 @@ _mm512_maskz_cvttps_epi32 (__mmask16 __U
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })
 
-#define _mm512_mask_cvt_roundps_epi32( __W, __U, __A, __R) __extension__ ({ \
-  (__m512i)__builtin_ia32_cvtps2dq512_mask ((__v16sf)( __A),\
-               (__v16si)( __W),\
-               (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundps_epi32( __U, __A, __R) __extension__ ({ \
-  (__m512i)__builtin_ia32_cvtps2dq512_mask ((__v16sf)( __A),\
-               (__v16si)\
-               _mm512_setzero_si512 (),\
-               (__mmask16)( __U),( __R));\
-})
+#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
+                                           (__v16si)(__m512i)(W), \
+                                           (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
+                                           (__v16si)_mm512_setzero_si512(), \
+                                           (__mmask16)(U), (int)(R)); })
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_cvtps_epi32 (__m512 __A)
@@ -4091,9 +4029,9 @@ _mm512_maskz_cvtps_epi32 (__mmask16 __U,
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })
 
-#define _mm512_mask_cvt_roundpd_epi32( W, U, A, R) __extension__ ({ \
+#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
-                                           (__v8si)(W), \
+                                           (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })
 
 #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
@@ -4135,18 +4073,15 @@ _mm512_maskz_cvtpd_epi32 (__mmask8 __U,
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)); })
 
-#define _mm512_mask_cvt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \
-  (__m512i)__builtin_ia32_cvtps2udq512_mask ((__v16sf)( __A),\
-                (__v16si)( __W),\
-                (__mmask16)( __U),( __R));\
-})
-
-#define _mm512_maskz_cvt_roundps_epu32( __U, __A, __R) __extension__ ({ \
-  (__m512i)__builtin_ia32_cvtps2udq512_mask ((__v16sf)( __A),\
-                (__v16si)\
-                _mm512_setzero_si512 (),\
-                (__mmask16)( __U),( __R));\
-})
+#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
+                                            (__v16si)(__m512i)(W), \
+                                            (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
+                                            (__v16si)_mm512_setzero_si512(), \
+                                            (__mmask16)(U), (int)(R)); })
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_cvtps_epu32 ( __m512 __A)
@@ -4182,10 +4117,10 @@ _mm512_maskz_cvtps_epu32 ( __mmask16 __U
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)); })
 
-#define _mm512_mask_cvt_roundpd_epu32( W, U, A, R) __extension__ ({ \
+#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
-                                            (__v8si)(W), \
-                                            (__mmask8) (U), (int)(R)); })
+                                            (__v8si)(__m256i)(W), \
+                                            (__mmask8)(U), (int)(R)); })
 
 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \




More information about the cfe-commits mailing list