r243009 - [X86][AVX512F] Add FP scalar intrinsics
Asaf Badouh
asaf.badouh at intel.com
Thu Jul 23 05:13:33 PDT 2015
Author: abadouh
Date: Thu Jul 23 07:13:32 2015
New Revision: 243009
URL: http://llvm.org/viewvc/llvm-project?rev=243009&view=rev
Log:
[X86][AVX512F] Add FP scalar intrinsics
Adds mask, maskz and explicit-rounding (_round) intrinsics for the scalar FP (ss/sd) forms of add/sub/mul/div/min/max.
Differential Revision: http://reviews.llvm.org/D11418
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=243009&r1=243008&r2=243009&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Jul 23 07:13:32 2015
@@ -1230,5 +1230,17 @@ BUILTIN(__builtin_ia32_subpd512_mask, "V
BUILTIN(__builtin_ia32_subps512_mask, "V16fV16fV16fV16fUsIi", "")
BUILTIN(__builtin_ia32_pmaddubsw512_mask, "V32sV64cV64cV32sUi", "")
BUILTIN(__builtin_ia32_pmaddwd512_mask, "V16iV32sV32sV16iUs", "")
+BUILTIN(__builtin_ia32_addss_mask, "V4fV4fV4fV4fUcIi", "") // ss group: avx512fintrin.h calls these as (A, B, third vector, mask, rounding)
+BUILTIN(__builtin_ia32_divss_mask, "V4fV4fV4fV4fUcIi", "")
+BUILTIN(__builtin_ia32_mulss_mask, "V4fV4fV4fV4fUcIi", "")
+BUILTIN(__builtin_ia32_subss_mask, "V4fV4fV4fV4fUcIi", "")
+BUILTIN(__builtin_ia32_maxss_mask, "V4fV4fV4fV4fUcIi", "")
+BUILTIN(__builtin_ia32_minss_mask, "V4fV4fV4fV4fUcIi", "")
+BUILTIN(__builtin_ia32_addsd_mask, "V2dV2dV2dV2dUcIi", "") // sd group: same argument order, V2d vectors instead of V4f
+BUILTIN(__builtin_ia32_divsd_mask, "V2dV2dV2dV2dUcIi", "")
+BUILTIN(__builtin_ia32_mulsd_mask, "V2dV2dV2dV2dUcIi", "")
+BUILTIN(__builtin_ia32_subsd_mask, "V2dV2dV2dV2dUcIi", "")
+BUILTIN(__builtin_ia32_maxsd_mask, "V2dV2dV2dV2dUcIi", "")
+BUILTIN(__builtin_ia32_minsd_mask, "V2dV2dV2dV2dUcIi", "")
#undef BUILTIN
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=243009&r1=243008&r2=243009&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Thu Jul 23 07:13:32 2015
@@ -543,6 +543,66 @@ _mm512_max_ps(__m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS /* mask form: __W is the builtin's third vector operand, __U its mask */
+_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_maxss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* maskz form: same builtin as the mask form, with _mm_setzero_ps() in place of __W. */
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_maxss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* Unmasked form, caller-supplied __R; mask is (__mmask8) -1. Macro arguments are parenthesized so expressions are cast whole. */
+#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_maxss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
+/* mask form with caller-supplied __R, forwarded as the builtin's last argument. */
+#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_maxss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) (__W), (__mmask8) (__U), (__R)); })
+/* maskz form with caller-supplied __R; third operand is _mm_setzero_ps(). */
+#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_maxss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
+/* sd mask form: __W is the builtin's third vector operand, __U its mask. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_maxsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd maskz form: _mm_setzero_pd() replaces __W. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_maxsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd unmasked form, caller-supplied __R; mask is (__mmask8) -1. */
+#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_maxsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
+/* sd mask form with caller-supplied __R. */
+#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_maxsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) (__W), (__mmask8) (__U), (__R)); })
+/* sd maskz form with caller-supplied __R; third operand is _mm_setzero_pd(). */
+#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_maxsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
+
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A, __m512i __B)
@@ -606,6 +666,66 @@ _mm512_min_ps(__m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS /* mask form: __W is the builtin's third vector operand, __U its mask */
+_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_minss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* maskz form: same builtin as the mask form, with _mm_setzero_ps() in place of __W. */
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_minss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* Unmasked form, caller-supplied __R; mask is (__mmask8) -1. Macro arguments are parenthesized so expressions are cast whole. */
+#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_minss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
+/* mask form with caller-supplied __R, forwarded as the builtin's last argument. */
+#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_minss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) (__W), (__mmask8) (__U), (__R)); })
+/* maskz form with caller-supplied __R; third operand is _mm_setzero_ps(). */
+#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_minss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
+/* sd mask form: __W is the builtin's third vector operand, __U its mask. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_minsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd maskz form: _mm_setzero_pd() replaces __W. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_minsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd unmasked form, caller-supplied __R; mask is (__mmask8) -1. */
+#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_minsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
+/* sd mask form with caller-supplied __R. */
+#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_minsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) (__W), (__mmask8) (__U), (__R)); })
+/* sd maskz form with caller-supplied __R; third operand is _mm_setzero_pd(). */
+#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_minsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
+
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_min_epi32(__m512i __A, __m512i __B)
@@ -873,6 +993,65 @@ _mm512_abs_epi32(__m512i __A)
(__mmask16) -1);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS /* mask form: __W is the builtin's third vector operand, __U its mask */
+_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_addss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* maskz form: same builtin as the mask form, with _mm_setzero_ps() in place of __W. */
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_addss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* Unmasked form, caller-supplied __R; mask is (__mmask8) -1. Macro arguments are parenthesized so expressions are cast whole. */
+#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_addss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
+/* mask form with caller-supplied __R, forwarded as the builtin's last argument. */
+#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_addss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) (__W), (__mmask8) (__U), (__R)); })
+/* maskz form with caller-supplied __R; third operand is _mm_setzero_ps(). */
+#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_addss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
+/* sd mask form: __W is the builtin's third vector operand, __U its mask. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_addsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd maskz form: _mm_setzero_pd() replaces __W. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_addsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ /* sd unmasked form; mask is (__mmask8) -1 */ \
+ (__m128d) __builtin_ia32_addsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
+/* sd mask form with caller-supplied __R. */
+#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_addsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) (__W), (__mmask8) (__U), (__R)); })
+/* sd maskz form with caller-supplied __R; third operand is _mm_setzero_pd(). */
+#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_addsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
@@ -933,6 +1112,65 @@ _mm512_maskz_add_ps(__mmask16 __U, __m51
(__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
(__v16sf) _mm512_setzero_ps(), (__mmask16)__U, __R); })
+static __inline__ __m128 __DEFAULT_FN_ATTRS /* mask form: __W is the builtin's third vector operand, __U its mask */
+_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_subss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* maskz form: same builtin as the mask form, with _mm_setzero_ps() in place of __W. */
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_subss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ /* unmasked form; mask is (__mmask8) -1; arguments parenthesized */ \
+ (__m128) __builtin_ia32_subss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
+/* mask form with caller-supplied __R, forwarded as the builtin's last argument. */
+#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_subss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) (__W), (__mmask8) (__U), (__R)); })
+/* maskz form with caller-supplied __R; third operand is _mm_setzero_ps(). */
+#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_subss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
+/* sd mask form: __W is the builtin's third vector operand, __U its mask. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_subsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd maskz form: _mm_setzero_pd() replaces __W. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_subsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd unmasked form, caller-supplied __R; mask is (__mmask8) -1. */
+#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_subsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
+/* sd mask form with caller-supplied __R. */
+#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_subsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) (__W), (__mmask8) (__U), (__R)); })
+/* sd maskz form with caller-supplied __R; third operand is _mm_setzero_pd(). */
+#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_subsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
@@ -995,6 +1233,65 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m51
(__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
(__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);});
+static __inline__ __m128 __DEFAULT_FN_ATTRS /* mask form: __W is the builtin's third vector operand, __U its mask */
+_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_mulss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* maskz form: same builtin as the mask form, with _mm_setzero_ps() in place of __W. */
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_mulss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ /* unmasked form; mask is (__mmask8) -1; arguments parenthesized */ \
+ (__m128) __builtin_ia32_mulss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
+/* mask form with caller-supplied __R, forwarded as the builtin's last argument. */
+#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_mulss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) (__W), (__mmask8) (__U), (__R)); })
+/* maskz form with caller-supplied __R; third operand is _mm_setzero_ps(). */
+#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_mulss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
+/* sd mask form: __W is the builtin's third vector operand, __U its mask. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_mulsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd maskz form: _mm_setzero_pd() replaces __W. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_mulsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd unmasked form, caller-supplied __R; mask is (__mmask8) -1. */
+#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_mulsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
+/* sd mask form with caller-supplied __R. */
+#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_mulsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) (__W), (__mmask8) (__U), (__R)); })
+/* sd maskz form with caller-supplied __R; third operand is _mm_setzero_pd(). */
+#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_mulsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
@@ -1057,6 +1354,66 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m51
(__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
(__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);});
+static __inline__ __m128 __DEFAULT_FN_ATTRS /* mask form: __W is the builtin's third vector operand, __U its mask */
+_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_divss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* maskz form: same builtin as the mask form, with _mm_setzero_ps() in place of __W. */
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+ return (__m128) __builtin_ia32_divss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* Unmasked form, caller-supplied __R; mask is (__mmask8) -1. Macro arguments are parenthesized so expressions are cast whole. */
+#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_divss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) -1, (__R)); })
+/* mask form with caller-supplied __R, forwarded as the builtin's last argument. */
+#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_divss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) (__W), (__mmask8) (__U), (__R)); })
+/* maskz form with caller-supplied __R; third operand is _mm_setzero_ps(). */
+#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
+ (__m128) __builtin_ia32_divss_mask ((__v4sf) (__A), (__v4sf) (__B), \
+ (__v4sf) _mm_setzero_ps(), (__mmask8) (__U), (__R)); })
+/* sd mask form: __W is the builtin's third vector operand, __U its mask. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_divsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd maskz form: _mm_setzero_pd() replaces __W. */
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+ return (__m128d) __builtin_ia32_divsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+/* sd unmasked form, caller-supplied __R; mask is (__mmask8) -1. */
+#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_divsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) -1, (__R)); })
+/* sd mask form with caller-supplied __R. */
+#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_divsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) (__W), (__mmask8) (__U), (__R)); })
+/* sd maskz form with caller-supplied __R; third operand is _mm_setzero_pd(). */
+#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
+ (__m128d) __builtin_ia32_divsd_mask ((__v2df) (__A), (__v2df) (__B), \
+ (__v2df) _mm_setzero_pd(), (__mmask8) (__U), (__R)); })
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=243009&r1=243008&r2=243009&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Thu Jul 23 07:13:32 2015
@@ -1425,6 +1425,56 @@ __m512 test_mm512_maskz_add_ps(__mmask16
// CHECK: @llvm.x86.avx512.mask.add.ps.512
return _mm512_maskz_add_ps(__U,__A,__B);
}
+__m128 test_mm_add_round_ss(__m128 __A, __m128 __B) { // scalar add tests: each matches only the emitted intrinsic name
+ // CHECK-LABEL: @test_mm_add_round_ss
+ // CHECK: @llvm.x86.avx512.mask.add.ss.round
+ return _mm_add_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT); // unmasked macro form
+}
+__m128 test_mm_mask_add_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_add_round_ss
+ // CHECK: @llvm.x86.avx512.mask.add.ss.round
+ return _mm_mask_add_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // mask macro form
+}
+__m128 test_mm_maskz_add_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_add_round_ss
+ // CHECK: @llvm.x86.avx512.mask.add.ss.round
+ return _mm_maskz_add_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // maskz macro form
+}
+__m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_add_ss
+ // CHECK: @llvm.x86.avx512.mask.add.ss.round
+ return _mm_mask_add_ss(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_add_ss
+ // CHECK: @llvm.x86.avx512.mask.add.ss.round
+ return _mm_maskz_add_ss(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
+__m128d test_mm_add_round_sd(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_add_round_sd
+ // CHECK: @llvm.x86.avx512.mask.add.sd.round
+ return _mm_add_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT); // unmasked macro form
+}
+__m128d test_mm_mask_add_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_add_round_sd
+ // CHECK: @llvm.x86.avx512.mask.add.sd.round
+ return _mm_mask_add_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // mask macro form
+}
+__m128d test_mm_maskz_add_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_add_round_sd
+ // CHECK: @llvm.x86.avx512.mask.add.sd.round
+ return _mm_maskz_add_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // maskz macro form
+}
+__m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_add_sd
+ // CHECK: @llvm.x86.avx512.mask.add.sd.round
+ return _mm_mask_add_sd(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_add_sd
+ // CHECK: @llvm.x86.avx512.mask.add.sd.round
+ return _mm_maskz_add_sd(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
__m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_sub_round_pd
// CHECK: @llvm.x86.avx512.mask.sub.pd.512
@@ -1475,6 +1525,56 @@ __m512 test_mm512_maskz_sub_ps(__mmask16
// CHECK: @llvm.x86.avx512.mask.sub.ps.512
return _mm512_maskz_sub_ps(__U,__A,__B);
}
+__m128 test_mm_sub_round_ss(__m128 __A, __m128 __B) { // scalar sub tests: each matches only the emitted intrinsic name
+ // CHECK-LABEL: @test_mm_sub_round_ss
+ // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+ return _mm_sub_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT); // unmasked macro form
+}
+__m128 test_mm_mask_sub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_sub_round_ss
+ // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+ return _mm_mask_sub_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // mask macro form
+}
+__m128 test_mm_maskz_sub_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_sub_round_ss
+ // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+ return _mm_maskz_sub_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // maskz macro form
+}
+__m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_sub_ss
+ // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+ return _mm_mask_sub_ss(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_sub_ss
+ // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+ return _mm_maskz_sub_ss(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
+__m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_sub_round_sd
+ // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+ return _mm_sub_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT); // unmasked macro form
+}
+__m128d test_mm_mask_sub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_sub_round_sd
+ // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+ return _mm_mask_sub_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // mask macro form
+}
+__m128d test_mm_maskz_sub_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_sub_round_sd
+ // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+ return _mm_maskz_sub_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // maskz macro form
+}
+__m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_sub_sd
+ // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+ return _mm_mask_sub_sd(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_sub_sd
+ // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+ return _mm_maskz_sub_sd(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
__m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mul_round_pd
// CHECK: @llvm.x86.avx512.mask.mul.pd.512
@@ -1525,6 +1625,56 @@ __m512 test_mm512_maskz_mul_ps(__mmask16
// CHECK: @llvm.x86.avx512.mask.mul.ps.512
return _mm512_maskz_mul_ps(__U,__A,__B);
}
+__m128 test_mm_mul_round_ss(__m128 __A, __m128 __B) { // scalar mul tests: each matches only the emitted intrinsic name
+ // CHECK-LABEL: @test_mm_mul_round_ss
+ // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+ return _mm_mul_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT); // unmasked macro form
+}
+__m128 test_mm_mask_mul_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_mul_round_ss
+ // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+ return _mm_mask_mul_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // mask macro form
+}
+__m128 test_mm_maskz_mul_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_mul_round_ss
+ // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+ return _mm_maskz_mul_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // maskz macro form
+}
+__m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_mul_ss
+ // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+ return _mm_mask_mul_ss(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_mul_ss
+ // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+ return _mm_maskz_mul_ss(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
+__m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mul_round_sd
+ // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+ return _mm_mul_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT); // unmasked macro form
+}
+__m128d test_mm_mask_mul_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_mul_round_sd
+ // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+ return _mm_mask_mul_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // mask macro form
+}
+__m128d test_mm_maskz_mul_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_mul_round_sd
+ // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+ return _mm_maskz_mul_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // maskz macro form
+}
+__m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_mul_sd
+ // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+ return _mm_mask_mul_sd(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_mul_sd
+ // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+ return _mm_maskz_mul_sd(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
__m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_div_round_pd
// CHECK: @llvm.x86.avx512.mask.div.pd.512
@@ -1575,3 +1725,153 @@ __m512 test_mm512_maskz_div_ps(__mmask16
// CHECK: @llvm.x86.avx512.mask.div.ps.512
return _mm512_maskz_div_ps(__U,__A,__B);
}
+__m128 test_mm_div_round_ss(__m128 __A, __m128 __B) { // scalar div tests: each matches only the emitted intrinsic name
+ // CHECK-LABEL: @test_mm_div_round_ss
+ // CHECK: @llvm.x86.avx512.mask.div.ss.round
+ return _mm_div_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT); // unmasked macro form
+}
+__m128 test_mm_mask_div_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_div_round_ss
+ // CHECK: @llvm.x86.avx512.mask.div.ss.round
+ return _mm_mask_div_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // mask macro form
+}
+__m128 test_mm_maskz_div_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_div_round_ss
+ // CHECK: @llvm.x86.avx512.mask.div.ss.round
+ return _mm_maskz_div_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // maskz macro form
+}
+__m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_div_ss
+ // CHECK: @llvm.x86.avx512.mask.div.ss.round
+ return _mm_mask_div_ss(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_div_ss
+ // CHECK: @llvm.x86.avx512.mask.div.ss.round
+ return _mm_maskz_div_ss(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
+__m128d test_mm_div_round_sd(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_div_round_sd
+ // CHECK: @llvm.x86.avx512.mask.div.sd.round
+ return _mm_div_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT); // unmasked macro form
+}
+__m128d test_mm_mask_div_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_div_round_sd
+ // CHECK: @llvm.x86.avx512.mask.div.sd.round
+ return _mm_mask_div_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // mask macro form
+}
+__m128d test_mm_maskz_div_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_div_round_sd
+ // CHECK: @llvm.x86.avx512.mask.div.sd.round
+ return _mm_maskz_div_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); // maskz macro form
+}
+__m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_div_sd
+ // CHECK: @llvm.x86.avx512.mask.div.sd.round
+ return _mm_mask_div_sd(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_div_sd
+ // CHECK: @llvm.x86.avx512.mask.div.sd.round
+ return _mm_maskz_div_sd(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
+__m128 test_mm_max_round_ss(__m128 __A, __m128 __B) { // scalar max tests: each matches only the emitted intrinsic name
+ // CHECK-LABEL: @test_mm_max_round_ss
+ // CHECK: @llvm.x86.avx512.mask.max.ss.round
+ return _mm_max_round_ss(__A,__B,0x08); // unmasked macro form; 0x08 is presumably _MM_FROUND_NO_EXC -- TODO confirm
+}
+__m128 test_mm_mask_max_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_max_round_ss
+ // CHECK: @llvm.x86.avx512.mask.max.ss.round
+ return _mm_mask_max_round_ss(__W,__U,__A,__B,0x08); // mask macro form
+}
+__m128 test_mm_maskz_max_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_max_round_ss
+ // CHECK: @llvm.x86.avx512.mask.max.ss.round
+ return _mm_maskz_max_round_ss(__U,__A,__B,0x08); // maskz macro form
+}
+__m128 test_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_max_ss
+ // CHECK: @llvm.x86.avx512.mask.max.ss.round
+ return _mm_mask_max_ss(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128 test_mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_max_ss
+ // CHECK: @llvm.x86.avx512.mask.max.ss.round
+ return _mm_maskz_max_ss(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
+__m128d test_mm_max_round_sd(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_max_round_sd
+ // CHECK: @llvm.x86.avx512.mask.max.sd.round
+ return _mm_max_round_sd(__A,__B,0x08); // unmasked macro form; 0x08 is presumably _MM_FROUND_NO_EXC -- TODO confirm
+}
+__m128d test_mm_mask_max_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_max_round_sd
+ // CHECK: @llvm.x86.avx512.mask.max.sd.round
+ return _mm_mask_max_round_sd(__W,__U,__A,__B,0x08); // mask macro form
+}
+__m128d test_mm_maskz_max_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_max_round_sd
+ // CHECK: @llvm.x86.avx512.mask.max.sd.round
+ return _mm_maskz_max_round_sd(__U,__A,__B,0x08); // maskz macro form
+}
+__m128d test_mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_max_sd
+ // CHECK: @llvm.x86.avx512.mask.max.sd.round
+ return _mm_mask_max_sd(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128d test_mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_max_sd
+ // CHECK: @llvm.x86.avx512.mask.max.sd.round
+ return _mm_maskz_max_sd(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
+__m128 test_mm_min_round_ss(__m128 __A, __m128 __B) { // scalar min tests: each matches only the emitted intrinsic name
+ // CHECK-LABEL: @test_mm_min_round_ss
+ // CHECK: @llvm.x86.avx512.mask.min.ss.round
+ return _mm_min_round_ss(__A,__B,0x08); // unmasked macro form; 0x08 is presumably _MM_FROUND_NO_EXC -- TODO confirm
+}
+__m128 test_mm_mask_min_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_min_round_ss
+ // CHECK: @llvm.x86.avx512.mask.min.ss.round
+ return _mm_mask_min_round_ss(__W,__U,__A,__B,0x08); // mask macro form
+}
+__m128 test_mm_maskz_min_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_min_round_ss
+ // CHECK: @llvm.x86.avx512.mask.min.ss.round
+ return _mm_maskz_min_round_ss(__U,__A,__B,0x08); // maskz macro form
+}
+__m128 test_mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_mask_min_ss
+ // CHECK: @llvm.x86.avx512.mask.min.ss.round
+ return _mm_mask_min_ss(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128 test_mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+ // CHECK-LABEL: @test_mm_maskz_min_ss
+ // CHECK: @llvm.x86.avx512.mask.min.ss.round
+ return _mm_maskz_min_ss(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
+__m128d test_mm_min_round_sd(__m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_min_round_sd
+ // CHECK: @llvm.x86.avx512.mask.min.sd.round
+ return _mm_min_round_sd(__A,__B,0x08); // unmasked macro form; 0x08 is presumably _MM_FROUND_NO_EXC -- TODO confirm
+}
+__m128d test_mm_mask_min_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_min_round_sd
+ // CHECK: @llvm.x86.avx512.mask.min.sd.round
+ return _mm_mask_min_round_sd(__W,__U,__A,__B,0x08); // mask macro form
+}
+__m128d test_mm_maskz_min_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_min_round_sd
+ // CHECK: @llvm.x86.avx512.mask.min.sd.round
+ return _mm_maskz_min_round_sd(__U,__A,__B,0x08); // maskz macro form
+}
+__m128d test_mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_mask_min_sd
+ // CHECK: @llvm.x86.avx512.mask.min.sd.round
+ return _mm_mask_min_sd(__W,__U,__A,__B); // mask inline function; no rounding argument at the call site
+}
+__m128d test_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+ // CHECK-LABEL: @test_mm_maskz_min_sd
+ // CHECK: @llvm.x86.avx512.mask.min.sd.round
+ return _mm_maskz_min_sd(__U,__A,__B); // maskz inline function; no rounding argument at the call site
+}
More information about the cfe-commits
mailing list