r267129 - [Clang][AVX512][BUILTIN] Adding scalar intrinsics for rsqrt14 , rcp14, getexp and getmant instruction set
Michael Zuckerman via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 22 03:06:11 PDT 2016
Author: mzuckerm
Date: Fri Apr 22 05:06:10 2016
New Revision: 267129
URL: http://llvm.org/viewvc/llvm-project?rev=267129&view=rev
Log:
[Clang][AVX512][BUILTIN] Adding scalar intrinsics for rsqrt14 ,rcp14, getexp and getmant instruction set
Differential Revision: http://reviews.llvm.org/D19326
Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=267129&r1=267128&r2=267129&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Fri Apr 22 05:06:10 2016
@@ -1021,6 +1021,24 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B)
(__mmask8) -1);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
@@ -1031,6 +1049,24 @@ _mm_rsqrt14_sd(__m128d __A, __m128d __B)
(__mmask8) -1);
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
@@ -1058,6 +1094,24 @@ _mm_rcp14_ss(__m128 __A, __m128 __B)
(__mmask8) -1);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
@@ -1068,6 +1122,24 @@ _mm_rcp14_sd(__m128d __A, __m128d __B)
(__mmask8) -1);
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)
{
@@ -4008,6 +4080,42 @@ _mm_getexp_sd (__m128d __A, __m128d __B)
(__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_getexp_round_sd( __W, __U, __A, __B, __R) __extension__ ({\
+__builtin_ia32_getexpsd128_round_mask ((__v2df) __A,\
+ (__v2df) __B,\
+ (__v2df) __W,\
+ (__mmask8) __U,\
+ __R);\
+})
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_getexp_round_sd( __U, __A, __B, __R) __extension__ ({\
+__builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,\
+ (__v2df) __B,\
+ (__v2df) _mm_setzero_pd (),\
+ (__mmask8) __U,\
+ __R);\
+})
+
#define _mm_getexp_round_ss( __A, __B, __R) __extension__ ({ \
__builtin_ia32_getexpss128_round_mask ((__v4sf)( __A),\
(__v4sf)( __B), (__v4sf) _mm_setzero_ps(), (__mmask8) -1,\
@@ -4021,6 +4129,42 @@ _mm_getexp_ss (__m128 __A, __m128 __B)
(__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_getexp_ss (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_getexp_round_ss( __W, __U, __A, __B, __R) __extension__ ({\
+__builtin_ia32_getexpss128_round_mask ((__v4sf) __A,\
+ (__v4sf) __B,\
+ (__v4sf) __W,\
+ (__mmask8) __U,\
+ __R);\
+})
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_getexp_ss (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_getexp_round_ss( __U, __A, __B, __R) __extension__ ({\
+__builtin_ia32_getexpss128_round_mask ((__v4sf) __A,\
+ (__v4sf) __B,\
+ (__v4sf) _mm_setzero_ps (),\
+ (__mmask8) __U,\
+ __R);\
+})
+
#define _mm_getmant_round_sd( __A, __B, __C, __D, __R) __extension__ ({ \
__builtin_ia32_getmantsd_round_mask ((__v2df)( __A),\
(__v2df)( __B),\
@@ -4035,6 +4179,42 @@ __builtin_ia32_getmantsd_round_mask ((__
_MM_FROUND_CUR_DIRECTION);\
})
+#define _mm_mask_getmant_sd( __W, __U, __A, __B, __C, __D) __extension__ ({\
+__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\
+ (__v2df) __B,\
+ (( __D) << 2) |( __C),\
+ (__v2df) __W,\
+ (__mmask8) __U,\
+ _MM_FROUND_CUR_DIRECTION);\
+})
+
+#define _mm_mask_getmant_round_sd( __W, __U, __A, __B, __C, __D, __R)({\
+__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\
+ (__v2df) __B,\
+ (( __D) << 2) |( __C),\
+ (__v2df) __W,\
+ (__mmask8) __U,\
+ __R);\
+})
+
+#define _mm_maskz_getmant_sd( __U, __A, __B, __C, __D) __extension__ ({\
+__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\
+ (__v2df) __B,\
+ (( __D) << 2) |( __C),\
+ (__v2df) _mm_setzero_pd (),\
+ (__mmask8) __U,\
+ _MM_FROUND_CUR_DIRECTION);\
+})
+
+#define _mm_maskz_getmant_round_sd( __U, __A, __B, __C, __D, __R) __extension__ ({\
+__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\
+ (__v2df) __B,\
+ (( __D) << 2) |( __C),\
+ (__v2df) _mm_setzero_pd (),\
+ (__mmask8) __U,\
+ __R);\
+})
+
#define _mm_getmant_round_ss( __A, __B, __C, __D, __R) __extension__ ({ \
__builtin_ia32_getmantss_round_mask ((__v4sf)( __A),\
(__v4sf)( __B),\
@@ -4049,6 +4229,41 @@ __builtin_ia32_getmantss_round_mask ((__
_MM_FROUND_CUR_DIRECTION);\
})
+#define _mm_mask_getmant_ss( __W, __U, __A, __B, __C, __D) __extension__ ({\
+__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\
+ (__v4sf) __B,\
+ (( __D) << 2) |( __C),\
+ (__v4sf) __W,\
+ (__mmask8) __U,\
+ _MM_FROUND_CUR_DIRECTION);\
+})
+
+#define _mm_mask_getmant_round_ss( __W, __U, __A, __B, __C, __D, __R)({\
+__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\
+ (__v4sf) __B,\
+ (( __D) << 2) |( __C),\
+ (__v4sf) __W,\
+ (__mmask8) __U,\
+ __R);\
+})
+
+#define _mm_maskz_getmant_ss( __U, __A, __B, __C, __D) __extension__ ({\
+__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\
+ (__v4sf) __B,\
+ (( __D) << 2) |( __C),\
+ (__v4sf) _mm_setzero_pd (),\
+ (__mmask8) __U,\
+ _MM_FROUND_CUR_DIRECTION);\
+})
+
+#define _mm_maskz_getmant_round_ss( __U, __A, __B, __C, __D, __R) __extension__ ({\
+__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\
+ (__v4sf) __B,\
+ (( __D) << 2) |( __C),\
+ (__v4sf) _mm_setzero_ps (),\
+ (__mmask8) __U,\
+ __R);\
+})
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov (__mmask16 __A)
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=267129&r1=267128&r2=267129&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Fri Apr 22 05:06:10 2016
@@ -2556,8 +2556,6 @@ __m128d test_mm_getmant_sd(__m128d __A,
return _mm_getmant_sd(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}
-
-
__m128 test_mm_getmant_round_ss(__m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_getmant_round_ss
// CHECK: @llvm.x86.avx512.mask.getmant.ss
@@ -4372,3 +4370,147 @@ __m512i test_mm512_mask_i32gather_epi64(
return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
+__m128d test_mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_mask_rsqrt14_sd
+ // CHECK: @llvm.x86.avx512.rsqrt14.sd
+ return _mm_mask_rsqrt14_sd(__W, __U, __A, __B);
+}
+
+__m128d test_mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_maskz_rsqrt14_sd
+ // CHECK: @llvm.x86.avx512.rsqrt14.sd
+ return _mm_maskz_rsqrt14_sd(__U, __A, __B);
+}
+
+__m128 test_mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_mask_rsqrt14_ss
+ // CHECK: @llvm.x86.avx512.rsqrt14.ss
+ return _mm_mask_rsqrt14_ss(__W, __U, __A, __B);
+}
+
+__m128 test_mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_maskz_rsqrt14_ss
+ // CHECK: @llvm.x86.avx512.rsqrt14.ss
+ return _mm_maskz_rsqrt14_ss(__U, __A, __B);
+}
+
+__m128d test_mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_mask_rcp14_sd
+ // CHECK: @llvm.x86.avx512.rcp14.sd
+ return _mm_mask_rcp14_sd(__W, __U, __A, __B);
+}
+
+__m128d test_mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_maskz_rcp14_sd
+ // CHECK: @llvm.x86.avx512.rcp14.sd
+ return _mm_maskz_rcp14_sd(__U, __A, __B);
+}
+
+__m128 test_mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_mask_rcp14_ss
+ // CHECK: @llvm.x86.avx512.rcp14.ss
+ return _mm_mask_rcp14_ss(__W, __U, __A, __B);
+}
+
+__m128 test_mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_maskz_rcp14_ss
+ // CHECK: @llvm.x86.avx512.rcp14.ss
+ return _mm_maskz_rcp14_ss(__U, __A, __B);
+}
+
+__m128d test_mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_mask_getexp_sd
+ // CHECK: @llvm.x86.avx512.mask.getexp.sd
+ return _mm_mask_getexp_sd(__W, __U, __A, __B);
+}
+
+__m128d test_mm_mask_getexp_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_mask_getexp_round_sd
+ // CHECK: @llvm.x86.avx512.mask.getexp.sd
+ return _mm_mask_getexp_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128d test_mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_maskz_getexp_sd
+ // CHECK: @llvm.x86.avx512.mask.getexp.sd
+ return _mm_maskz_getexp_sd(__U, __A, __B);
+}
+
+__m128d test_mm_maskz_getexp_round_sd(__mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_maskz_getexp_round_sd
+ // CHECK: @llvm.x86.avx512.mask.getexp.sd
+ return _mm_maskz_getexp_round_sd(__U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128 test_mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_mask_getexp_ss
+ // CHECK: @llvm.x86.avx512.mask.getexp.ss
+ return _mm_mask_getexp_ss(__W, __U, __A, __B);
+}
+
+__m128 test_mm_mask_getexp_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_mask_getexp_round_ss
+ // CHECK: @llvm.x86.avx512.mask.getexp.ss
+ return _mm_mask_getexp_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128 test_mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_maskz_getexp_ss
+ // CHECK: @llvm.x86.avx512.mask.getexp.ss
+ return _mm_maskz_getexp_ss(__U, __A, __B);
+}
+
+__m128 test_mm_maskz_getexp_round_ss(__mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_maskz_getexp_round_ss
+ // CHECK: @llvm.x86.avx512.mask.getexp.ss
+ return _mm_maskz_getexp_round_ss(__U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128d test_mm_mask_getmant_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_mask_getmant_sd
+ // CHECK: @llvm.x86.avx512.mask.getmant.sd
+ return _mm_mask_getmant_sd(__W, __U, __A, __B, 1, 2);
+}
+
+__m128d test_mm_mask_getmant_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_mask_getmant_round_sd
+ // CHECK: @llvm.x86.avx512.mask.getmant.sd
+ return _mm_mask_getmant_round_sd(__W, __U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128d test_mm_maskz_getmant_sd(__mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_maskz_getmant_sd
+ // CHECK: @llvm.x86.avx512.mask.getmant.sd
+ return _mm_maskz_getmant_sd(__U, __A, __B, 1, 2);
+}
+
+__m128d test_mm_maskz_getmant_round_sd(__mmask8 __U, __m128d __A, __m128d __B){
+ // CHECK-LABEL: @test_mm_maskz_getmant_round_sd
+ // CHECK: @llvm.x86.avx512.mask.getmant.sd
+ return _mm_maskz_getmant_round_sd(__U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128 test_mm_mask_getmant_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_mask_getmant_ss
+ // CHECK: @llvm.x86.avx512.mask.getmant.ss
+ return _mm_mask_getmant_ss(__W, __U, __A, __B, 1, 2);
+}
+
+__m128 test_mm_mask_getmant_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_mask_getmant_round_ss
+ // CHECK: @llvm.x86.avx512.mask.getmant.ss
+ return _mm_mask_getmant_round_ss(__W, __U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128 test_mm_maskz_getmant_ss(__mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_maskz_getmant_ss
+ // CHECK: @llvm.x86.avx512.mask.getmant.ss
+ return _mm_maskz_getmant_ss(__U, __A, __B, 1, 2);
+}
+
+__m128 test_mm_maskz_getmant_round_ss(__mmask8 __U, __m128 __A, __m128 __B){
+ // CHECK-LABEL: @test_mm_maskz_getmant_round_ss
+ // CHECK: @llvm.x86.avx512.mask.getmant.ss
+ return _mm_maskz_getmant_round_ss(__U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION);
+}
+
More information about the cfe-commits
mailing list