r267129 - [Clang][AVX512][BUILTIN] Adding scalar intrinsics for rsqrt14 , rcp14, getexp and getmant instruction set

Michael Zuckerman via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 22 03:06:11 PDT 2016


Author: mzuckerm
Date: Fri Apr 22 05:06:10 2016
New Revision: 267129

URL: http://llvm.org/viewvc/llvm-project?rev=267129&view=rev
Log:
[Clang][AVX512][BUILTIN] Adding scalar intrinsics for rsqrt14, rcp14, getexp and getmant instruction set

Differential Revision: http://reviews.llvm.org/D19326


Modified:
    cfe/trunk/lib/Headers/avx512fintrin.h
    cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=267129&r1=267128&r2=267129&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Fri Apr 22 05:06:10 2016
@@ -1021,6 +1021,24 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B)
              (__mmask8) -1);
 }
 
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+          (__v4sf) __B,
+          (__v4sf) __W,
+          (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) 
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+          (__v4sf) __B,
+          (__v4sf) _mm_setzero_ps (),
+          (__mmask8) __U);
+}
+
 static  __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
 {
@@ -1031,6 +1049,24 @@ _mm_rsqrt14_sd(__m128d __A, __m128d __B)
               (__mmask8) -1);
 }
 
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
+          (__v2df) __B,
+          (__v2df) __W,
+          (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) 
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
+          (__v2df) __B,
+          (__v2df) _mm_setzero_pd (),
+          (__mmask8) __U);
+}
+
 static  __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_rcp14_pd(__m512d __A)
 {
@@ -1058,6 +1094,24 @@ _mm_rcp14_ss(__m128 __A, __m128 __B)
                  (__mmask8) -1);
 }
 
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+          (__v4sf) __B,
+          (__v4sf) __W,
+          (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) 
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+          (__v4sf) __B,
+          (__v4sf) _mm_setzero_ps (),
+          (__mmask8) __U);
+}
+
 static  __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_rcp14_sd(__m128d __A, __m128d __B)
 {
@@ -1068,6 +1122,24 @@ _mm_rcp14_sd(__m128d __A, __m128d __B)
             (__mmask8) -1);
 }
 
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
+          (__v2df) __B,
+          (__v2df) __W,
+          (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) 
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
+          (__v2df) __B,
+          (__v2df) _mm_setzero_pd (),
+          (__mmask8) __U);
+}
+
 static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_floor_ps(__m512 __A)
 {
@@ -4008,6 +4080,42 @@ _mm_getexp_sd (__m128d __A, __m128d __B)
                  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
 }
 
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
+          (__v2df) __B,
+          (__v2df) __W,
+          (__mmask8) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_getexp_round_sd( __W, __U, __A, __B, __R) __extension__ ({\
+__builtin_ia32_getexpsd128_round_mask ((__v2df) __A,\
+          (__v2df) __B,\
+          (__v2df) __W,\
+          (__mmask8) __U,\
+          __R);\
+})
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) 
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
+          (__v2df) __B,
+          (__v2df) _mm_setzero_pd (),
+          (__mmask8) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_getexp_round_sd( __U, __A, __B, __R) __extension__ ({\
+__builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,\
+          (__v2df) __B,\
+          (__v2df) _mm_setzero_pd (),\
+          (__mmask8) __U,\
+          __R);\
+})
+
 #define _mm_getexp_round_ss( __A, __B, __R) __extension__ ({ \
 __builtin_ia32_getexpss128_round_mask ((__v4sf)( __A),\
                 (__v4sf)( __B), (__v4sf)  _mm_setzero_ps(), (__mmask8) -1,\
@@ -4021,6 +4129,42 @@ _mm_getexp_ss (__m128 __A, __m128 __B)
                 (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
 }
 
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 
+{
+ return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
+          (__v4sf) __B,
+          (__v4sf) __W,
+          (__mmask8) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_getexp_round_ss( __W, __U, __A, __B, __R) __extension__ ({\
+__builtin_ia32_getexpss128_round_mask ((__v4sf) __A,\
+          (__v4sf) __B,\
+          (__v4sf) __W,\
+          (__mmask8) __U,\
+          __R);\
+})
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) 
+{
+ return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
+          (__v4sf) __B,
+          (__v4sf) _mm_setzero_ps (),
+          (__mmask8) __U,
+          _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_getexp_round_ss( __U, __A, __B, __R) __extension__ ({\
+__builtin_ia32_getexpss128_round_mask ((__v4sf) __A,\
+          (__v4sf) __B,\
+          (__v4sf) _mm_setzero_ps (),\
+          (__mmask8) __U,\
+          __R);\
+})
+
 #define _mm_getmant_round_sd( __A, __B, __C, __D, __R) __extension__ ({ \
 __builtin_ia32_getmantsd_round_mask ((__v2df)( __A),\
               (__v2df)( __B),\
@@ -4035,6 +4179,42 @@ __builtin_ia32_getmantsd_round_mask ((__
               _MM_FROUND_CUR_DIRECTION);\
 })
 
+#define _mm_mask_getmant_sd( __W, __U, __A, __B, __C, __D) __extension__ ({\
+__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\
+          (__v2df) __B,\
+          (( __D) << 2) |( __C),\
+          (__v2df) __W,\
+          (__mmask8) __U,\
+          _MM_FROUND_CUR_DIRECTION);\
+})
+
+#define _mm_mask_getmant_round_sd( __W, __U, __A, __B, __C, __D, __R) __extension__ ({\
+__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\
+          (__v2df) __B,\
+          (( __D) << 2) |( __C),\
+          (__v2df) __W,\
+          (__mmask8) __U,\
+          __R);\
+})
+
+#define _mm_maskz_getmant_sd( __U, __A, __B, __C, __D) __extension__ ({\
+__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\
+          (__v2df) __B,\
+          (( __D) << 2) |( __C),\
+          (__v2df) _mm_setzero_pd (),\
+          (__mmask8) __U,\
+          _MM_FROUND_CUR_DIRECTION);\
+})
+
+#define _mm_maskz_getmant_round_sd( __U, __A, __B, __C, __D, __R) __extension__ ({\
+__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\
+          (__v2df) __B,\
+          (( __D) << 2) |( __C),\
+          (__v2df) _mm_setzero_pd (),\
+          (__mmask8) __U,\
+          __R);\
+})
+
 #define _mm_getmant_round_ss( __A, __B, __C, __D, __R) __extension__ ({ \
 __builtin_ia32_getmantss_round_mask ((__v4sf)( __A),\
               (__v4sf)( __B),\
@@ -4049,6 +4229,41 @@ __builtin_ia32_getmantss_round_mask ((__
               _MM_FROUND_CUR_DIRECTION);\
 })
 
+#define _mm_mask_getmant_ss( __W, __U, __A, __B, __C, __D) __extension__ ({\
+__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\
+          (__v4sf) __B,\
+          (( __D) << 2) |( __C),\
+          (__v4sf) __W,\
+          (__mmask8) __U,\
+          _MM_FROUND_CUR_DIRECTION);\
+})
+
+#define _mm_mask_getmant_round_ss( __W, __U, __A, __B, __C, __D, __R) __extension__ ({\
+__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\
+          (__v4sf) __B,\
+          (( __D) << 2) |( __C),\
+          (__v4sf) __W,\
+          (__mmask8) __U,\
+          __R);\
+})
+
+#define _mm_maskz_getmant_ss( __U, __A, __B, __C, __D) __extension__ ({\
+__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\
+          (__v4sf) __B,\
+          (( __D) << 2) |( __C),\
+          (__v4sf) _mm_setzero_ps (),\
+          (__mmask8) __U,\
+          _MM_FROUND_CUR_DIRECTION);\
+})
+
+#define _mm_maskz_getmant_round_ss( __U, __A, __B, __C, __D, __R) __extension__ ({\
+__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\
+          (__v4sf) __B,\
+          (( __D) << 2) |( __C),\
+          (__v4sf) _mm_setzero_ps (),\
+          (__mmask8) __U,\
+          __R);\
+})
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_kmov (__mmask16 __A)

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=267129&r1=267128&r2=267129&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Fri Apr 22 05:06:10 2016
@@ -2556,8 +2556,6 @@ __m128d test_mm_getmant_sd(__m128d __A,
   return _mm_getmant_sd(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src); 
 }
 
-
-
 __m128 test_mm_getmant_round_ss(__m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_getmant_round_ss
   // CHECK: @llvm.x86.avx512.mask.getmant.ss
@@ -4372,3 +4370,147 @@ __m512i test_mm512_mask_i32gather_epi64(
   return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); 
 }
 
+__m128d test_mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_mask_rsqrt14_sd
+  // CHECK: @llvm.x86.avx512.rsqrt14.sd
+  return _mm_mask_rsqrt14_sd(__W, __U, __A, __B);
+}
+
+__m128d test_mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_maskz_rsqrt14_sd
+  // CHECK: @llvm.x86.avx512.rsqrt14.sd
+  return _mm_maskz_rsqrt14_sd(__U, __A, __B);
+}
+
+__m128 test_mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_mask_rsqrt14_ss
+  // CHECK: @llvm.x86.avx512.rsqrt14.ss
+  return _mm_mask_rsqrt14_ss(__W, __U, __A, __B);
+}
+
+__m128 test_mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_maskz_rsqrt14_ss
+  // CHECK: @llvm.x86.avx512.rsqrt14.ss
+  return _mm_maskz_rsqrt14_ss(__U, __A, __B);
+}
+
+__m128d test_mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_mask_rcp14_sd
+  // CHECK: @llvm.x86.avx512.rcp14.sd
+  return _mm_mask_rcp14_sd(__W, __U, __A, __B);
+}
+
+__m128d test_mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_maskz_rcp14_sd
+  // CHECK: @llvm.x86.avx512.rcp14.sd
+  return _mm_maskz_rcp14_sd(__U, __A, __B);
+}
+
+__m128 test_mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_mask_rcp14_ss
+  // CHECK: @llvm.x86.avx512.rcp14.ss
+  return _mm_mask_rcp14_ss(__W, __U, __A, __B);
+}
+
+__m128 test_mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_maskz_rcp14_ss
+  // CHECK: @llvm.x86.avx512.rcp14.ss
+  return _mm_maskz_rcp14_ss(__U, __A, __B);
+}
+
+__m128d test_mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_mask_getexp_sd
+  // CHECK: @llvm.x86.avx512.mask.getexp.sd
+  return _mm_mask_getexp_sd(__W, __U, __A, __B);
+}
+
+__m128d test_mm_mask_getexp_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_mask_getexp_round_sd
+  // CHECK: @llvm.x86.avx512.mask.getexp.sd
+  return _mm_mask_getexp_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128d test_mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_maskz_getexp_sd
+  // CHECK: @llvm.x86.avx512.mask.getexp.sd
+  return _mm_maskz_getexp_sd(__U, __A, __B);
+}
+
+__m128d test_mm_maskz_getexp_round_sd(__mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_maskz_getexp_round_sd
+  // CHECK: @llvm.x86.avx512.mask.getexp.sd
+  return _mm_maskz_getexp_round_sd(__U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128 test_mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_mask_getexp_ss
+  // CHECK: @llvm.x86.avx512.mask.getexp.ss
+  return _mm_mask_getexp_ss(__W, __U, __A, __B);
+}
+
+__m128 test_mm_mask_getexp_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_mask_getexp_round_ss
+  // CHECK: @llvm.x86.avx512.mask.getexp.ss
+  return _mm_mask_getexp_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128 test_mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_maskz_getexp_ss
+  // CHECK: @llvm.x86.avx512.mask.getexp.ss
+  return _mm_maskz_getexp_ss(__U, __A, __B);
+}
+
+__m128 test_mm_maskz_getexp_round_ss(__mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_maskz_getexp_round_ss
+  // CHECK: @llvm.x86.avx512.mask.getexp.ss
+  return _mm_maskz_getexp_round_ss(__U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128d test_mm_mask_getmant_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_mask_getmant_sd
+  // CHECK: @llvm.x86.avx512.mask.getmant.sd
+  return _mm_mask_getmant_sd(__W, __U, __A, __B, 1, 2);
+}
+
+__m128d test_mm_mask_getmant_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_mask_getmant_round_sd
+  // CHECK: @llvm.x86.avx512.mask.getmant.sd
+  return _mm_mask_getmant_round_sd(__W, __U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128d test_mm_maskz_getmant_sd(__mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_maskz_getmant_sd
+  // CHECK: @llvm.x86.avx512.mask.getmant.sd
+  return _mm_maskz_getmant_sd(__U, __A, __B, 1, 2);
+}
+
+__m128d test_mm_maskz_getmant_round_sd(__mmask8 __U, __m128d __A, __m128d __B){
+  // CHECK-LABEL: @test_mm_maskz_getmant_round_sd
+  // CHECK: @llvm.x86.avx512.mask.getmant.sd
+  return _mm_maskz_getmant_round_sd(__U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128 test_mm_mask_getmant_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_mask_getmant_ss
+  // CHECK: @llvm.x86.avx512.mask.getmant.ss
+  return _mm_mask_getmant_ss(__W, __U, __A, __B, 1, 2);
+}
+
+__m128 test_mm_mask_getmant_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_mask_getmant_round_ss
+  // CHECK: @llvm.x86.avx512.mask.getmant.ss
+  return _mm_mask_getmant_round_ss(__W, __U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m128 test_mm_maskz_getmant_ss(__mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_maskz_getmant_ss
+  // CHECK: @llvm.x86.avx512.mask.getmant.ss
+  return _mm_maskz_getmant_ss(__U, __A, __B, 1, 2);
+}
+
+__m128 test_mm_maskz_getmant_round_ss(__mmask8 __U, __m128 __A, __m128 __B){
+  // CHECK-LABEL: @test_mm_maskz_getmant_round_ss
+  // CHECK: @llvm.x86.avx512.mask.getmant.ss
+  return _mm_maskz_getmant_round_ss(__U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION);
+}
+




More information about the cfe-commits mailing list