r272012 - [clang][AVX512][Intrinsics] Adding intrinsics reduce_[round]_{ss|sd} to clang

Michael Zuckerman via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 7 07:00:23 PDT 2016


Author: mzuckerm
Date: Tue Jun  7 09:00:20 2016
New Revision: 272012

URL: http://llvm.org/viewvc/llvm-project?rev=272012&view=rev
Log:
[clang][AVX512][Intrinsics] Adding intrinsics reduce_[round]_{ss|sd} to clang

Differential Revision: http://reviews.llvm.org/D21014


Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx512dqintrin.h
    cfe/trunk/test/CodeGen/avx512dq-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=272012&r1=272011&r2=272012&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Jun  7 09:00:20 2016
@@ -1531,6 +1531,8 @@ TARGET_BUILTIN(__builtin_ia32_reducepd12
 TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_reduceps256_mask, "V8fV8fIiV8fUc", "", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reducesd_mask, "V2dV2dV2dV2dUcIiIi", "", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reducess_mask, "V4fV4fV4fV4fUcIiIi", "", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_pmaddubsw128_mask, "V8sV16cV16cV8sUc", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmaddubsw256_mask, "V16sV32cV32cV16sUs", "", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmaddwd128_mask, "V4iV8sV8sV4iUc", "", "avx512vl,avx512bw")

Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=272012&r1=272011&r2=272012&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Tue Jun  7 09:00:20 2016
@@ -851,6 +851,78 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U,
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
 
+#define _mm_reduce_ss(A, B, C) __extension__ ({              \
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+                                          (__v4sf)_mm_setzero_ps(), \
+                                          (__mmask8)-1, \
+                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({   \
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128 )(A), (__v4sf)(__m128 )(B), \
+                                          (__v4sf)(__m128 )(W), \
+                                          (__mmask8)(U), \
+                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({       \
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \
+                                          (__v4sf)_mm_setzero_ps(), \
+                                          (__mmask8)(U), \
+                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+               
+#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({              \
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \
+                                          (__v4sf)_mm_setzero_ps(), \
+                                          (__mmask8)-1, \
+                                          (int)(C),(int)(R)); })
+
+#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({   \
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128 )(A), (__v4sf)(__m128 )(B), \
+                                          (__v4sf)(__m128 )(W), \
+                                          (__mmask8)(U), \
+                                          (int)(C),(int)(R)); })
+
+#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({       \
+  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), (__v4sf)(__m128 )(B), \
+                                          (__v4sf)_mm_setzero_ps(), \
+                                          (__mmask8)(U), \
+                                          (int)(C),(int)(R)); })
+
+#define _mm_reduce_sd(A, B, C) __extension__ ({              \
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
+                                          (__v2df)_mm_setzero_pd(), \
+                                          (__mmask8)-1, \
+                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({   \
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128 )(A), (__v2df)(__m128)(B), \
+                                          (__v2df)(__m128 )(W), \
+                                          (__mmask8)(U), \
+                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({       \
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
+                                          (__v2df)_mm_setzero_pd(), \
+                                          (__mmask8)(U), \
+                                          (int)(C),_MM_FROUND_CUR_DIRECTION); })
+               
+#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({              \
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
+                                          (__v2df)_mm_setzero_pd(), \
+                                          (__mmask8)-1, \
+                                          (int)(C),(int)(R)); })
+
+#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({   \
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128 )(A), (__v2df)(__m128)(B), \
+                                          (__v2df)(__m128 )(W), \
+                                          (__mmask8)(U), \
+                                          (int)(C),(int)(R)); })
+
+#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({       \
+  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128)(A), (__v2df)(__m128)(B), \
+                                          (__v2df)_mm_setzero_pd(), \
+                                          (__mmask8)(U), \
+                                          (int)(C),(int)(R)); })
+                     
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_movepi32_mask (__m512i __A)
 {

Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=272012&r1=272011&r2=272012&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Tue Jun  7 09:00:20 2016
@@ -743,6 +743,78 @@ __m512 test_mm512_maskz_reduce_round_ps(
   return _mm512_maskz_reduce_round_ps(__U, __A, 4, 8); 
 }
 
+__m128 test_mm_reduce_ss(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_reduce_ss
+  // CHECK: @llvm.x86.avx512.mask.reduce.ss
+  return _mm_reduce_ss(__A, __B, 4);
+}
+
+__m128 test_mm_mask_reduce_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_reduce_ss
+  // CHECK: @llvm.x86.avx512.mask.reduce.ss
+  return _mm_mask_reduce_ss(__W, __U, __A, __B, 4);
+}
+
+__m128 test_mm_maskz_reduce_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_reduce_ss
+  // CHECK: @llvm.x86.avx512.mask.reduce.ss
+  return _mm_maskz_reduce_ss(__U, __A, __B, 4);
+}
+
+__m128 test_mm_reduce_round_ss(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_reduce_round_ss
+  // CHECK: @llvm.x86.avx512.mask.reduce.ss
+  return _mm_reduce_round_ss(__A, __B, 4, 8);
+}
+
+__m128 test_mm_mask_reduce_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_reduce_round_ss
+  // CHECK: @llvm.x86.avx512.mask.reduce.ss
+  return _mm_mask_reduce_round_ss(__W, __U, __A, __B, 4, 8);
+}
+
+__m128 test_mm_maskz_reduce_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_reduce_round_ss
+  // CHECK: @llvm.x86.avx512.mask.reduce.ss
+  return _mm_maskz_reduce_round_ss(__U, __A, __B, 4, 8);
+}
+
+__m128d test_mm_reduce_sd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_reduce_sd
+  // CHECK: @llvm.x86.avx512.mask.reduce.sd
+  return _mm_reduce_sd(__A, __B, 4);
+}
+
+__m128d test_mm_mask_reduce_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_reduce_sd
+  // CHECK: @llvm.x86.avx512.mask.reduce.sd
+  return _mm_mask_reduce_sd(__W, __U, __A, __B, 4);
+}
+
+__m128d test_mm_maskz_reduce_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_reduce_sd
+  // CHECK: @llvm.x86.avx512.mask.reduce.sd
+  return _mm_maskz_reduce_sd(__U, __A, __B, 4);
+}
+
+__m128d test_mm_reduce_round_sd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_reduce_round_sd
+  // CHECK: @llvm.x86.avx512.mask.reduce.sd
+  return _mm_reduce_round_sd(__A, __B, 4, 8);
+}
+
+__m128d test_mm_mask_reduce_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_reduce_round_sd
+  // CHECK: @llvm.x86.avx512.mask.reduce.sd
+  return _mm_mask_reduce_round_sd(__W, __U, __A, __B, 4, 8);
+}
+
+__m128d test_mm_maskz_reduce_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_reduce_round_sd
+  // CHECK: @llvm.x86.avx512.mask.reduce.sd
+  return _mm_maskz_reduce_round_sd(__U, __A, __B, 4, 8);
+}
+
 __mmask16 test_mm512_movepi32_mask(__m512i __A) {
   // CHECK-LABEL: @test_mm512_movepi32_mask
   // CHECK: @llvm.x86.avx512.cvtd2mask.512




More information about the cfe-commits mailing list