r269056 - [Clang][AVX512] completing missing intrinsics [load/store]

Michael Zuckerman via cfe-commits <cfe-commits at lists.llvm.org>
Tue May 10 06:13:54 PDT 2016


Author: mzuckerm
Date: Tue May 10 08:13:54 2016
New Revision: 269056

URL: http://llvm.org/viewvc/llvm-project?rev=269056&view=rev
Log:
[Clang][AVX512] completing missing intrinsics [load/store]

Differential Revision: http://reviews.llvm.org/D20063


Modified:
    cfe/trunk/lib/Headers/avx512fintrin.h
    cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=269056&r1=269055&r2=269056&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Tue May 10 08:13:54 2016
@@ -3477,6 +3477,24 @@ _mm512_test_epi64_mask(__m512i __A, __m5
 /* SIMD load ops */
 
 static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_loadu_si512 (void const *__P)
+{
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+                  (__v16si)
+                  _mm512_setzero_si512 (),
+                  (__mmask16) -1);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
+{
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+                  (__v16si) __W,
+                  (__mmask16) __U);
+}
+
+
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
@@ -3486,6 +3504,14 @@ _mm512_maskz_loadu_epi32(__mmask16 __U,
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
+{
+  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
+                  (__v8di) __W,
+                  (__mmask8) __U);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
@@ -3495,6 +3521,14 @@ _mm512_maskz_loadu_epi64(__mmask8 __U, v
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
+                   (__v16sf) __W,
+                   (__mmask16) __U);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
 {
   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
@@ -3504,27 +3538,17 @@ _mm512_maskz_loadu_ps(__mmask16 __U, voi
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
+_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
 {
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
-                                                   (__v8df)
-                                                   _mm512_setzero_pd (),
-                                                   (__mmask8) __U);
-}
-
-static __inline __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
-{
-  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
-                                                  (__v16sf)
-                                                  _mm512_setzero_ps (),
-                                                  (__mmask16) __U);
+  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
+                (__v8df) __W,
+                (__mmask8) __U);
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
+_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
 {
-  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
                                                    (__v8df)
                                                    _mm512_setzero_pd (),
                                                    (__mmask8) __U);
@@ -3557,6 +3581,23 @@ _mm512_load_ps(float const *__p)
                                                   (__mmask16) -1);
 }
 
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
+                   (__v16sf) __W,
+                   (__mmask16) __U);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
+{
+  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
+                                                  (__v16sf)
+                                                  _mm512_setzero_ps (),
+                                                  (__mmask16) __U);
+}
+
 static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_load_pd(double const *__p)
 {
@@ -3566,6 +3607,41 @@ _mm512_load_pd(double const *__p)
                                                    (__mmask8) -1);
 }
 
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
+{
+  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
+                          (__v8df) __W,
+                          (__mmask8) __U);
+}
+
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
+{
+  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+                                                   (__v8df)
+                                                   _mm512_setzero_pd (),
+                                                   (__mmask8) __U);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_load_si512 (void const *__P)
+{
+  return *(__m512i *) __P;
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_load_epi32 (void const *__P)
+{
+  return *(__m512i *) __P;
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_load_epi64 (void const *__P)
+{
+  return *(__m512i *) __P;
+}
+
 /* SIMD store ops */
 
 static __inline void __DEFAULT_FN_ATTRS
@@ -3576,6 +3652,13 @@ _mm512_mask_storeu_epi64(void *__P, __mm
 }
 
 static __inline void __DEFAULT_FN_ATTRS
+_mm512_storeu_si512 (void *__P, __m512i __A)
+{
+  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
+            (__mmask16) -1);
+}
+
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
 {
   __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
@@ -3632,6 +3715,24 @@ _mm512_store_ps(void *__P, __m512 __A)
   *(__m512*)__P = __A;
 }
 
+static __inline void __DEFAULT_FN_ATTRS
+_mm512_store_si512 (void *__P, __m512i __A)
+{
+  *(__m512i *) __P = __A;
+}
+
+static __inline void __DEFAULT_FN_ATTRS
+_mm512_store_epi32 (void *__P, __m512i __A)
+{
+  *(__m512i *) __P = __A;
+}
+
+static __inline void __DEFAULT_FN_ATTRS
+_mm512_store_epi64 (void *__P, __m512i __A)
+{
+  *(__m512i *) __P = __A;
+}
+
 /* Mask ops */
 
 static __inline __mmask16 __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=269056&r1=269055&r2=269056&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Tue May 10 08:13:54 2016
@@ -61,6 +61,13 @@ __m512d test_mm512_mul_pd(__m512d a, __m
   return _mm512_mul_pd(a, b);
 }
 
+void test_mm512_storeu_si512 (void *__P, __m512i __A)
+{
+  // CHECK-LABEL: @test_mm512_storeu_si512 
+  // CHECK: @llvm.x86.avx512.mask.storeu.d.512
+  _mm512_storeu_si512 ( __P,__A);
+}
+
 void test_mm512_storeu_ps(void *p, __m512 a)
 {
   // CHECK-LABEL: @test_mm512_storeu_ps
@@ -82,6 +89,36 @@ void test_mm512_mask_store_ps(void *p, _
   _mm512_mask_store_ps(p, m, a);
 }
 
+void test_mm512_store_si512 (void *__P, __m512i __A)
+{
+  // CHECK-LABEL: @test_mm512_store_si512 
+  // CHECK: load <8 x i64>, <8 x i64>* %__A.addr.i, align 64
+  // CHECK: load i8*, i8** %__P.addr.i, align 8
+  // CHECK: bitcast i8* %3 to <8 x i64>*
+  // CHECK: store <8 x i64>  
+  _mm512_store_si512 ( __P,__A);
+}
+
+void test_mm512_store_epi32 (void *__P, __m512i __A)
+{
+  // CHECK-LABEL: @test_mm512_store_epi32 
+  // CHECK: load <8 x i64>, <8 x i64>* %__A.addr.i, align 64
+  // CHECK: load i8*, i8** %__P.addr.i, align 8
+  // CHECK: bitcast i8* %3 to <8 x i64>*
+  // CHECK: store <8 x i64>  
+  _mm512_store_epi32 ( __P,__A);
+}
+
+void test_mm512_store_epi64 (void *__P, __m512i __A)
+{
+  // CHECK-LABEL: @test_mm512_store_epi64 
+  // CHECK: load <8 x i64>, <8 x i64>* %__A.addr.i, align 64
+  // CHECK: load i8*, i8** %__P.addr.i, align 8
+  // CHECK: bitcast i8* %3 to <8 x i64>*
+  // CHECK: store <8 x i64>  
+  _mm512_store_epi64 ( __P,__A);
+}
+
 void test_mm512_store_ps(void *p, __m512 a)
 {
   // CHECK-LABEL: @test_mm512_store_ps
@@ -89,6 +126,13 @@ void test_mm512_store_ps(void *p, __m512
   _mm512_store_ps(p, a);
 }
 
+void test_mm512_store_pd(void *p, __m512d a)
+{
+  // CHECK-LABEL: @test_mm512_store_pd
+  // CHECK: store <8 x double>
+  _mm512_store_pd(p, a);
+}
+
 void test_mm512_mask_store_pd(void *p, __m512d a, __mmask8 m)
 {
   // CHECK-LABEL: @test_mm512_mask_store_pd
@@ -96,11 +140,25 @@ void test_mm512_mask_store_pd(void *p, _
   _mm512_mask_store_pd(p, m, a);
 }
 
-void test_mm512_store_pd(void *p, __m512d a)
+__m512i test_mm512_loadu_si512 (void *__P)
 {
-  // CHECK-LABEL: @test_mm512_store_pd
-  // CHECK: store <8 x double>
-  _mm512_store_pd(p, a);
+  // CHECK-LABEL: @test_mm512_loadu_si512 
+  // CHECK: @llvm.x86.avx512.mask.loadu.d.512
+  return _mm512_loadu_si512 ( __P);
+}
+
+__m512i test_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void *__P)
+{
+  // CHECK-LABEL: @test_mm512_mask_loadu_epi32 
+  // CHECK: @llvm.x86.avx512.mask.loadu.d.512
+  return _mm512_mask_loadu_epi32 (__W,__U, __P);
+}
+
+__m512i test_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void *__P)
+{
+  // CHECK-LABEL: @test_mm512_mask_loadu_epi64 
+  // CHECK: @llvm.x86.avx512.mask.loadu.q.512(
+  return _mm512_mask_loadu_epi64 (__W,__U, __P);
 }
 
 __m512 test_mm512_loadu_ps(void *p)
@@ -110,6 +168,13 @@ __m512 test_mm512_loadu_ps(void *p)
   return _mm512_loadu_ps(p);
 }
 
+__m512 test_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void *__P)
+{
+  // CHECK-LABEL: @test_mm512_mask_loadu_ps 
+  // CHECK: @llvm.x86.avx512.mask.loadu.ps.512
+  return _mm512_mask_loadu_ps (__W,__U, __P);
+}
+
 __m512d test_mm512_loadu_pd(void *p)
 {
   // CHECK-LABEL: @test_mm512_loadu_pd
@@ -117,11 +182,38 @@ __m512d test_mm512_loadu_pd(void *p)
   return _mm512_loadu_pd(p);
 }
 
-__m512 test_mm512_maskz_load_ps(void *p, __mmask16 m)
+__m512d test_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void *__P)
 {
-  // CHECK-LABEL: @test_mm512_maskz_load_ps
-  // CHECK: @llvm.x86.avx512.mask.load.ps.512
-  return _mm512_maskz_load_ps(m, p);
+  // CHECK-LABEL: @test_mm512_mask_loadu_pd 
+  // CHECK: @llvm.x86.avx512.mask.loadu.pd.512
+  return _mm512_mask_loadu_pd (__W,__U, __P);
+}
+
+__m512i test_mm512_load_si512 (void *__P)
+{
+  // CHECK-LABEL: @test_mm512_load_si512 
+  // CHECK: load i8*, i8** %__P.addr.i, align 8
+  // CHECK: bitcast i8* %1 to <8 x i64>*
+  // CHECK: load <8 x i64>, <8 x i64>* %2, align 64
+  return _mm512_load_si512 ( __P);
+}
+
+__m512i test_mm512_load_epi32 (void *__P)
+{
+  // CHECK-LABEL: @test_mm512_load_epi32 
+  // CHECK: load i8*, i8** %__P.addr.i, align 8
+  // CHECK: bitcast i8* %1 to <8 x i64>*
+  // CHECK: load <8 x i64>, <8 x i64>* %2, align 64
+  return _mm512_load_epi32 ( __P);
+}
+
+__m512i test_mm512_load_epi64 (void *__P)
+{
+  // CHECK-LABEL: @test_mm512_load_epi64 
+  // CHECK: load i8*, i8** %__P.addr.i, align 8
+  // CHECK: bitcast i8* %1 to <8 x i64>*
+  // CHECK: load <8 x i64>, <8 x i64>* %2, align 64
+  return _mm512_load_epi64 ( __P);
 }
 
 __m512 test_mm512_load_ps(void *p)
@@ -131,11 +223,18 @@ __m512 test_mm512_load_ps(void *p)
   return _mm512_load_ps(p);
 }
 
-__m512d test_mm512_maskz_load_pd(void *p, __mmask8 m)
+__m512 test_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void *__P)
 {
-  // CHECK-LABEL: @test_mm512_maskz_load_pd
-  // CHECK: @llvm.x86.avx512.mask.load.pd.512
-  return _mm512_maskz_load_pd(m, p);
+  // CHECK-LABEL: @test_mm512_mask_load_ps 
+  // CHECK: @llvm.x86.avx512.mask.load.ps.512
+  return _mm512_mask_load_ps (__W,__U, __P);
+}
+
+__m512 test_mm512_maskz_load_ps(__mmask16 __U, void *__P)
+{
+  // CHECK-LABEL: @test_mm512_maskz_load_ps
+  // CHECK:  @llvm.x86.avx512.mask.load.ps.512
+  return _mm512_maskz_load_ps(__U, __P);
 }
 
 __m512d test_mm512_load_pd(void *p)
@@ -145,6 +244,20 @@ __m512d test_mm512_load_pd(void *p)
   return _mm512_load_pd(p);
 }
 
+__m512d test_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void *__P)
+{
+  // CHECK-LABEL: @test_mm512_mask_load_pd 
+  // CHECK: @llvm.x86.avx512.mask.load.pd.512
+  return _mm512_mask_load_pd (__W,__U, __P);
+}
+
+__m512d test_mm512_maskz_load_pd(__mmask8 __U, void *__P)
+{
+  // CHECK-LABEL: @test_mm512_maskz_load_pd
+  // CHECK: @llvm.x86.avx512.mask.load.pd.512
+  return _mm512_maskz_load_pd(__U, __P);
+}
+
 __m512d test_mm512_set1_pd(double d)
 {
   // CHECK-LABEL: @test_mm512_set1_pd




More information about the cfe-commits mailing list