r269056 - [Clang][AVX512] completing missing intrinsics [load/store]
Michael Zuckerman via cfe-commits
cfe-commits at lists.llvm.org
Tue May 10 06:13:54 PDT 2016
Author: mzuckerm
Date: Tue May 10 08:13:54 2016
New Revision: 269056
URL: http://llvm.org/viewvc/llvm-project?rev=269056&view=rev
Log:
[Clang][AVX512] completing missing intrinsics [load/store]
Differential Revision: http://reviews.llvm.org/D20063
Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
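
For anyone skimming the patch, here is a minimal usage sketch of the merge- and zero-masked unaligned loads it adds. The buffer contents, mask value, and printed output are illustrative only and are not part of the commit; build with -mavx512f and run on AVX-512F hardware.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    int src[16];
    for (int i = 0; i < 16; ++i)
        src[i] = i + 1;

    __m512i fallback = _mm512_set1_epi32(-1);

    /* Merge-masking: lanes whose mask bit is 0 keep the value from `fallback`. */
    __m512i merged = _mm512_mask_loadu_epi32(fallback, 0x00FF, src);

    /* Zero-masking: lanes whose mask bit is 0 are zeroed instead. */
    __m512i zeroed = _mm512_maskz_loadu_epi32(0x00FF, src);

    int out[16];
    _mm512_storeu_si512(out, merged);
    printf("merged: lane 0 = %d, lane 15 = %d\n", out[0], out[15]);  /* 1, -1 */

    _mm512_storeu_si512(out, zeroed);
    printf("zeroed: lane 0 = %d, lane 15 = %d\n", out[0], out[15]);  /* 1, 0 */
    return 0;
}
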
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=269056&r1=269055&r2=269056&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Tue May 10 08:13:54 2016
@@ -3477,6 +3477,24 @@ _mm512_test_epi64_mask(__m512i __A, __m5
/* SIMD load ops */
static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_loadu_si512 (void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+
+static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
@@ -3486,6 +3504,14 @@ _mm512_maskz_loadu_epi32(__mmask16 __U,
}
static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
@@ -3495,6 +3521,14 @@ _mm512_maskz_loadu_epi64(__mmask8 __U, v
}
static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
@@ -3504,27 +3538,17 @@ _mm512_maskz_loadu_ps(__mmask16 __U, voi
}
static __inline __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
+_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
- return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U);
-}
-
-static __inline __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
-{
- return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U);
+ return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
+ (__v8df) __W,
+ (__mmask8) __U);
}
static __inline __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
+_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
- return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+ return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
@@ -3557,6 +3581,23 @@ _mm512_load_ps(float const *__p)
(__mmask16) -1);
}
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_load_pd(double const *__p)
{
@@ -3566,6 +3607,41 @@ _mm512_load_pd(double const *__p)
(__mmask8) -1);
}
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_load_si512 (void const *__P)
+{
+ return *(__m512i *) __P;
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_load_epi32 (void const *__P)
+{
+ return *(__m512i *) __P;
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_load_epi64 (void const *__P)
+{
+ return *(__m512i *) __P;
+}
+
/* SIMD store ops */
static __inline void __DEFAULT_FN_ATTRS
@@ -3576,6 +3652,13 @@ _mm512_mask_storeu_epi64(void *__P, __mm
}
static __inline void __DEFAULT_FN_ATTRS
+_mm512_storeu_si512 (void *__P, __m512i __A)
+{
+ __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
+ (__mmask16) -1);
+}
+
+static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
__builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
@@ -3632,6 +3715,24 @@ _mm512_store_ps(void *__P, __m512 __A)
*(__m512*)__P = __A;
}
+static __inline void __DEFAULT_FN_ATTRS
+_mm512_store_si512 (void *__P, __m512i __A)
+{
+ *(__m512i *) __P = __A;
+}
+
+static __inline void __DEFAULT_FN_ATTRS
+_mm512_store_epi32 (void *__P, __m512i __A)
+{
+ *(__m512i *) __P = __A;
+}
+
+static __inline void __DEFAULT_FN_ATTRS
+_mm512_store_epi64 (void *__P, __m512i __A)
+{
+ *(__m512i *) __P = __A;
+}
+
/* Mask ops */
static __inline __mmask16 __DEFAULT_FN_ATTRS
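
The new aligned integer load/store intrinsics above lower to plain 64-byte-aligned vector loads and stores, so the pointers passed to them must be 64-byte aligned. A short sketch under that assumption follows; copy16_aligned and the alignas buffers are illustrative, not part of the patch.

#include <immintrin.h>
#include <stdalign.h>

/* Copy 16 ints through a zmm register using the aligned intrinsics added
   above. Both pointers must be 64-byte aligned, since the intrinsics
   compile down to ordinary 64-byte-aligned vector loads/stores. */
static void copy16_aligned(int *dst, const int *src) {
    __m512i v = _mm512_load_epi32(src);
    _mm512_store_epi32(dst, v);
}

int main(void) {
    alignas(64) int a[16];
    alignas(64) int b[16];
    for (int i = 0; i < 16; ++i)
        a[i] = i;
    copy16_aligned(b, a);
    return b[15] == 15 ? 0 : 1;
}
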
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=269056&r1=269055&r2=269056&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Tue May 10 08:13:54 2016
@@ -61,6 +61,13 @@ __m512d test_mm512_mul_pd(__m512d a, __m
return _mm512_mul_pd(a, b);
}
+void test_mm512_storeu_si512 (void *__P, __m512i __A)
+{
+ // CHECK-LABEL: @test_mm512_storeu_si512
+ // CHECK: @llvm.x86.avx512.mask.storeu.d.512
+ _mm512_storeu_si512 ( __P,__A);
+}
+
void test_mm512_storeu_ps(void *p, __m512 a)
{
// CHECK-LABEL: @test_mm512_storeu_ps
@@ -82,6 +89,36 @@ void test_mm512_mask_store_ps(void *p, _
_mm512_mask_store_ps(p, m, a);
}
+void test_mm512_store_si512 (void *__P, __m512i __A)
+{
+ // CHECK-LABEL: @test_mm512_store_si512
+ // CHECK: load <8 x i64>, <8 x i64>* %__A.addr.i, align 64
+ // CHECK: load i8*, i8** %__P.addr.i, align 8
+ // CHECK: bitcast i8* %3 to <8 x i64>*
+ // CHECK: store <8 x i64>
+ _mm512_store_si512 ( __P,__A);
+}
+
+void test_mm512_store_epi32 (void *__P, __m512i __A)
+{
+ // CHECK-LABEL: @test_mm512_store_epi32
+ // CHECK: load <8 x i64>, <8 x i64>* %__A.addr.i, align 64
+ // CHECK: load i8*, i8** %__P.addr.i, align 8
+ // CHECK: bitcast i8* %3 to <8 x i64>*
+ // CHECK: store <8 x i64>
+ _mm512_store_epi32 ( __P,__A);
+}
+
+void test_mm512_store_epi64 (void *__P, __m512i __A)
+{
+ // CHECK-LABEL: @test_mm512_store_epi64
+ // CHECK: load <8 x i64>, <8 x i64>* %__A.addr.i, align 64
+ // CHECK: load i8*, i8** %__P.addr.i, align 8
+ // CHECK: bitcast i8* %3 to <8 x i64>*
+ // CHECK: store <8 x i64>
+ _mm512_store_epi64 ( __P,__A);
+}
+
void test_mm512_store_ps(void *p, __m512 a)
{
// CHECK-LABEL: @test_mm512_store_ps
@@ -89,6 +126,13 @@ void test_mm512_store_ps(void *p, __m512
_mm512_store_ps(p, a);
}
+void test_mm512_store_pd(void *p, __m512d a)
+{
+ // CHECK-LABEL: @test_mm512_store_pd
+ // CHECK: store <8 x double>
+ _mm512_store_pd(p, a);
+}
+
void test_mm512_mask_store_pd(void *p, __m512d a, __mmask8 m)
{
// CHECK-LABEL: @test_mm512_mask_store_pd
@@ -96,11 +140,25 @@ void test_mm512_mask_store_pd(void *p, _
_mm512_mask_store_pd(p, m, a);
}
-void test_mm512_store_pd(void *p, __m512d a)
+__m512i test_mm512_loadu_si512 (void *__P)
{
- // CHECK-LABEL: @test_mm512_store_pd
- // CHECK: store <8 x double>
- _mm512_store_pd(p, a);
+ // CHECK-LABEL: @test_mm512_loadu_si512
+ // CHECK: @llvm.x86.avx512.mask.loadu.d.512
+ return _mm512_loadu_si512 ( __P);
+}
+
+__m512i test_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void *__P)
+{
+ // CHECK-LABEL: @test_mm512_mask_loadu_epi32
+ // CHECK: @llvm.x86.avx512.mask.loadu.d.512
+ return _mm512_mask_loadu_epi32 (__W,__U, __P);
+}
+
+__m512i test_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void *__P)
+{
+ // CHECK-LABEL: @test_mm512_mask_loadu_epi64
+ // CHECK: @llvm.x86.avx512.mask.loadu.q.512(
+ return _mm512_mask_loadu_epi64 (__W,__U, __P);
}
__m512 test_mm512_loadu_ps(void *p)
@@ -110,6 +168,13 @@ __m512 test_mm512_loadu_ps(void *p)
return _mm512_loadu_ps(p);
}
+__m512 test_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void *__P)
+{
+ // CHECK-LABEL: @test_mm512_mask_loadu_ps
+ // CHECK: @llvm.x86.avx512.mask.loadu.ps.512
+ return _mm512_mask_loadu_ps (__W,__U, __P);
+}
+
__m512d test_mm512_loadu_pd(void *p)
{
// CHECK-LABEL: @test_mm512_loadu_pd
@@ -117,11 +182,38 @@ __m512d test_mm512_loadu_pd(void *p)
return _mm512_loadu_pd(p);
}
-__m512 test_mm512_maskz_load_ps(void *p, __mmask16 m)
+__m512d test_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void *__P)
{
- // CHECK-LABEL: @test_mm512_maskz_load_ps
- // CHECK: @llvm.x86.avx512.mask.load.ps.512
- return _mm512_maskz_load_ps(m, p);
+ // CHECK-LABEL: @test_mm512_mask_loadu_pd
+ // CHECK: @llvm.x86.avx512.mask.loadu.pd.512
+ return _mm512_mask_loadu_pd (__W,__U, __P);
+}
+
+__m512i test_mm512_load_si512 (void *__P)
+{
+ // CHECK-LABEL: @test_mm512_load_si512
+ // CHECK: load i8*, i8** %__P.addr.i, align 8
+ // CHECK: bitcast i8* %1 to <8 x i64>*
+ // CHECK: load <8 x i64>, <8 x i64>* %2, align 64
+ return _mm512_load_si512 ( __P);
+}
+
+__m512i test_mm512_load_epi32 (void *__P)
+{
+ // CHECK-LABEL: @test_mm512_load_epi32
+ // CHECK: load i8*, i8** %__P.addr.i, align 8
+ // CHECK: bitcast i8* %1 to <8 x i64>*
+ // CHECK: load <8 x i64>, <8 x i64>* %2, align 64
+ return _mm512_load_epi32 ( __P);
+}
+
+__m512i test_mm512_load_epi64 (void *__P)
+{
+ // CHECK-LABEL: @test_mm512_load_epi64
+ // CHECK: load i8*, i8** %__P.addr.i, align 8
+ // CHECK: bitcast i8* %1 to <8 x i64>*
+ // CHECK: load <8 x i64>, <8 x i64>* %2, align 64
+ return _mm512_load_epi64 ( __P);
}
__m512 test_mm512_load_ps(void *p)
@@ -131,11 +223,18 @@ __m512 test_mm512_load_ps(void *p)
return _mm512_load_ps(p);
}
-__m512d test_mm512_maskz_load_pd(void *p, __mmask8 m)
+__m512 test_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void *__P)
{
- // CHECK-LABEL: @test_mm512_maskz_load_pd
- // CHECK: @llvm.x86.avx512.mask.load.pd.512
- return _mm512_maskz_load_pd(m, p);
+ // CHECK-LABEL: @test_mm512_mask_load_ps
+ // CHECK: @llvm.x86.avx512.mask.load.ps.512
+ return _mm512_mask_load_ps (__W,__U, __P);
+}
+
+__m512 test_mm512_maskz_load_ps(__mmask16 __U, void *__P)
+{
+ // CHECK-LABEL: @test_mm512_maskz_load_ps
+ // CHECK: @llvm.x86.avx512.mask.load.ps.512
+ return _mm512_maskz_load_ps(__U, __P);
}
__m512d test_mm512_load_pd(void *p)
@@ -145,6 +244,20 @@ __m512d test_mm512_load_pd(void *p)
return _mm512_load_pd(p);
}
+__m512d test_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void *__P)
+{
+ // CHECK-LABEL: @test_mm512_mask_load_pd
+ // CHECK: @llvm.x86.avx512.mask.load.pd.512
+ return _mm512_mask_load_pd (__W,__U, __P);
+}
+
+__m512d test_mm512_maskz_load_pd(__mmask8 __U, void *__P)
+{
+ // CHECK-LABEL: @test_mm512_maskz_load_pd
+ // CHECK: @llvm.x86.avx512.mask.load.pd.512
+ return _mm512_maskz_load_pd(__U, __P);
+}
+
__m512d test_mm512_set1_pd(double d)
{
// CHECK-LABEL: @test_mm512_set1_pd
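
The double-precision pair works the same way, with an __mmask8 selecting the eight lanes. A brief sketch, with an illustrative mask value and buffer contents that are not part of the patch:

#include <immintrin.h>
#include <stdalign.h>

int main(void) {
    alignas(64) double src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    __m512d w = _mm512_set1_pd(-1.0);

    /* Load only the even lanes (mask bits 0,2,4,6); odd lanes keep -1.0. */
    __m512d merged = _mm512_mask_load_pd(w, 0x55, src);

    /* Same mask with zero-masking: odd lanes become 0.0. */
    __m512d zeroed = _mm512_maskz_load_pd(0x55, src);

    alignas(64) double out_merged[8], out_zeroed[8];
    _mm512_store_pd(out_merged, merged);  /* {1,-1,3,-1,5,-1,7,-1} */
    _mm512_store_pd(out_zeroed, zeroed);  /* {1, 0,3, 0,5, 0,7, 0} */
    return 0;
}
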