r226298 - [AVX512] Add intrinsics for masked aligned FP loads and stores
Adam Nemet via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 18 18:07:25 PST 2016
> On Jan 18, 2016, at 6:00 PM, Adam Nemet <anemet at apple.com> wrote:
>
>
>> On Jan 18, 2016, at 5:28 PM, Hal Finkel <hfinkel at anl.gov> wrote:
>>
>> ----- Original Message -----
>>> From: "Adam Nemet" <anemet at apple.com>
>>> To: cfe-commits at cs.uiuc.edu
>>> Sent: Friday, January 16, 2015 12:51:50 PM
>>> Subject: r226298 - [AVX512] Add intrinsics for masked aligned FP loads and stores
>>>
>>> Author: anemet
>>> Date: Fri Jan 16 12:51:50 2015
>>> New Revision: 226298
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=226298&view=rev
>>> Log:
>>> [AVX512] Add intrinsics for masked aligned FP loads and stores
>>>
>>> Part of <rdar://problem/17688758>
>>>
>>> Modified:
>>> cfe/trunk/include/clang/Basic/BuiltinsX86.def
>>> cfe/trunk/lib/Headers/avx512fintrin.h
>>> cfe/trunk/test/CodeGen/avx512f-builtins.c
>>>
>>> Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=226298&r1=226297&r2=226298&view=diff
>>> ==============================================================================
>>> --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
>>> +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Jan 16 12:51:50
>>> 2015
>>> @@ -897,11 +897,15 @@ BUILTIN(__builtin_ia32_pbroadcastq512_me
>>> BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16ivC*V16iUs", "")
>>> BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLivC*V8LLiUc", "")
>>> BUILTIN(__builtin_ia32_loadups512_mask, "V16fvC*V16fUs", "")
>>> +BUILTIN(__builtin_ia32_loadaps512_mask, "V16fvC*V16fUs", "")
>>> BUILTIN(__builtin_ia32_loadupd512_mask, "V8dvC*V8dUc", "")
>>> +BUILTIN(__builtin_ia32_loadapd512_mask, "V8dvC*V8dUc", "")
>>> BUILTIN(__builtin_ia32_storedqudi512_mask, "vv*V8LLiUc", "")
>>> BUILTIN(__builtin_ia32_storedqusi512_mask, "vv*V16iUs", "")
>>> BUILTIN(__builtin_ia32_storeupd512_mask, "vv*V8dUc", "")
>>> +BUILTIN(__builtin_ia32_storeapd512_mask, "vv*V8dUc", "")
>>> BUILTIN(__builtin_ia32_storeups512_mask, "vv*V16fUs", "")
>>> +BUILTIN(__builtin_ia32_storeaps512_mask, "vv*V16fUs", "")
>>> BUILTIN(__builtin_ia32_vpermt2vard512_mask, "V16iV16iV16iV16iUs",
>>> "")
>>> BUILTIN(__builtin_ia32_vpermt2varq512_mask,
>>> "V8LLiV8LLiV8LLiV8LLiUc", "")
>>> BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs",
>>> "")
>>>
>>> Modified: cfe/trunk/lib/Headers/avx512fintrin.h
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=226298&r1=226297&r2=226298&view=diff
>>> ==============================================================================
>>> --- cfe/trunk/lib/Headers/avx512fintrin.h (original)
>>> +++ cfe/trunk/lib/Headers/avx512fintrin.h Fri Jan 16 12:51:50 2015
>>> @@ -928,6 +928,24 @@ _mm512_maskz_loadu_pd(__mmask8 __U, void
>>> (__mmask8) __U);
>>> }
>>>
>>> +static __inline __m512 __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> +_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
>>> +{
>>> + return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf
>>> *)__P,
>>> + (__v16sf)
>>> + _mm512_setzero_ps
>>> (),
>>> + (__mmask16) __U);
>>> +}
>>> +
>>> +static __inline __m512d __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> +_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
>>> +{
>>> + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df
>>> *)__P,
>>> + (__v8df)
>>> + _mm512_setzero_pd
>>> (),
>>> + (__mmask8) __U);
>>> +}
>>> +
>>> static __inline __m512d __attribute__((__always_inline__,
>>> __nodebug__))
>>> _mm512_loadu_pd(double const *__p)
>>> {
>>> @@ -946,6 +964,24 @@ _mm512_loadu_ps(float const *__p)
>>> return ((struct __loadu_ps*)__p)->__v;
>>> }
>>>
>>> +static __inline __m512 __attribute__((__always_inline__,
>>> __nodebug__))
>>> +_mm512_load_ps(double const *__p)
>>> +{
>>> + return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf
>>> *)__p,
>>> + (__v16sf)
>>> + _mm512_setzero_ps
>>> (),
>>> + (__mmask16) -1);
>>> +}
>>> +
>>> +static __inline __m512d __attribute__((__always_inline__,
>>> __nodebug__))
>>> +_mm512_load_pd(float const *__p)
>>
>> Shouldn't this one take a 'double const *' and the one above take a 'float const *'?
>
> You’re right. Fixing it.
The fix is committed as r258108.
>
> Thanks,
> Adam
>
>>
>> -Hal
>>
>>> +{
>>> + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df
>>> *)__p,
>>> + (__v8df)
>>> + _mm512_setzero_pd
>>> (),
>>> + (__mmask8) -1);
>>> +}
>>> +
>>> /* SIMD store ops */
>>>
>>> static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> @@ -988,9 +1024,9 @@ _mm512_storeu_ps(void *__P, __m512 __A)
>>> }
>>>
>>> static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> -_mm512_store_ps(void *__P, __m512 __A)
>>> +_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
>>> {
>>> - *(__m512*)__P = __A;
>>> + __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A,
>>> (__mmask8) __U);
>>> }
>>>
>>> static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> @@ -999,6 +1035,19 @@ _mm512_store_pd(void *__P, __m512d __A)
>>> *(__m512d*)__P = __A;
>>> }
>>>
>>> +static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> +_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
>>> +{
>>> + __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
>>> + (__mmask16) __U);
>>> +}
>>> +
>>> +static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> +_mm512_store_ps(void *__P, __m512 __A)
>>> +{
>>> + *(__m512*)__P = __A;
>>> +}
>>> +
>>> /* Mask ops */
>>>
>>> static __inline __mmask16 __attribute__ ((__always_inline__,
>>> __nodebug__))
>>>
>>> Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=226298&r1=226297&r2=226298&view=diff
>>> ==============================================================================
>>> --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
>>> +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Fri Jan 16 12:51:50
>>> 2015
>>> @@ -75,6 +75,13 @@ void test_mm512_storeu_pd(void *p, __m51
>>> _mm512_storeu_pd(p, a);
>>> }
>>>
>>> +void test_mm512_mask_store_ps(void *p, __m512 a, __mmask16 m)
>>> +{
>>> + // CHECK-LABEL: @test_mm512_mask_store_ps
>>> + // CHECK: @llvm.x86.avx512.mask.store.ps.512
>>> + _mm512_mask_store_ps(p, m, a);
>>> +}
>>> +
>>> void test_mm512_store_ps(void *p, __m512 a)
>>> {
>>> // CHECK-LABEL: @test_mm512_store_ps
>>> @@ -82,6 +89,13 @@ void test_mm512_store_ps(void *p, __m512
>>> _mm512_store_ps(p, a);
>>> }
>>>
>>> +void test_mm512_mask_store_pd(void *p, __m512d a, __mmask8 m)
>>> +{
>>> + // CHECK-LABEL: @test_mm512_mask_store_pd
>>> + // CHECK: @llvm.x86.avx512.mask.store.pd.512
>>> + _mm512_mask_store_pd(p, m, a);
>>> +}
>>> +
>>> void test_mm512_store_pd(void *p, __m512d a)
>>> {
>>> // CHECK-LABEL: @test_mm512_store_pd
>>> @@ -103,6 +117,34 @@ __m512d test_mm512_loadu_pd(void *p)
>>> return _mm512_loadu_pd(p);
>>> }
>>>
>>> +__m512 test_mm512_maskz_load_ps(void *p, __mmask16 m)
>>> +{
>>> + // CHECK-LABEL: @test_mm512_maskz_load_ps
>>> + // CHECK: @llvm.x86.avx512.mask.load.ps.512
>>> + return _mm512_maskz_load_ps(m, p);
>>> +}
>>> +
>>> +__m512 test_mm512_load_ps(void *p)
>>> +{
>>> + // CHECK-LABEL: @test_mm512_load_ps
>>> + // CHECK: @llvm.x86.avx512.mask.load.ps.512
>>> + return _mm512_load_ps(p);
>>> +}
>>> +
>>> +__m512d test_mm512_maskz_load_pd(void *p, __mmask8 m)
>>> +{
>>> + // CHECK-LABEL: @test_mm512_maskz_load_pd
>>> + // CHECK: @llvm.x86.avx512.mask.load.pd.512
>>> + return _mm512_maskz_load_pd(m, p);
>>> +}
>>> +
>>> +__m512d test_mm512_load_pd(void *p)
>>> +{
>>> + // CHECK-LABEL: @test_mm512_load_pd
>>> + // CHECK: @llvm.x86.avx512.mask.load.pd.512
>>> + return _mm512_load_pd(p);
>>> +}
>>> +
>>> __m512d test_mm512_set1_pd(double d)
>>> {
>>> // CHECK-LABEL: @test_mm512_set1_pd
>>>
>>>
>>> _______________________________________________
>>> cfe-commits mailing list
>>> cfe-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>>
>>
>> --
>> Hal Finkel
>> Assistant Computational Scientist
>> Leadership Computing Facility
>> Argonne National Laboratory
>
More information about the cfe-commits mailing list