r226298 - [AVX512] Add intrinsics for masked aligned FP loads and stores

Adam Nemet via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 18 18:07:25 PST 2016


> On Jan 18, 2016, at 6:00 PM, Adam Nemet <anemet at apple.com> wrote:
> 
> 
>> On Jan 18, 2016, at 5:28 PM, Hal Finkel <hfinkel at anl.gov> wrote:
>> 
>> ----- Original Message -----
>>> From: "Adam Nemet" <anemet at apple.com>
>>> To: cfe-commits at cs.uiuc.edu
>>> Sent: Friday, January 16, 2015 12:51:50 PM
>>> Subject: r226298 - [AVX512] Add intrinsics for masked aligned FP loads and stores
>>> 
>>> Author: anemet
>>> Date: Fri Jan 16 12:51:50 2015
>>> New Revision: 226298
>>> 
>>> URL: http://llvm.org/viewvc/llvm-project?rev=226298&view=rev
>>> Log:
>>> [AVX512] Add intrinsics for masked aligned FP loads and stores
>>> 
>>> Part of <rdar://problem/17688758>
>>> 
>>> Modified:
>>>   cfe/trunk/include/clang/Basic/BuiltinsX86.def
>>>   cfe/trunk/lib/Headers/avx512fintrin.h
>>>   cfe/trunk/test/CodeGen/avx512f-builtins.c
>>> 
>>> Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=226298&r1=226297&r2=226298&view=diff
>>> ==============================================================================
>>> --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
>>> +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Jan 16 12:51:50
>>> 2015
>>> @@ -897,11 +897,15 @@ BUILTIN(__builtin_ia32_pbroadcastq512_me
>>> BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16ivC*V16iUs", "")
>>> BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLivC*V8LLiUc", "")
>>> BUILTIN(__builtin_ia32_loadups512_mask, "V16fvC*V16fUs", "")
>>> +BUILTIN(__builtin_ia32_loadaps512_mask, "V16fvC*V16fUs", "")
>>> BUILTIN(__builtin_ia32_loadupd512_mask, "V8dvC*V8dUc", "")
>>> +BUILTIN(__builtin_ia32_loadapd512_mask, "V8dvC*V8dUc", "")
>>> BUILTIN(__builtin_ia32_storedqudi512_mask, "vv*V8LLiUc", "")
>>> BUILTIN(__builtin_ia32_storedqusi512_mask, "vv*V16iUs", "")
>>> BUILTIN(__builtin_ia32_storeupd512_mask, "vv*V8dUc", "")
>>> +BUILTIN(__builtin_ia32_storeapd512_mask, "vv*V8dUc", "")
>>> BUILTIN(__builtin_ia32_storeups512_mask, "vv*V16fUs", "")
>>> +BUILTIN(__builtin_ia32_storeaps512_mask, "vv*V16fUs", "")
>>> BUILTIN(__builtin_ia32_vpermt2vard512_mask, "V16iV16iV16iV16iUs",
>>> "")
>>> BUILTIN(__builtin_ia32_vpermt2varq512_mask,
>>> "V8LLiV8LLiV8LLiV8LLiUc", "")
>>> BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs",
>>> "")
>>> 
>>> Modified: cfe/trunk/lib/Headers/avx512fintrin.h
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=226298&r1=226297&r2=226298&view=diff
>>> ==============================================================================
>>> --- cfe/trunk/lib/Headers/avx512fintrin.h (original)
>>> +++ cfe/trunk/lib/Headers/avx512fintrin.h Fri Jan 16 12:51:50 2015
>>> @@ -928,6 +928,24 @@ _mm512_maskz_loadu_pd(__mmask8 __U, void
>>>                                                   (__mmask8) __U);
>>> }
>>> 
>>> +static __inline __m512 __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> +_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
>>> +{
>>> +  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf
>>> *)__P,
>>> +                                                  (__v16sf)
>>> +                                                  _mm512_setzero_ps
>>> (),
>>> +                                                  (__mmask16) __U);
>>> +}
>>> +
>>> +static __inline __m512d __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> +_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
>>> +{
>>> +  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df
>>> *)__P,
>>> +                                                   (__v8df)
>>> +                                                   _mm512_setzero_pd
>>> (),
>>> +                                                   (__mmask8) __U);
>>> +}
>>> +
>>> static __inline __m512d __attribute__((__always_inline__,
>>> __nodebug__))
>>> _mm512_loadu_pd(double const *__p)
>>> {
>>> @@ -946,6 +964,24 @@ _mm512_loadu_ps(float const *__p)
>>>  return ((struct __loadu_ps*)__p)->__v;
>>> }
>>> 
>>> +static __inline __m512 __attribute__((__always_inline__,
>>> __nodebug__))
>>> +_mm512_load_ps(double const *__p)
>>> +{
>>> +  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf
>>> *)__p,
>>> +                                                  (__v16sf)
>>> +                                                  _mm512_setzero_ps
>>> (),
>>> +                                                  (__mmask16) -1);
>>> +}
>>> +
>>> +static __inline __m512d __attribute__((__always_inline__,
>>> __nodebug__))
>>> +_mm512_load_pd(float const *__p)
>> 
>> Shouldn't this one take a 'double const *' and the one above take a 'float const *'?
> 
> You’re right.  Fixing it.

The fix is committed as r258108.

> 
> Thanks,
> Adam
> 
>> 
>> -Hal
>> 
>>> +{
>>> +  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df
>>> *)__p,
>>> +                                                   (__v8df)
>>> +                                                   _mm512_setzero_pd
>>> (),
>>> +                                                   (__mmask8) -1);
>>> +}
>>> +
>>> /* SIMD store ops */
>>> 
>>> static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> @@ -988,9 +1024,9 @@ _mm512_storeu_ps(void *__P, __m512 __A)
>>> }
>>> 
>>> static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> -_mm512_store_ps(void *__P, __m512 __A)
>>> +_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
>>> {
>>> -  *(__m512*)__P = __A;
>>> +  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A,
>>> (__mmask8) __U);
>>> }
>>> 
>>> static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> @@ -999,6 +1035,19 @@ _mm512_store_pd(void *__P, __m512d __A)
>>>  *(__m512d*)__P = __A;
>>> }
>>> 
>>> +static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> +_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
>>> +{
>>> +  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
>>> +                                   (__mmask16) __U);
>>> +}
>>> +
>>> +static __inline void __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> +_mm512_store_ps(void *__P, __m512 __A)
>>> +{
>>> +  *(__m512*)__P = __A;
>>> +}
>>> +
>>> /* Mask ops */
>>> 
>>> static __inline __mmask16 __attribute__ ((__always_inline__,
>>> __nodebug__))
>>> 
>>> Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=226298&r1=226297&r2=226298&view=diff
>>> ==============================================================================
>>> --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
>>> +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Fri Jan 16 12:51:50
>>> 2015
>>> @@ -75,6 +75,13 @@ void test_mm512_storeu_pd(void *p, __m51
>>>  _mm512_storeu_pd(p, a);
>>> }
>>> 
>>> +void test_mm512_mask_store_ps(void *p, __m512 a, __mmask16 m)
>>> +{
>>> +  // CHECK-LABEL: @test_mm512_mask_store_ps
>>> +  // CHECK: @llvm.x86.avx512.mask.store.ps.512
>>> +  _mm512_mask_store_ps(p, m, a);
>>> +}
>>> +
>>> void test_mm512_store_ps(void *p, __m512 a)
>>> {
>>>  // CHECK-LABEL: @test_mm512_store_ps
>>> @@ -82,6 +89,13 @@ void test_mm512_store_ps(void *p, __m512
>>>  _mm512_store_ps(p, a);
>>> }
>>> 
>>> +void test_mm512_mask_store_pd(void *p, __m512d a, __mmask8 m)
>>> +{
>>> +  // CHECK-LABEL: @test_mm512_mask_store_pd
>>> +  // CHECK: @llvm.x86.avx512.mask.store.pd.512
>>> +  _mm512_mask_store_pd(p, m, a);
>>> +}
>>> +
>>> void test_mm512_store_pd(void *p, __m512d a)
>>> {
>>>  // CHECK-LABEL: @test_mm512_store_pd
>>> @@ -103,6 +117,34 @@ __m512d test_mm512_loadu_pd(void *p)
>>>  return _mm512_loadu_pd(p);
>>> }
>>> 
>>> +__m512 test_mm512_maskz_load_ps(void *p, __mmask16 m)
>>> +{
>>> +  // CHECK-LABEL: @test_mm512_maskz_load_ps
>>> +  // CHECK: @llvm.x86.avx512.mask.load.ps.512
>>> +  return _mm512_maskz_load_ps(m, p);
>>> +}
>>> +
>>> +__m512 test_mm512_load_ps(void *p)
>>> +{
>>> +  // CHECK-LABEL: @test_mm512_load_ps
>>> +  // CHECK: @llvm.x86.avx512.mask.load.ps.512
>>> +  return _mm512_load_ps(p);
>>> +}
>>> +
>>> +__m512d test_mm512_maskz_load_pd(void *p, __mmask8 m)
>>> +{
>>> +  // CHECK-LABEL: @test_mm512_maskz_load_pd
>>> +  // CHECK: @llvm.x86.avx512.mask.load.pd.512
>>> +  return _mm512_maskz_load_pd(m, p);
>>> +}
>>> +
>>> +__m512d test_mm512_load_pd(void *p)
>>> +{
>>> +  // CHECK-LABEL: @test_mm512_load_pd
>>> +  // CHECK: @llvm.x86.avx512.mask.load.pd.512
>>> +  return _mm512_load_pd(p);
>>> +}
>>> +
>>> __m512d test_mm512_set1_pd(double d)
>>> {
>>>  // CHECK-LABEL: @test_mm512_set1_pd
>>> 
>>> 
>>> _______________________________________________
>>> cfe-commits mailing list
>>> cfe-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>> 
>> 
>> -- 
>> Hal Finkel
>> Assistant Computational Scientist
>> Leadership Computing Facility
>> Argonne National Laboratory
> 



More information about the cfe-commits mailing list