r289351 - [AVX-512] Remove masking from 512-bit vpermil builtins. The backend now has versions without masking so wrap it with select.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Sat Dec 10 17:26:52 PST 2016


Author: ctopper
Date: Sat Dec 10 19:26:52 2016
New Revision: 289351

URL: http://llvm.org/viewvc/llvm-project?rev=289351&view=rev
Log:
[AVX-512] Remove masking from 512-bit vpermil builtins. The backend now has versions without masking so wrap it with select.

This will allow the backend to constant fold these to generic shuffle vectors like 128-bit and 256-bit without having to working about handling masking.

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx512fintrin.h
    cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=289351&r1=289350&r2=289351&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat Dec 10 19:26:52 2016
@@ -1489,8 +1489,8 @@ TARGET_BUILTIN(__builtin_ia32_vpermi2var
 TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512_mask, "V8dV8dV8LLiV8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varps512_mask, "V16fV16fV16iV16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512_mask, "V8dV8dV8LLiV8dUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps512_mask, "V16fV16fV16iV16fUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8LLi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermt2vard512_maskz, "V16iV16iV16iV16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_maskz, "V8dV8LLiV8dV8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_maskz, "V16fV16iV16fV16fUs","","avx512f")

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=289351&r1=289350&r2=289351&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sat Dec 10 19:26:52 2016
@@ -6588,61 +6588,47 @@ _mm512_mask2_permutex2var_epi64 (__m512i
                                       (__v16sf)_mm512_setzero_ps()); })
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_permutevar_pd (__m512d __A, __m512i __C)
+_mm512_permutevar_pd(__m512d __A, __m512i __C)
 {
-  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
-              (__v8di) __C,
-              (__v8df)
-              _mm512_undefined_pd (),
-              (__mmask8) -1);
+  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
+_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
 {
-  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
-              (__v8di) __C,
-              (__v8df) __W,
-              (__mmask8) __U);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                         (__v8df)_mm512_permutevar_pd(__A, __C),
+                                         (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
+_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
 {
-  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
-              (__v8di) __C,
-              (__v8df)
-              _mm512_setzero_pd (),
-              (__mmask8) __U);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                         (__v8df)_mm512_permutevar_pd(__A, __C),
+                                         (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_permutevar_ps (__m512 __A, __m512i __C)
+_mm512_permutevar_ps(__m512 __A, __m512i __C)
 {
-  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
-                   (__v16si) __C,
-                   (__v16sf)
-                   _mm512_undefined_ps (),
-                   (__mmask16) -1);
+  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
+_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
 {
-  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
-                   (__v16si) __C,
-                   (__v16sf) __W,
-                   (__mmask16) __U);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
+                                        (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
+_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
 {
-  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
-                   (__v16si) __C,
-                   (__v16sf)
-                   _mm512_setzero_ps (),
-                   (__mmask16) __U);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
+                                        (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=289351&r1=289350&r2=289351&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sat Dec 10 19:26:52 2016
@@ -3738,37 +3738,41 @@ __m512 test_mm512_maskz_permute_ps(__mma
 
 __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) {
   // CHECK-LABEL: @test_mm512_permutevar_pd
-  // CHECK: @llvm.x86.avx512.mask.vpermilvar.pd.512
+  // CHECK: @llvm.x86.avx512.vpermilvar.pd.512
   return _mm512_permutevar_pd(__A, __C); 
 }
 
 __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
   // CHECK-LABEL: @test_mm512_mask_permutevar_pd
-  // CHECK: @llvm.x86.avx512.mask.vpermilvar.pd.512
+  // CHECK: @llvm.x86.avx512.vpermilvar.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_permutevar_pd(__W, __U, __A, __C); 
 }
 
 __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
   // CHECK-LABEL: @test_mm512_maskz_permutevar_pd
-  // CHECK: @llvm.x86.avx512.mask.vpermilvar.pd.512
+  // CHECK: @llvm.x86.avx512.vpermilvar.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_permutevar_pd(__U, __A, __C); 
 }
 
 __m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) {
   // CHECK-LABEL: @test_mm512_permutevar_ps
-  // CHECK: @llvm.x86.avx512.mask.vpermilvar.ps.512
+  // CHECK: @llvm.x86.avx512.vpermilvar.ps.512
   return _mm512_permutevar_ps(__A, __C); 
 }
 
 __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
   // CHECK-LABEL: @test_mm512_mask_permutevar_ps
-  // CHECK: @llvm.x86.avx512.mask.vpermilvar.ps.512
+  // CHECK: @llvm.x86.avx512.vpermilvar.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_permutevar_ps(__W, __U, __A, __C); 
 }
 
 __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
   // CHECK-LABEL: @test_mm512_maskz_permutevar_ps
-  // CHECK: @llvm.x86.avx512.mask.vpermilvar.ps.512
+  // CHECK: @llvm.x86.avx512.vpermilvar.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_permutevar_ps(__U, __A, __C); 
 }
 




More information about the cfe-commits mailing list