r275012 - [X86] Use __builtin_shufflevector for 512-bit shufps intrinsics.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Sat Jul 9 22:57:21 PDT 2016


Author: ctopper
Date: Sun Jul 10 00:57:21 2016
New Revision: 275012

URL: http://llvm.org/viewvc/llvm-project?rev=275012&view=rev
Log:
[X86] Use __builtin_shufflevector for 512-bit shufps intrinsics.

Modified:
    cfe/trunk/lib/Headers/avx512fintrin.h
    cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=275012&r1=275011&r2=275012&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Jul 10 00:57:21 2016
@@ -7181,23 +7181,35 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m12
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()); })
 
-#define _mm512_shuffle_ps(M, V, imm) __extension__ ({ \
-  (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \
-                                        (__v16sf)(__m512)(V), (int)(imm), \
-                                        (__v16sf)_mm512_undefined_ps(), \
-                                        (__mmask16)-1); })
+#define _mm512_shuffle_ps(A, B, M) __extension__ ({ /* M: four 2-bit selectors, reused for every 4-float group */ \
+  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), \
+                                  0  + (((M) >> 0) & 0x3), /* indices 0-15 pick elements of A */ \
+                                  0  + (((M) >> 2) & 0x3), \
+                                  16 + (((M) >> 4) & 0x3), /* indices 16-31 pick elements of B */ \
+                                  16 + (((M) >> 6) & 0x3), \
+                                  4  + (((M) >> 0) & 0x3), /* second 4-float group: same selectors, base +4 */ \
+                                  4  + (((M) >> 2) & 0x3), \
+                                  20 + (((M) >> 4) & 0x3), \
+                                  20 + (((M) >> 6) & 0x3), \
+                                  8  + (((M) >> 0) & 0x3), /* third 4-float group */ \
+                                  8  + (((M) >> 2) & 0x3), \
+                                  24 + (((M) >> 4) & 0x3), \
+                                  24 + (((M) >> 6) & 0x3), \
+                                  12 + (((M) >> 0) & 0x3), /* fourth 4-float group */ \
+                                  12 + (((M) >> 2) & 0x3), \
+                                  28 + (((M) >> 4) & 0x3), \
+                                  28 + (((M) >> 6) & 0x3)); })
 
-#define _mm512_mask_shuffle_ps(W, U, M, V, imm) __extension__ ({ \
-  (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \
-                                        (__v16sf)(__m512)(V), (int)(imm), \
-                                        (__v16sf)(__m512)(W), \
-                                        (__mmask16)(U)); })
+#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ /* masked form of _mm512_shuffle_ps */ \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), /* U: 16-bit mask, one bit per float element */ \
+                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), /* unmasked shuffle of A and B by M */ \
+                                      (__v16sf)(__m512)(W)); }) /* W: other select operand; presumably used where U is clear -- confirm */
 
-#define _mm512_maskz_shuffle_ps(U, M, V, imm) __extension__ ({ \
-  (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \
-                                        (__v16sf)(__m512)(V), (int)(imm), \
-                                        (__v16sf)_mm512_setzero_ps(), \
-                                        (__mmask16)(U)); })
+#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ /* zero-masked form of _mm512_shuffle_ps */ \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), /* U: 16-bit mask, one bit per float element */ \
+                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), /* unmasked shuffle of A and B by M */ \
+                                      (__v16sf)_mm512_setzero_ps()); }) /* other select operand; presumably zeros where U is clear -- confirm */
 
 #define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=275012&r1=275011&r2=275012&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun Jul 10 00:57:21 2016
@@ -4234,19 +4234,21 @@ __m512d test_mm512_maskz_shuffle_pd(__mm
 
 __m512 test_mm512_shuffle_ps(__m512 __M, __m512 __V) {
   // CHECK-LABEL: @test_mm512_shuffle_ps
-  // CHECK: @llvm.x86.avx512.mask.shuf.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
   return _mm512_shuffle_ps(__M, __V, 4); 
 }
 
 __m512 test_mm512_mask_shuffle_ps(__m512 __W, __mmask16 __U, __m512 __M, __m512 __V) {
   // CHECK-LABEL: @test_mm512_mask_shuffle_ps
-  // CHECK: @llvm.x86.avx512.mask.shuf.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_shuffle_ps(__W, __U, __M, __V, 4); 
 }
 
 __m512 test_mm512_maskz_shuffle_ps(__mmask16 __U, __m512 __M, __m512 __V) {
   // CHECK-LABEL: @test_mm512_maskz_shuffle_ps
-  // CHECK: @llvm.x86.avx512.mask.shuf.ps.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_shuffle_ps(__U, __M, __V, 4); 
 }
 




More information about the cfe-commits mailing list