r312135 - [X86] Implement broadcastf32x2 and broadcasti32x2 intrinsics using __builtin_shufflevector instead builtins

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 30 09:15:12 PDT 2017


Author: ctopper
Date: Wed Aug 30 09:15:12 2017
New Revision: 312135

URL: http://llvm.org/viewvc/llvm-project?rev=312135&view=rev
Log:
[X86] Implement broadcastf32x2 and broadcasti32x2 intrinsics using __builtin_shufflevector instead builtins

This patch implements the broadcastf32x2/broadcasti32x2 intrinsics using __builtin_shufflevector.

Differential Revision: https://reviews.llvm.org/D37287

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx512dqintrin.h
    cfe/trunk/lib/Headers/avx512vldqintrin.h
    cfe/trunk/test/CodeGen/avx512dq-builtins.c
    cfe/trunk/test/CodeGen/avx512vldq-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Aug 30 09:15:12 2017
@@ -1596,11 +1596,6 @@ TARGET_BUILTIN(__builtin_ia32_broadcastm
 TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_512_mask, "V16fV4fV16fUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_512_mask, "V16iV4iV16iUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_256_mask, "V8fV4fV8fUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_128_mask, "V4iV4iV4iUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_256_mask, "V8iV4iV8iUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl")

Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed Aug 30 09:15:12 2017
@@ -973,25 +973,26 @@ _mm512_movepi64_mask (__m512i __A)
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_broadcast_f32x2 (__m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
-                (__v16sf)_mm512_undefined_ps(),
-                (__mmask16) -1);
+  return (__m512)__builtin_shufflevector((__v4sf)__A,
+                                         (__v4sf)_mm_undefined_ps(),
+                                         0, 1, 0, 1, 0, 1, 0, 1,
+                                         0, 1, 0, 1, 0, 1, 0, 1);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
-                (__v16sf)
-                __O, __M);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+                                             (__v16sf)_mm512_broadcast_f32x2(__A),
+                                             (__v16sf)__O);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
-                (__v16sf)_mm512_setzero_ps (),
-                __M);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+                                             (__v16sf)_mm512_broadcast_f32x2(__A),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -1044,25 +1045,26 @@ _mm512_maskz_broadcast_f64x2(__mmask8 __
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_broadcast_i32x2 (__m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
-                 (__v16si)_mm512_setzero_si512(),
-                 (__mmask16) -1);
+  return (__m512i)__builtin_shufflevector((__v4si)__A,
+                                          (__v4si)_mm_undefined_si128(),
+                                          0, 1, 0, 1, 0, 1, 0, 1,
+                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
-                 (__v16si)
-                 __O, __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                             (__v16si)_mm512_broadcast_i32x2(__A),
+                                             (__v16si)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
-                 (__v16si)_mm512_setzero_si512 (),
-                 __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                             (__v16si)_mm512_broadcast_i32x2(__A),
+                                             (__v16si)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/avx512vldqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vldqintrin.h?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vldqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vldqintrin.h Wed Aug 30 09:15:12 2017
@@ -978,25 +978,25 @@ _mm256_movepi64_mask (__m256i __A)
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_broadcast_f32x2 (__m128 __A)
 {
-  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
-                (__v8sf)_mm256_undefined_ps(),
-                (__mmask8) -1);
+  return (__m256)__builtin_shufflevector((__v4sf)__A,
+                                         (__v4sf)_mm_undefined_ps(),
+                                         0, 1, 0, 1, 0, 1, 0, 1);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
 {
-  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
-                (__v8sf) __O,
-                __M);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
+                                             (__v8sf)_mm256_broadcast_f32x2(__A),
+                                             (__v8sf)__O);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
 {
-  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
-                (__v8sf) _mm256_setzero_ps (),
-                __M);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
+                                             (__v8sf)_mm256_broadcast_f32x2(__A),
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
@@ -1025,49 +1025,49 @@ _mm256_maskz_broadcast_f64x2 (__mmask8 _
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcast_i32x2 (__m128i __A)
 {
-  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
-                 (__v4si)_mm_undefined_si128(),
-                 (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector((__v4si)__A,
+                                          (__v4si)_mm_undefined_si128(),
+                                          0, 1, 0, 1);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
 {
-  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
-                 (__v4si) __O,
-                 __M);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+                                             (__v4si)_mm_broadcast_i32x2(__A),
+                                             (__v4si)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
 {
-  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
-                 (__v4si) _mm_setzero_si128 (),
-                 __M);
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+                                             (__v4si)_mm_broadcast_i32x2(__A),
+                                             (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcast_i32x2 (__m128i __A)
 {
-  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
-                 (__v8si)_mm256_undefined_si256(),
-                 (__mmask8) -1);
+  return (__m256i)__builtin_shufflevector((__v4si)__A,
+                                          (__v4si)_mm_undefined_si128(),
+                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
 {
-  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
-                 (__v8si) __O,
-                 __M);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                             (__v8si)_mm256_broadcast_i32x2(__A),
+                                             (__v8si)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
 {
-  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
-                 (__v8si) _mm256_setzero_si256 (),
-                 __M);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                             (__v8si)_mm256_broadcast_i32x2(__A),
+                                             (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Wed Aug 30 09:15:12 2017
@@ -949,19 +949,21 @@ __mmask8 test_mm512_movepi64_mask(__m512
 
 __m512 test_mm512_broadcast_f32x2(__m128 __A) {
   // CHECK-LABEL: @test_mm512_broadcast_f32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   return _mm512_broadcast_f32x2(__A); 
 }
 
 __m512 test_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_f32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_broadcast_f32x2(__O, __M, __A); 
 }
 
 __m512 test_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_broadcast_f32x2(__M, __A); 
 }
 
@@ -1007,19 +1009,21 @@ __m512d test_mm512_maskz_broadcast_f64x2
 
 __m512i test_mm512_broadcast_i32x2(__m128i __A) {
   // CHECK-LABEL: @test_mm512_broadcast_i32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   return _mm512_broadcast_i32x2(__A); 
 }
 
 __m512i test_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) {
   // CHECK-LABEL: @test_mm512_mask_broadcast_i32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_broadcast_i32x2(__O, __M, __A); 
 }
 
 __m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) {
   // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_broadcast_i32x2(__M, __A); 
 }
 

Modified: cfe/trunk/test/CodeGen/avx512vldq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vldq-builtins.c?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vldq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vldq-builtins.c Wed Aug 30 09:15:12 2017
@@ -909,19 +909,21 @@ __mmask8 test_mm256_movepi64_mask(__m256
 
 __m256 test_mm256_broadcast_f32x2(__m128 __A) {
   // CHECK-LABEL: @test_mm256_broadcast_f32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   return _mm256_broadcast_f32x2(__A); 
 }
 
 __m256 test_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) {
   // CHECK-LABEL: @test_mm256_mask_broadcast_f32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_broadcast_f32x2(__O, __M, __A); 
 }
 
 __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
   // CHECK-LABEL: @test_mm256_maskz_broadcast_f32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_broadcast_f32x2(__M, __A); 
 }
 
@@ -947,37 +949,41 @@ __m256d test_mm256_maskz_broadcast_f64x2
 
 __m128i test_mm_broadcast_i32x2(__m128i __A) {
   // CHECK-LABEL: @test_mm_broadcast_i32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   return _mm_broadcast_i32x2(__A); 
 }
 
 __m128i test_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_broadcast_i32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_broadcast_i32x2(__O, __M, __A); 
 }
 
 __m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_broadcast_i32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_broadcast_i32x2(__M, __A); 
 }
 
 __m256i test_mm256_broadcast_i32x2(__m128i __A) {
   // CHECK-LABEL: @test_mm256_broadcast_i32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   return _mm256_broadcast_i32x2(__A); 
 }
 
 __m256i test_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) {
   // CHECK-LABEL: @test_mm256_mask_broadcast_i32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_broadcast_i32x2(__O, __M, __A); 
 }
 
 __m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   // CHECK-LABEL: @test_mm256_maskz_broadcast_i32x2
-  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_broadcast_i32x2(__M, __A); 
 }
 




More information about the cfe-commits mailing list