r312135 - [X86] Implement broadcastf32x2 and broadcasti32x2 intrinsics using __builtin_shufflevector instead builtins
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 30 09:15:12 PDT 2017
Author: ctopper
Date: Wed Aug 30 09:15:12 2017
New Revision: 312135
URL: http://llvm.org/viewvc/llvm-project?rev=312135&view=rev
Log:
[X86] Implement broadcastf32x2 and broadcasti32x2 intrinsics using __builtin_shufflevector instead builtins
This patch implements the broadcastf32x2/broadcasti32x2 intrinsics using __builtin_shufflevector.
Differential Revision: https://reviews.llvm.org/D37287
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512vldqintrin.h
cfe/trunk/test/CodeGen/avx512dq-builtins.c
cfe/trunk/test/CodeGen/avx512vldq-builtins.c
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Aug 30 09:15:12 2017
@@ -1596,11 +1596,6 @@ TARGET_BUILTIN(__builtin_ia32_broadcastm
TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl")
TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl")
TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_512_mask, "V16fV4fV16fUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_512_mask, "V16iV4iV16iUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_256_mask, "V8fV4fV8fUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_128_mask, "V4iV4iV4iUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_256_mask, "V8iV4iV8iUc","","avx512dq,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl")
Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed Aug 30 09:15:12 2017
@@ -973,25 +973,26 @@ _mm512_movepi64_mask (__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x2 (__m128 __A)
{
- return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
- (__v16sf)_mm512_undefined_ps(),
- (__mmask16) -1);
+ return (__m512)__builtin_shufflevector((__v4sf)__A,
+ (__v4sf)_mm_undefined_ps(),
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
- return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
- (__v16sf)
- __O, __M);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+ (__v16sf)_mm512_broadcast_f32x2(__A),
+ (__v16sf)__O);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
- return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
- (__v16sf)_mm512_setzero_ps (),
- __M);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+ (__v16sf)_mm512_broadcast_f32x2(__A),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -1044,25 +1045,26 @@ _mm512_maskz_broadcast_f64x2(__mmask8 __
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x2 (__m128i __A)
{
- return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
- (__v16si)_mm512_setzero_si512(),
- (__mmask16) -1);
+ return (__m512i)__builtin_shufflevector((__v4si)__A,
+ (__v4si)_mm_undefined_si128(),
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
- return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
- (__v16si)
- __O, __M);
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+ (__v16si)_mm512_broadcast_i32x2(__A),
+ (__v16si)__O);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
- return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
- (__v16si)_mm512_setzero_si512 (),
- __M);
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+ (__v16si)_mm512_broadcast_i32x2(__A),
+ (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
Modified: cfe/trunk/lib/Headers/avx512vldqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vldqintrin.h?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vldqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vldqintrin.h Wed Aug 30 09:15:12 2017
@@ -978,25 +978,25 @@ _mm256_movepi64_mask (__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_broadcast_f32x2 (__m128 __A)
{
- return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
- (__v8sf)_mm256_undefined_ps(),
- (__mmask8) -1);
+ return (__m256)__builtin_shufflevector((__v4sf)__A,
+ (__v4sf)_mm_undefined_ps(),
+ 0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
{
- return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
- (__v8sf) __O,
- __M);
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
+ (__v8sf)_mm256_broadcast_f32x2(__A),
+ (__v8sf)__O);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
{
- return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
- (__v8sf) _mm256_setzero_ps (),
- __M);
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
+ (__v8sf)_mm256_broadcast_f32x2(__A),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
@@ -1025,49 +1025,49 @@ _mm256_maskz_broadcast_f64x2 (__mmask8 _
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcast_i32x2 (__m128i __A)
{
- return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
- (__v4si)_mm_undefined_si128(),
- (__mmask8) -1);
+ return (__m128i)__builtin_shufflevector((__v4si)__A,
+ (__v4si)_mm_undefined_si128(),
+ 0, 1, 0, 1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
{
- return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
- (__v4si) __O,
- __M);
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+ (__v4si)_mm_broadcast_i32x2(__A),
+ (__v4si)__O);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
- return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
- (__v4si) _mm_setzero_si128 (),
- __M);
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
+ (__v4si)_mm_broadcast_i32x2(__A),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcast_i32x2 (__m128i __A)
{
- return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
- (__v8si)_mm256_undefined_si256(),
- (__mmask8) -1);
+ return (__m256i)__builtin_shufflevector((__v4si)__A,
+ (__v4si)_mm_undefined_si128(),
+ 0, 1, 0, 1, 0, 1, 0, 1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
{
- return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
- (__v8si) __O,
- __M);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+ (__v8si)_mm256_broadcast_i32x2(__A),
+ (__v8si)__O);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
- return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
- (__v8si) _mm256_setzero_si256 (),
- __M);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+ (__v8si)_mm256_broadcast_i32x2(__A),
+ (__v8si)_mm256_setzero_si256());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Wed Aug 30 09:15:12 2017
@@ -949,19 +949,21 @@ __mmask8 test_mm512_movepi64_mask(__m512
__m512 test_mm512_broadcast_f32x2(__m128 __A) {
// CHECK-LABEL: @test_mm512_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm512_broadcast_f32x2(__A);
}
__m512 test_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) {
// CHECK-LABEL: @test_mm512_mask_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_broadcast_f32x2(__O, __M, __A);
}
__m512 test_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) {
// CHECK-LABEL: @test_mm512_maskz_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_broadcast_f32x2(__M, __A);
}
@@ -1007,19 +1009,21 @@ __m512d test_mm512_maskz_broadcast_f64x2
__m512i test_mm512_broadcast_i32x2(__m128i __A) {
// CHECK-LABEL: @test_mm512_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm512_broadcast_i32x2(__A);
}
__m512i test_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) {
// CHECK-LABEL: @test_mm512_mask_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_broadcast_i32x2(__O, __M, __A);
}
__m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) {
// CHECK-LABEL: @test_mm512_maskz_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_broadcast_i32x2(__M, __A);
}
Modified: cfe/trunk/test/CodeGen/avx512vldq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vldq-builtins.c?rev=312135&r1=312134&r2=312135&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vldq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vldq-builtins.c Wed Aug 30 09:15:12 2017
@@ -909,19 +909,21 @@ __mmask8 test_mm256_movepi64_mask(__m256
__m256 test_mm256_broadcast_f32x2(__m128 __A) {
// CHECK-LABEL: @test_mm256_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm256_broadcast_f32x2(__A);
}
__m256 test_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) {
// CHECK-LABEL: @test_mm256_mask_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_broadcast_f32x2(__O, __M, __A);
}
__m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
// CHECK-LABEL: @test_mm256_maskz_broadcast_f32x2
- // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_broadcast_f32x2(__M, __A);
}
@@ -947,37 +949,41 @@ __m256d test_mm256_maskz_broadcast_f64x2
__m128i test_mm_broadcast_i32x2(__m128i __A) {
// CHECK-LABEL: @test_mm_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
return _mm_broadcast_i32x2(__A);
}
__m128i test_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_broadcast_i32x2(__O, __M, __A);
}
__m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_broadcast_i32x2(__M, __A);
}
__m256i test_mm256_broadcast_i32x2(__m128i __A) {
// CHECK-LABEL: @test_mm256_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm256_broadcast_i32x2(__A);
}
__m256i test_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) {
// CHECK-LABEL: @test_mm256_mask_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_broadcast_i32x2(__O, __M, __A);
}
__m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
// CHECK-LABEL: @test_mm256_maskz_broadcast_i32x2
- // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_broadcast_i32x2(__M, __A);
}
More information about the cfe-commits
mailing list