r283073 - [AVX-512] Use native IR for masked 512-bit add/sub/mul/div ps/pd intrinsics when rounding mode isn't used.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Sun Oct 2 10:43:00 PDT 2016
Author: ctopper
Date: Sun Oct 2 12:43:00 2016
New Revision: 283073
URL: http://llvm.org/viewvc/llvm-project?rev=283073&view=rev
Log:
[AVX-512] Use native IR for masked 512-bit add/sub/mul/div ps/pd intrinsics when rounding mode isn't used.
Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=283073&r1=283072&r2=283073&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Oct 2 12:43:00 2016
@@ -1976,38 +1976,30 @@ _mm_maskz_add_sd(__mmask8 __U,__m128d __
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_add_pd(__A, __B),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) _mm512_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_add_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_add_ps(__A, __B),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) _mm512_setzero_ps (),
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_add_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
@@ -2119,40 +2111,30 @@ _mm_maskz_sub_sd(__mmask8 __U,__m128d __
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_sub_pd(__A, __B),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_sub_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_sub_ps(__A, __B),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_sub_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
@@ -2264,40 +2246,30 @@ _mm_maskz_mul_sd(__mmask8 __U,__m128d __
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_mul_pd(__A, __B),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_mul_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_mul_ps(__A, __B),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_mul_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
@@ -2416,21 +2388,16 @@ _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_div_pd(__A, __B),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_div_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline __m512 __DEFAULT_FN_ATTRS
@@ -2441,21 +2408,16 @@ _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_div_ps(__A, __B),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_div_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=283073&r1=283072&r2=283073&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun Oct 2 12:43:00 2016
@@ -1921,12 +1921,14 @@ __m512d test_mm512_maskz_add_round_pd(__
}
__m512d test_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_add_pd
- // CHECK: @llvm.x86.avx512.mask.add.pd.512
+ // CHECK: fadd <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_add_pd(__W,__U,__A,__B);
}
__m512d test_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_add_pd
- // CHECK: @llvm.x86.avx512.mask.add.pd.512
+ // CHECK: fadd <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_add_pd(__U,__A,__B);
}
__m512 test_mm512_add_round_ps(__m512 __A, __m512 __B) {
@@ -1946,12 +1948,14 @@ __m512 test_mm512_maskz_add_round_ps(__m
}
__m512 test_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_add_ps
- // CHECK: @llvm.x86.avx512.mask.add.ps.512
+ // CHECK: fadd <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_add_ps(__W,__U,__A,__B);
}
__m512 test_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_add_ps
- // CHECK: @llvm.x86.avx512.mask.add.ps.512
+ // CHECK: fadd <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_add_ps(__U,__A,__B);
}
__m128 test_mm_add_round_ss(__m128 __A, __m128 __B) {
@@ -2021,12 +2025,14 @@ __m512d test_mm512_maskz_sub_round_pd(__
}
__m512d test_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_sub_pd
- // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+ // CHECK: fsub <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_sub_pd(__W,__U,__A,__B);
}
__m512d test_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_sub_pd
- // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+ // CHECK: fsub <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_sub_pd(__U,__A,__B);
}
__m512 test_mm512_sub_round_ps(__m512 __A, __m512 __B) {
@@ -2046,12 +2052,14 @@ __m512 test_mm512_maskz_sub_round_ps(__m
}
__m512 test_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_sub_ps
- // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+ // CHECK: fsub <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_sub_ps(__W,__U,__A,__B);
}
__m512 test_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_sub_ps
- // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+ // CHECK: fsub <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_sub_ps(__U,__A,__B);
}
__m128 test_mm_sub_round_ss(__m128 __A, __m128 __B) {
@@ -2121,12 +2129,14 @@ __m512d test_mm512_maskz_mul_round_pd(__
}
__m512d test_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_mul_pd
- // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+ // CHECK: fmul <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_mul_pd(__W,__U,__A,__B);
}
__m512d test_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_mul_pd
- // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+ // CHECK: fmul <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_mul_pd(__U,__A,__B);
}
__m512 test_mm512_mul_round_ps(__m512 __A, __m512 __B) {
@@ -2146,12 +2156,14 @@ __m512 test_mm512_maskz_mul_round_ps(__m
}
__m512 test_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_mul_ps
- // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+ // CHECK: fmul <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_mul_ps(__W,__U,__A,__B);
}
__m512 test_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_mul_ps
- // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+ // CHECK: fmul <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_mul_ps(__U,__A,__B);
}
__m128 test_mm_mul_round_ss(__m128 __A, __m128 __B) {
@@ -2226,12 +2238,14 @@ __m512d test_mm512_div_pd(__m512d __a, _
}
__m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) {
// CHECK-LABLE: @test_mm512_mask_div_pd
- // CHECK: @llvm.x86.avx512.mask.div.pd.512
+ // CHECK: fdiv <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_div_pd(__w,__u,__a,__b);
}
__m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_div_pd
- // CHECK: @llvm.x86.avx512.mask.div.pd.512
+ // CHECK: fdiv <8 x double> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_div_pd(__U,__A,__B);
}
__m512 test_mm512_div_round_ps(__m512 __A, __m512 __B) {
@@ -2256,12 +2270,14 @@ __m512 test_mm512_div_ps(__m512 __A, __m
}
__m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_div_ps
- // CHECK: @llvm.x86.avx512.mask.div.ps.512
+ // CHECK: fdiv <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_div_ps(__W,__U,__A,__B);
}
__m512 test_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_div_ps
- // CHECK: @llvm.x86.avx512.mask.div.ps.512
+ // CHECK: fdiv <16 x float> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_div_ps(__U,__A,__B);
}
__m128 test_mm_div_round_ss(__m128 __A, __m128 __B) {
More information about the cfe-commits
mailing list