r334359 - [X86] Remove masking from the 512-bit packed floating point add/sub/mul/div builtins. Use select in IR instead.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Sat Jun 9 23:01:42 PDT 2018
Author: ctopper
Date: Sat Jun 9 23:01:42 2018
New Revision: 334359
URL: http://llvm.org/viewvc/llvm-project?rev=334359&view=rev
Log:
[X86] Remove masking from the 512-bit packed floating point add/sub/mul/div builtins. Use select in IR instead.
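As an illustration (not part of the patch itself), the masked and zero-masked forms are now composed from the unmasked rounding builtin plus the existing select builtin, instead of passing a passthru vector and mask into the arithmetic builtin. A minimal sketch of the pattern, following the macros changed below; SKETCH_mask_add_round_pd is a hypothetical stand-in for the real _mm512_mask_add_round_pd macro:

/* Illustrative only: the unmasked builtin performs the rounded add, and the
   select builtin merges the result into W under mask U (a zero vector is used
   instead of W for the maskz form). */
#define SKETCH_mask_add_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
      (__v8df)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(R)), \
      (__v8df)(__m512d)(W))

In IR this shows up as a call to @llvm.x86.avx512.add.pd.512 followed by a vector select, which is what the updated CodeGen tests check below.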
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/test/CodeGen/avx512f-builtins.c
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=334359&r1=334358&r2=334359&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat Jun 9 23:01:42 2018
@@ -1087,14 +1087,14 @@ TARGET_BUILTIN(__builtin_ia32_pmulhrsw51
TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_addpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_addps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_addpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_addps512, "V16fV16fV16fIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_divpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_divps512, "V16fV16fV16fIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_mulpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_mulps512, "V16fV16fV16fIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_subpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_subps512, "V16fV16fV16fIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "nc", "avx512bw")
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=334359&r1=334358&r2=334359&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sat Jun 9 23:01:42 2018
@@ -2060,40 +2060,32 @@ _mm512_maskz_add_ps(__mmask16 __U, __m51
}
#define _mm512_add_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R))
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_add_round_pd((A), (B), (R)), \
+ (__v8df)(__m512d)(W));
#define _mm512_maskz_add_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_add_round_pd((A), (B), (R)), \
+ (__v8df)_mm512_setzero_pd());
#define _mm512_add_round_ps(A, B, R) \
- (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R))
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
+ (__v16sf)(__m512)(W));
#define _mm512_maskz_add_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
+ (__v16sf)_mm512_setzero_ps());
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2195,40 +2187,32 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m51
}
#define _mm512_sub_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R))
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
+ (__v8df)(__m512d)(W));
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
+ (__v8df)_mm512_setzero_pd());
#define _mm512_sub_round_ps(A, B, R) \
- (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
-
-#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
-
-#define _mm512_maskz_sub_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R))
+
+#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
+ (__v16sf)(__m512)(W));
+
+#define _mm512_maskz_sub_round_ps(U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
+ (__v16sf)_mm512_setzero_ps());
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2330,40 +2314,32 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m51
}
#define _mm512_mul_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R))
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
+ (__v8df)(__m512d)(W));
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
+ (__v8df)_mm512_setzero_pd());
#define _mm512_mul_round_ps(A, B, R) \
- (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
-
-#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
-
-#define _mm512_maskz_mul_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R))
+
+#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
+ (__v16sf)(__m512)(W));
+
+#define _mm512_maskz_mul_round_ps(U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
+ (__v16sf)_mm512_setzero_ps());
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2478,40 +2454,32 @@ _mm512_maskz_div_ps(__mmask16 __U, __m51
}
#define _mm512_div_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R))
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_div_round_pd((A), (B), (R)), \
+ (__v8df)(__m512d)(W));
#define _mm512_maskz_div_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_div_round_pd((A), (B), (R)), \
+ (__v8df)_mm512_setzero_pd());
#define _mm512_div_round_ps(A, B, R) \
- (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
-
-#define _mm512_mask_div_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
-
-#define _mm512_maskz_div_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R))
+
+#define _mm512_mask_div_round_ps(W, U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
+ (__v16sf)(__m512)(W));
+
+#define _mm512_maskz_div_round_ps(U, A, B, R) \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
+ (__v16sf)_mm512_setzero_ps());
#define _mm512_roundscale_ps(A, B) \
(__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=334359&r1=334358&r2=334359&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Sat Jun 9 23:01:42 2018
@@ -2359,6 +2359,14 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(
ArgNum = 1;
HasRC = true;
break;
+ case X86::BI__builtin_ia32_addpd512:
+ case X86::BI__builtin_ia32_addps512:
+ case X86::BI__builtin_ia32_divpd512:
+ case X86::BI__builtin_ia32_divps512:
+ case X86::BI__builtin_ia32_mulpd512:
+ case X86::BI__builtin_ia32_mulps512:
+ case X86::BI__builtin_ia32_subpd512:
+ case X86::BI__builtin_ia32_subps512:
case X86::BI__builtin_ia32_cvtsi2sd64:
case X86::BI__builtin_ia32_cvtsi2ss32:
case X86::BI__builtin_ia32_cvtsi2ss64:
@@ -2384,14 +2392,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(
ArgNum = 3;
HasRC = true;
break;
- case X86::BI__builtin_ia32_addpd512_mask:
- case X86::BI__builtin_ia32_addps512_mask:
- case X86::BI__builtin_ia32_divpd512_mask:
- case X86::BI__builtin_ia32_divps512_mask:
- case X86::BI__builtin_ia32_mulpd512_mask:
- case X86::BI__builtin_ia32_mulps512_mask:
- case X86::BI__builtin_ia32_subpd512_mask:
- case X86::BI__builtin_ia32_subps512_mask:
case X86::BI__builtin_ia32_addss_round_mask:
case X86::BI__builtin_ia32_addsd_round_mask:
case X86::BI__builtin_ia32_divss_round_mask:
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=334359&r1=334358&r2=334359&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sat Jun 9 23:01:42 2018
@@ -2229,17 +2229,19 @@ __m512i test_mm512_mask_mullox_epi64 (__
__m512d test_mm512_add_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_add_round_pd
- // CHECK: @llvm.x86.avx512.mask.add.pd.512
+ // CHECK: @llvm.x86.avx512.add.pd.512
return _mm512_add_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_add_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_add_round_pd
- // CHECK: @llvm.x86.avx512.mask.add.pd.512
+ // CHECK: @llvm.x86.avx512.add.pd.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_add_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_maskz_add_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_add_round_pd
- // CHECK: @llvm.x86.avx512.mask.add.pd.512
+ // CHECK: @llvm.x86.avx512.add.pd.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_add_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -2256,17 +2258,19 @@ __m512d test_mm512_maskz_add_pd(__mmask8
}
__m512 test_mm512_add_round_ps(__m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_add_round_ps
- // CHECK: @llvm.x86.avx512.mask.add.ps.512
+ // CHECK: @llvm.x86.avx512.add.ps.512
return _mm512_add_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_add_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_add_round_ps
- // CHECK: @llvm.x86.avx512.mask.add.ps.512
+ // CHECK: @llvm.x86.avx512.add.ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_add_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_maskz_add_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_add_round_ps
- // CHECK: @llvm.x86.avx512.mask.add.ps.512
+ // CHECK: @llvm.x86.avx512.add.ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_add_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
@@ -2333,17 +2337,19 @@ __m128d test_mm_maskz_add_sd(__mmask8 __
}
__m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_sub_round_pd
- // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+ // CHECK: @llvm.x86.avx512.sub.pd.512
return _mm512_sub_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_sub_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_sub_round_pd
- // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+ // CHECK: @llvm.x86.avx512.sub.pd.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_sub_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_maskz_sub_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_sub_round_pd
- // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+ // CHECK: @llvm.x86.avx512.sub.pd.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_sub_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -2360,17 +2366,19 @@ __m512d test_mm512_maskz_sub_pd(__mmask8
}
__m512 test_mm512_sub_round_ps(__m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_sub_round_ps
- // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+ // CHECK: @llvm.x86.avx512.sub.ps.512
return _mm512_sub_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_sub_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_sub_round_ps
- // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+ // CHECK: @llvm.x86.avx512.sub.ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_sub_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_maskz_sub_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_sub_round_ps
- // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+ // CHECK: @llvm.x86.avx512.sub.ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_sub_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
@@ -2437,17 +2445,19 @@ __m128d test_mm_maskz_sub_sd(__mmask8 __
}
__m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mul_round_pd
- // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+ // CHECK: @llvm.x86.avx512.mul.pd.512
return _mm512_mul_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_mul_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_mul_round_pd
- // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+ // CHECK: @llvm.x86.avx512.mul.pd.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_mul_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_maskz_mul_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_mul_round_pd
- // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+ // CHECK: @llvm.x86.avx512.mul.pd.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_mul_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -2464,17 +2474,19 @@ __m512d test_mm512_maskz_mul_pd(__mmask8
}
__m512 test_mm512_mul_round_ps(__m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mul_round_ps
- // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+ // CHECK: @llvm.x86.avx512.mul.ps.512
return _mm512_mul_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_mul_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_mul_round_ps
- // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+ // CHECK: @llvm.x86.avx512.mul.ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_mul_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_maskz_mul_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_mul_round_ps
- // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+ // CHECK: @llvm.x86.avx512.mul.ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_mul_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
@@ -2541,17 +2553,19 @@ __m128d test_mm_maskz_mul_sd(__mmask8 __
}
__m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_div_round_pd
- // CHECK: @llvm.x86.avx512.mask.div.pd.512
+ // CHECK: @llvm.x86.avx512.div.pd.512
return _mm512_div_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_div_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_div_round_pd
- // CHECK: @llvm.x86.avx512.mask.div.pd.512
+ // CHECK: @llvm.x86.avx512.div.pd.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_div_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_maskz_div_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_div_round_pd
- // CHECK: @llvm.x86.avx512.mask.div.pd.512
+ // CHECK: @llvm.x86.avx512.div.pd.512
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_div_pd(__m512d __a, __m512d __b) {
@@ -2573,17 +2587,19 @@ __m512d test_mm512_maskz_div_pd(__mmask8
}
__m512 test_mm512_div_round_ps(__m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_div_round_ps
- // CHECK: @llvm.x86.avx512.mask.div.ps.512
+ // CHECK: @llvm.x86.avx512.div.ps.512
return _mm512_div_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_div_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_div_round_ps
- // CHECK: @llvm.x86.avx512.mask.div.ps.512
+ // CHECK: @llvm.x86.avx512.div.ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_div_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_maskz_div_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_div_round_ps
- // CHECK: @llvm.x86.avx512.mask.div.ps.512
+ // CHECK: @llvm.x86.avx512.div.ps.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_div_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512 test_mm512_div_ps(__m512 __A, __m512 __B) {