r334741 - [X86] Lowering Mask Scalar intrinsics to native IR (Clang part)
Tomasz Krupa via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 14 10:36:23 PDT 2018
Author: tkrupa
Date: Thu Jun 14 10:36:23 2018
New Revision: 334741
URL: http://llvm.org/viewvc/llvm-project?rev=334741&view=rev
Log:
[X86] Lowering Mask Scalar intrinsics to native IR (Clang part)
Summary: Lower the add, sub, mul, and div masked scalar intrinsic calls
to native IR. The add, sub, and mul forms are rewritten directly in
avx512fintrin.h; the div forms are lowered in CGBuiltin.cpp whenever the
rounding argument is _MM_FROUND_CUR_DIRECTION.
Reviewers: craig.topper, RKSimon, spatel, sroland
Reviewed By: craig.topper
Subscribers: cfe-commits
Differential Revision: https://reviews.llvm.org/D47979
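
For reference, the semantics being lowered can be modeled in plain C.
This is an illustrative sketch mirroring the header rewrite below, not
the shipped code; the helper name ref_mask_add_ss is made up for this
example (compile with -mavx512f):

  #include <immintrin.h>

  // Masked scalar add: element 0 of the result is A[0]+B[0] if bit 0 of
  // the mask is set, otherwise the passthrough W[0] (0.0f in the maskz
  // form); elements 1..3 are copied from A unchanged.
  static __m128 ref_mask_add_ss(__m128 W, __mmask8 U, __m128 A, __m128 B) {
    __m128 R = _mm_add_ss(A, B);   // R[0] = A[0] + B[0], upper lanes from A
    R[0] = (U & 1) ? R[0] : W[0];  // keep or replace element 0 per mask bit
    return R;
  }
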
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334741&r1=334740&r2=334741&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jun 14 10:36:23 2018
@@ -9982,6 +9982,35 @@ Value *CodeGenFunction::EmitX86BuiltinEx
case X86::BI__builtin_ia32_pternlogq256_maskz:
return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
+ case X86::BI__builtin_ia32_divss_round_mask:
+ case X86::BI__builtin_ia32_divsd_round_mask: {
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_divss_round_mask:
+ ID = Intrinsic::x86_avx512_mask_div_ss_round; break;
+ case X86::BI__builtin_ia32_divsd_round_mask:
+ ID = Intrinsic::x86_avx512_mask_div_sd_round; break;
+ }
+ Function *Intr = CGM.getIntrinsic(ID);
+
+ // If round parameter is not _MM_FROUND_CUR_DIRECTION, don't lower.
+ if (cast<llvm::ConstantInt>(Ops[4])->getZExtValue() != (uint64_t)4)
+ return Builder.CreateCall(Intr, Ops);
+
+ Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
+ Value *B = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+ Value *C = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+ Value *Mask = Ops[3];
+ Value *Div = Builder.CreateFDiv(A, B);
+ llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
+ cast<IntegerType>(Mask->getType())->getBitWidth());
+ Mask = Builder.CreateBitCast(Mask, MaskTy);
+ Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
+ Value *Select = Builder.CreateSelect(Mask, Div, C);
+ return Builder.CreateInsertElement(Ops[0], Select, (uint64_t)0);
+ }
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
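
The guard on Ops[4] means only the default rounding mode is lowered. A
hedged sketch of the two outcomes, using the existing
_mm_mask_div_round_sd macro (the function name demo_round is
hypothetical):

  #include <immintrin.h>

  __m128d demo_round(__m128d W, __mmask8 U, __m128d A, __m128d B) {
    // Explicit embedded rounding: Ops[4] != 4, so CodeGen keeps the
    // @llvm.x86.avx512.mask.div.sd.round intrinsic call intact.
    __m128d kept = _mm_mask_div_round_sd(W, U, A, B,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    // _MM_FROUND_CUR_DIRECTION (value 4): lowered to the
    // extractelement/fdiv/select/insertelement sequence built above.
    __m128d lowered = _mm_mask_div_round_sd(W, U, A, B,
        _MM_FROUND_CUR_DIRECTION);
    return _mm_add_pd(kept, lowered);
  }
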
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=334741&r1=334740&r2=334741&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Thu Jun 14 10:36:23 2018
@@ -1962,20 +1962,16 @@ _mm512_maskz_abs_epi32 (__mmask16 __U, _
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_add_ss(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : __W[0];
+ return __A;
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_add_ss(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : 0;
+ return __A;
}
#define _mm_add_round_ss(A, B, R) \
@@ -1998,20 +1994,16 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_add_sd(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : __W[0];
+ return __A;
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_add_sd(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : 0;
+ return __A;
}
#define _mm_add_round_sd(A, B, R) \
(__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
@@ -2089,20 +2081,16 @@ _mm512_maskz_add_ps(__mmask16 __U, __m51
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_sub_ss(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : __W[0];
+ return __A;
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_sub_ss(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : 0;
+ return __A;
}
#define _mm_sub_round_ss(A, B, R) \
(__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
@@ -2124,20 +2112,16 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_sub_sd(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : __W[0];
+ return __A;
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_sub_sd(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : 0;
+ return __A;
}
#define _mm_sub_round_sd(A, B, R) \
@@ -2216,20 +2200,16 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m51
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_mul_ss(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : __W[0];
+ return __A;
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_mul_ss(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : 0;
+ return __A;
}
#define _mm_mul_round_ss(A, B, R) \
(__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
@@ -2251,20 +2231,16 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_mul_sd(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : __W[0];
+ return __A;
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ __A = _mm_mul_sd(__A, __B);
+ __A[0] = (__U & 1) ? __A[0] : 0;
+ return __A;
}
#define _mm_mul_round_sd(A, B, R) \
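
A short usage sketch of the rewritten header intrinsics (illustrative
only; build with -mavx512f and run on AVX-512F hardware):

  #include <immintrin.h>
  #include <stdio.h>

  int main(void) {
    __m128 a = _mm_set_ss(2.0f);   // [2, 0, 0, 0]
    __m128 b = _mm_set_ss(3.0f);   // [3, 0, 0, 0]
    __m128 w = _mm_set_ss(9.0f);   // passthrough source
    float r[4];

    _mm_storeu_ps(r, _mm_mask_add_ss(w, 1, a, b));
    printf("mask=1:  %f\n", r[0]);  // 5.000000 (a[0] + b[0])

    _mm_storeu_ps(r, _mm_mask_add_ss(w, 0, a, b));
    printf("mask=0:  %f\n", r[0]);  // 9.000000 (passthrough w[0])

    _mm_storeu_ps(r, _mm_maskz_add_ss(0, a, b));
    printf("maskz=0: %f\n", r[0]);  // 0.000000 (zero-masked)

    return 0;
  }
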
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=334741&r1=334740&r2=334741&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Thu Jun 14 10:36:23 2018
@@ -2302,12 +2302,29 @@ __m128 test_mm_maskz_add_round_ss(__mmas
}
__m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_add_ss
- // CHECK: @llvm.x86.avx512.mask.add.ss.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: fadd float %{{.*}}, %{{.*}}
+ // CHECK: insertelement <4 x float> %{{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_mask_add_ss(__W,__U,__A,__B);
}
__m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_maskz_add_ss
- // CHECK: @llvm.x86.avx512.mask.add.ss.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: fadd float %{{.*}}, %{{.*}}
+ // CHECK: insertelement <4 x float> %{{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_maskz_add_ss(__U,__A,__B);
}
__m128d test_mm_add_round_sd(__m128d __A, __m128d __B) {
@@ -2327,12 +2344,29 @@ __m128d test_mm_maskz_add_round_sd(__mma
}
__m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_add_sd
- // CHECK: @llvm.x86.avx512.mask.add.sd.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: fadd double %{{.*}}, %{{.*}}
+ // CHECK: insertelement <2 x double> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_mask_add_sd(__W,__U,__A,__B);
}
__m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_maskz_add_sd
- // CHECK: @llvm.x86.avx512.mask.add.sd.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: fadd double %{{.*}}, %{{.*}}
+ // CHECK: insertelement <2 x double> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_maskz_add_sd(__U,__A,__B);
}
__m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) {
@@ -2410,12 +2444,29 @@ __m128 test_mm_maskz_sub_round_ss(__mmas
}
__m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_sub_ss
- // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: fsub float %{{.*}}, %{{.*}}
+ // CHECK: insertelement <4 x float> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_mask_sub_ss(__W,__U,__A,__B);
}
__m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_maskz_sub_ss
- // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: fsub float %{{.*}}, %{{.*}}
+ // CHECK: insertelement <4 x float> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_maskz_sub_ss(__U,__A,__B);
}
__m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) {
@@ -2435,12 +2486,29 @@ __m128d test_mm_maskz_sub_round_sd(__mma
}
__m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_sub_sd
- // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: fsub double %{{.*}}, %{{.*}}
+ // CHECK: insertelement <2 x double> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_mask_sub_sd(__W,__U,__A,__B);
}
__m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_maskz_sub_sd
- // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: fsub double %{{.*}}, %{{.*}}
+ // CHECK: insertelement <2 x double> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_maskz_sub_sd(__U,__A,__B);
}
__m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) {
@@ -2518,12 +2586,29 @@ __m128 test_mm_maskz_mul_round_ss(__mmas
}
__m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_mul_ss
- // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: fmul float %{{.*}}, %{{.*}}
+ // CHECK: insertelement <4 x float> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_mask_mul_ss(__W,__U,__A,__B);
}
__m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_maskz_mul_ss
- // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+ // CHECK: fmul float %{{.*}}, %{{.*}}
+ // CHECK: insertelement <4 x float> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_maskz_mul_ss(__U,__A,__B);
}
__m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) {
@@ -2543,12 +2628,29 @@ __m128d test_mm_maskz_mul_round_sd(__mma
}
__m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_mul_sd
- // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: fmul double %{{.*}}, %{{.*}}
+ // CHECK: insertelement <2 x double> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_mask_mul_sd(__W,__U,__A,__B);
}
__m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_maskz_mul_sd
- // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+ // CHECK: fmul double %{{.*}}, %{{.*}}
+ // CHECK: insertelement <2 x double> {{.*}}, i32 0
+ // CHECK: and i32 {{.*}}, 1
+ // CHECK: icmp ne i32 %{{.*}}, 0
+ // CHECK: br {{.*}}, {{.*}}, {{.*}}
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_maskz_mul_sd(__U,__A,__B);
}
__m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) {
@@ -2636,12 +2738,27 @@ __m128 test_mm_maskz_div_round_ss(__mmas
}
__m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_div_ss
- // CHECK: @llvm.x86.avx512.mask.div.ss.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.div.ss.round
+ // CHECK: extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK: fdiv float %{{.*}}, %{{.*}}
+ // CHECK: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK: select i1 %{{.*}}, float %{{.*}}, float %{{.*}}
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_mask_div_ss(__W,__U,__A,__B);
}
__m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_maskz_div_ss
- // CHECK: @llvm.x86.avx512.mask.div.ss.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.div.ss.round
+ // CHECK: extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK: extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK: fdiv float %{{.*}}, %{{.*}}
+ // CHECK: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK: select i1 %{{.*}}, float %{{.*}}, float %{{.*}}
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
return _mm_maskz_div_ss(__U,__A,__B);
}
__m128d test_mm_div_round_sd(__m128d __A, __m128d __B) {
@@ -2661,12 +2778,27 @@ __m128d test_mm_maskz_div_round_sd(__mma
}
__m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_div_sd
- // CHECK: @llvm.x86.avx512.mask.div.sd.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.div.sd.round
+ // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK: fdiv double %{{.*}}, %{{.*}}
+ // CHECK: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK: select i1 %{{.*}}, double %{{.*}}, double %{{.*}}
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_mask_div_sd(__W,__U,__A,__B);
}
__m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_maskz_div_sd
- // CHECK: @llvm.x86.avx512.mask.div.sd.round
+ // CHECK-NOT: @llvm.x86.avx512.mask.div.sd.round
+ // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK: extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK: fdiv double %{{.*}}, %{{.*}}
+ // CHECK: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK: select i1 %{{.*}}, double %{{.*}}, double %{{.*}}
+ // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
return _mm_maskz_div_sd(__U,__A,__B);
}
__m128 test_mm_max_round_ss(__m128 __A, __m128 __B) {
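
Note the two different IR shapes being checked above: the add/sub/mul
tests match an and/icmp/br sequence because those intrinsics are now
plain C conditionals in the header (emitted as control flow at -O0),
while the div tests match a bitcast/select sequence produced by the new
CGBuiltin.cpp lowering. A minimal sketch of the FileCheck idiom used
throughout (the function name check_idiom is hypothetical):

  __m128 check_idiom(__m128 A, __m128 B) {
    // CHECK-LABEL: @check_idiom
    // CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round
    // CHECK: fadd float %{{.*}}, %{{.*}}
    return _mm_maskz_add_ss((__mmask8)1, A, B);
  }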