r336507 - [X86] Add new scalar fma intrinsics with rounding mode that use f32/f64 types.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Sat Jul 7 18:10:47 PDT 2018
Author: ctopper
Date: Sat Jul 7 18:10:47 2018
New Revision: 336507
URL: http://llvm.org/viewvc/llvm-project?rev=336507&view=rev
Log:
[X86] Add new scalar fma intrinsics with rounding mode that use f32/f64 types.
This allows us to handle masking in a very similar way to the default rounding version that uses llvm.fma.
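For illustration, a minimal usage sketch of one of the affected intrinsics (the wrapper name is hypothetical; it assumes an AVX-512F target and <immintrin.h>; the IR it is expected to lower to is what the updated CHECK lines below verify):

  #include <immintrin.h>

  // Masked scalar FMA with an explicit rounding mode. Lane 0 becomes
  //   (u & 1) ? w[0] * a[0] + b[0]   (round to nearest, exceptions suppressed)
  //           : w[0];
  // lanes 1-3 are passed through from w unchanged.
  __m128 fmadd_lowest_lane(__m128 w, __mmask8 u, __m128 a, __m128 b) {
    return _mm_mask_fmadd_round_ss(w, u, a, b,
                                   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  }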
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx512f-builtins.c
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=336507&r1=336506&r2=336507&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sat Jul 7 18:10:47 2018
@@ -8718,6 +8718,47 @@ static Value *EmitX86FMAExpr(CodeGenFunc
return Res;
}
+static Value *
+EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
+                  Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
+                  bool NegAcc = false) {
+  unsigned Rnd = 4;
+  if (Ops.size() > 4)
+    Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
+
+  if (NegAcc)
+    Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
+
+  Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
+  Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
+  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
+  Value *Res;
+  if (Rnd != 4) {
+    Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
+                        Intrinsic::x86_avx512_vfmadd_f32 :
+                        Intrinsic::x86_avx512_vfmadd_f64;
+    Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
+                                 {Ops[0], Ops[1], Ops[2], Ops[4]});
+  } else {
+    Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
+    Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
+  }
+  // If we have more than 3 arguments, we need to do masking.
+  if (Ops.size() > 3) {
+    Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
+                               : Ops[PTIdx];
+
+    // If we negated the accumulator and it's the PassThru value, we need to
+    // bypass the negate. Conveniently, Upper should be the same thing in this
+    // case.
+    if (NegAcc && PTIdx == 2)
+      PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
+
+    Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
+  }
+  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
+}
+
static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
ArrayRef<Value *> Ops) {
llvm::Type *Ty = Ops[0]->getType();
@@ -9141,24 +9182,24 @@ Value *CodeGenFunction::EmitX86BuiltinEx
     return EmitX86ConvertToMask(*this, Ops[0]);
   case X86::BI__builtin_ia32_vfmaddss3:
-  case X86::BI__builtin_ia32_vfmaddsd3: {
-    Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
-    Value *B = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
-    Value *C = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
-    Function *FMA = CGM.getIntrinsic(Intrinsic::fma, A->getType());
-    Value *Res = Builder.CreateCall(FMA, {A, B, C} );
-    return Builder.CreateInsertElement(Ops[0], Res, (uint64_t)0);
-  }
+  case X86::BI__builtin_ia32_vfmaddsd3:
+  case X86::BI__builtin_ia32_vfmaddss3_mask:
+  case X86::BI__builtin_ia32_vfmaddsd3_mask:
+    return EmitScalarFMAExpr(*this, Ops, Ops[0]);
   case X86::BI__builtin_ia32_vfmaddss:
-  case X86::BI__builtin_ia32_vfmaddsd: {
-    Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
-    Value *B = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
-    Value *C = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
-    Function *FMA = CGM.getIntrinsic(Intrinsic::fma, A->getType());
-    Value *Res = Builder.CreateCall(FMA, {A, B, C} );
-    Value *Zero = Constant::getNullValue(Ops[0]->getType());
-    return Builder.CreateInsertElement(Zero, Res, (uint64_t)0);
-  }
+  case X86::BI__builtin_ia32_vfmaddsd:
+    return EmitScalarFMAExpr(*this, Ops,
+                             Constant::getNullValue(Ops[0]->getType()));
+  case X86::BI__builtin_ia32_vfmaddss3_maskz:
+  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
+    return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
+  case X86::BI__builtin_ia32_vfmaddss3_mask3:
+  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
+    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
+  case X86::BI__builtin_ia32_vfmsubss3_mask3:
+  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
+    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
+                             /*NegAcc*/true);
   case X86::BI__builtin_ia32_vfmaddps:
   case X86::BI__builtin_ia32_vfmaddpd:
   case X86::BI__builtin_ia32_vfmaddps256:
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=336507&r1=336506&r2=336507&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sat Jul 7 18:10:47 2018
@@ -7232,13 +7232,24 @@ __m128 test_mm_mask_fmadd_ss(__m128 __W,
__m128 test_mm_fmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_fmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0
return _mm_fmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask_fmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7266,7 +7277,14 @@ __m128 test_mm_maskz_fmadd_ss(__mmask8 _
__m128 test_mm_maskz_fmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fmadd_round_ss
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7295,7 +7313,14 @@ __m128 test_mm_mask3_fmadd_ss(__m128 __W
__m128 test_mm_mask3_fmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7325,13 +7350,26 @@ __m128 test_mm_mask_fmsub_ss(__m128 __W,
__m128 test_mm_fmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_fmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0
return _mm_fmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask_fmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7360,7 +7398,15 @@ __m128 test_mm_maskz_fmsub_ss(__mmask8 _
__m128 test_mm_maskz_fmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fmsub_round_ss
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7390,7 +7436,16 @@ __m128 test_mm_mask3_fmsub_ss(__m128 __W
__m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7420,13 +7475,26 @@ __m128 test_mm_mask_fnmadd_ss(__m128 __W
__m128 test_mm_fnmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_fnmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0
return _mm_fnmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask_fnmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fnmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fnmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7455,7 +7523,15 @@ __m128 test_mm_maskz_fnmadd_ss(__mmask8
__m128 test_mm_maskz_fnmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fnmadd_round_ss
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fnmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7485,7 +7561,15 @@ __m128 test_mm_mask3_fnmadd_ss(__m128 __
__m128 test_mm_mask3_fnmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmadd_round_ss
- // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fnmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7516,13 +7600,28 @@ __m128 test_mm_mask_fnmsub_ss(__m128 __W
__m128 test_mm_fnmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_fnmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[FMA]], i64 0
return _mm_fnmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128 test_mm_mask_fnmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
// CHECK-LABEL: @test_mm_mask_fnmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[A]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_mask_fnmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7552,7 +7651,16 @@ __m128 test_mm_maskz_fnmsub_ss(__mmask8
__m128 test_mm_maskz_fnmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
// CHECK-LABEL: @test_mm_maskz_fnmsub_round_ss
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float 0.000000e+00
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGA]], float [[SEL]], i64 0
return _mm_maskz_fnmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7583,7 +7691,17 @@ __m128 test_mm_mask3_fnmsub_ss(__m128 __
__m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmsub_round_ss
- // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call float @llvm.x86.avx512.vfmadd.f32(float [[A]], float [[B]], float [[C]], i32 8)
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <4 x float> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, float [[FMA]], float [[C2]]
+ // CHECK-NEXT: insertelement <4 x float> [[ORIGC]], float [[SEL]], i64 0
return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7612,13 +7730,24 @@ __m128d test_mm_mask_fmadd_sd(__m128d __
__m128d test_mm_fmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_fmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0
return _mm_fmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask_fmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7646,7 +7775,14 @@ __m128d test_mm_maskz_fmadd_sd(__mmask8
__m128d test_mm_maskz_fmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fmadd_round_sd
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7675,7 +7811,14 @@ __m128d test_mm_mask3_fmadd_sd(__m128d _
__m128d test_mm_mask3_fmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7705,13 +7848,26 @@ __m128d test_mm_mask_fmsub_sd(__m128d __
__m128d test_mm_fmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_fmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0
return _mm_fmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask_fmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7740,7 +7896,15 @@ __m128d test_mm_maskz_fmsub_sd(__mmask8
__m128d test_mm_maskz_fmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fmsub_round_sd
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7770,7 +7934,16 @@ __m128d test_mm_mask3_fmsub_sd(__m128d _
__m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7800,13 +7973,26 @@ __m128d test_mm_mask_fnmadd_sd(__m128d _
__m128d test_mm_fnmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_fnmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0
return _mm_fnmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask_fnmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fnmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fnmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7835,7 +8021,15 @@ __m128d test_mm_maskz_fnmadd_sd(__mmask8
__m128d test_mm_maskz_fnmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fnmadd_round_sd
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fnmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7865,7 +8059,15 @@ __m128d test_mm_mask3_fnmadd_sd(__m128d
__m128d test_mm_mask3_fnmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmadd_round_sd
- // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fnmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7896,13 +8098,28 @@ __m128d test_mm_mask_fnmsub_sd(__m128d _
__m128d test_mm_fnmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_fnmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[FMA]], i64 0
return _mm_fnmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m128d test_mm_mask_fnmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
// CHECK-LABEL: @test_mm_mask_fnmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[A]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_mask_fnmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7932,7 +8149,16 @@ __m128d test_mm_maskz_fnmsub_sd(__mmask8
__m128d test_mm_maskz_fnmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
// CHECK-LABEL: @test_mm_maskz_fnmsub_round_sd
- // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double 0.000000e+00
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGA]], double [[SEL]], i64 0
return _mm_maskz_fnmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
@@ -7963,7 +8189,17 @@ __m128d test_mm_mask3_fnmsub_sd(__m128d
__m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
// CHECK-LABEL: @test_mm_mask3_fnmsub_round_sd
- // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 8)
+ // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+ // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[ORIGC:%.+]]
+ // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0
+ // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0
+ // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
+ // CHECK-NEXT: [[FMA:%.+]] = call double @llvm.x86.avx512.vfmadd.f64(double [[A]], double [[B]], double [[C]], i32 8)
+ // CHECK-NEXT: [[C2:%.+]] = extractelement <2 x double> [[ORIGC]], i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, double [[FMA]], double [[C2]]
+ // CHECK-NEXT: insertelement <2 x double> [[ORIGC]], double [[SEL]], i64 0
return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}