[llvm] 7273284 - [X86] Add back fmaddsub intrinsics to work towards fixing the strict fp implementation
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 12:07:39 PST 2020
Author: Craig Topper
Date: 2020-02-24T12:07:21-08:00
New Revision: 727328433ad61b8c7acdd4d63e73241303a6beb7
URL: https://github.com/llvm/llvm-project/commit/727328433ad61b8c7acdd4d63e73241303a6beb7
DIFF: https://github.com/llvm/llvm-project/commit/727328433ad61b8c7acdd4d63e73241303a6beb7.diff
LOG: [X86] Add back fmaddsub intrinsics to work towards fixing the strict fp implementation
Previously we emitted an fmadd and a fmadd+fneg and combined them with a shufflevector. But this doesn't follow the correct exception behavior for unselected elements so the backend can't merge them into the fmaddsub/fmsubadd instructions.
This patch restores the the fmaddsub intrinsics so we don't have two arithmetic operations. We lose out on optimization opportunity in the non-strict FP case, but I don't think this is a big loss. If someone gives us a test case we can look into adding instcombine/dagcombine improvements. I'd rather not have the frontend do completely different things for strict and non-strict.
This still has problems because target specific intrinsics don't support strict semantics yet. We also still have all of the problems with masking. But we at least generate the right instruction in constrained mode now.
Differential Revision: https://reviews.llvm.org/D74268
Added:
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/avx512f-builtins.c
clang/test/CodeGen/avx512vl-builtins.c
clang/test/CodeGen/fma-builtins-constrained.c
clang/test/CodeGen/fma-builtins.c
clang/test/CodeGen/fma4-builtins.c
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/lib/IR/AutoUpgrade.cpp
llvm/lib/Target/X86/X86IntrinsicsInfo.h
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bda01c6598a0..db58738c2701 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10152,7 +10152,8 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
// Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
if (IID != Intrinsic::not_intrinsic &&
- cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
+ (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
+ IsAddSub)) {
Function *Intr = CGF.CGM.getIntrinsic(IID);
Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
} else {
@@ -10165,24 +10166,6 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
Res = CGF.Builder.CreateCall(FMA, {A, B, C});
}
-
- if (IsAddSub) {
- // Negate even elts in C using a mask.
- unsigned NumElts = Ty->getVectorNumElements();
- SmallVector<uint32_t, 16> Indices(NumElts);
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i + (i % 2) * NumElts;
-
- // FIXME: This code isn't exception safe for constrained FP. We need to
- // suppress exceptions on the unselected elements.
- Value *NegC = CGF.Builder.CreateFNeg(C);
- Value *FMSub;
- if (CGF.Builder.getIsFPConstrained())
- FMSub = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, NegC} );
- else
- FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
- Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
- }
}
// Handle any required masking.
@@ -10818,10 +10801,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vfmaddpd512_mask3:
case X86::BI__builtin_ia32_vfmsubpd512_mask3:
return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
- case X86::BI__builtin_ia32_vfmaddsubps:
- case X86::BI__builtin_ia32_vfmaddsubpd:
- case X86::BI__builtin_ia32_vfmaddsubps256:
- case X86::BI__builtin_ia32_vfmaddsubpd256:
case X86::BI__builtin_ia32_vfmaddsubps512_mask:
case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c
index 9296108f01b1..bd15424af889 100644
--- a/clang/test/CodeGen/avx512f-builtins.c
+++ b/clang/test/CodeGen/avx512f-builtins.c
@@ -887,38 +887,30 @@ __m512d test_mm512_maskz_fmsubadd_round_pd(__mmask8 __U, __m512d __A, __m512d __
}
__m512d test_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: @test_mm512_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
- // CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK-NOT: fneg
+ // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4)
return _mm512_fmaddsub_pd(__A, __B, __C);
}
__m512d test_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
// CHECK-LABEL: @test_mm512_mask_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
- // CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK-NOT: fneg
+ // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4)
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_fmaddsub_pd(__A, __U, __B, __C);
}
__m512d test_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm512_mask3_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
- // CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK-NOT: fneg
+ // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4)
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask3_fmaddsub_pd(__A, __B, __C, __U);
}
__m512d test_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: @test_mm512_maskz_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
- // CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK-NOT: fneg
+ // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4)
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmaddsub_pd(__U, __A, __B, __C);
@@ -926,17 +918,13 @@ __m512d test_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m
__m512d test_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: @test_mm512_fmsubadd_pd
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4)
return _mm512_fmsubadd_pd(__A, __B, __C);
}
__m512d test_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
// CHECK-LABEL: @test_mm512_mask_fmsubadd_pd
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4)
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_fmsubadd_pd(__A, __U, __B, __C);
@@ -944,9 +932,7 @@ __m512d test_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m5
__m512d test_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: @test_mm512_maskz_fmsubadd_pd
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4)
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
return _mm512_maskz_fmsubadd_pd(__U, __A, __B, __C);
@@ -1001,38 +987,30 @@ __m512 test_mm512_maskz_fmsubadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B,
}
__m512 test_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: @test_mm512_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
- // CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ // CHECK-NOT: fneg
+ // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4)
return _mm512_fmaddsub_ps(__A, __B, __C);
}
__m512 test_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
// CHECK-LABEL: @test_mm512_mask_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
- // CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ // CHECK-NOT: fneg
+ // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4)
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_fmaddsub_ps(__A, __U, __B, __C);
}
__m512 test_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
// CHECK-LABEL: @test_mm512_mask3_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
- // CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ // CHECK-NOT: fneg
+ // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4)
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask3_fmaddsub_ps(__A, __B, __C, __U);
}
__m512 test_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: @test_mm512_maskz_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
- // CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ // CHECK-NOT: fneg
+ // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4)
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmaddsub_ps(__U, __A, __B, __C);
@@ -1040,17 +1018,13 @@ __m512 test_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m51
__m512 test_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: @test_mm512_fmsubadd_ps
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4)
return _mm512_fmsubadd_ps(__A, __B, __C);
}
__m512 test_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
// CHECK-LABEL: @test_mm512_mask_fmsubadd_ps
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4)
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_fmsubadd_ps(__A, __U, __B, __C);
@@ -1058,9 +1032,7 @@ __m512 test_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512
__m512 test_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
// CHECK-LABEL: @test_mm512_maskz_fmsubadd_ps
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4)
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
return _mm512_maskz_fmsubadd_ps(__U, __A, __B, __C);
@@ -1108,9 +1080,7 @@ __m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C
__m512d test_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm512_mask3_fmsubadd_pd
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
- // CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4)
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask3_fmsubadd_pd(__A, __B, __C, __U);
@@ -1126,9 +1096,7 @@ __m512 test_mm512_mask3_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __
__m512 test_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
// CHECK-LABEL: @test_mm512_mask3_fmsubadd_ps
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
- // CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4)
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask3_fmsubadd_ps(__A, __B, __C, __U);
diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c
index 99c7dc61e448..5bed16ec1327 100644
--- a/clang/test/CodeGen/avx512vl-builtins.c
+++ b/clang/test/CodeGen/avx512vl-builtins.c
@@ -3140,10 +3140,8 @@ __m256 test_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 _
__m128d test_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
// CHECK-LABEL: @test_mm_mask_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> <i32 0, i32 3>
+ // CHECK-NOT: fneg
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
@@ -3152,10 +3150,8 @@ __m128d test_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d
__m128d test_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
// CHECK-LABEL: @test_mm_mask_fmsubadd_pd
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> <i32 0, i32 3>
+ // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}}
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
@@ -3164,10 +3160,8 @@ __m128d test_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d
__m128d test_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm_mask3_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> <i32 0, i32 3>
+ // CHECK-NOT: fneg
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
@@ -3176,10 +3170,8 @@ __m128d test_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask
__m128d test_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
// CHECK-LABEL: @test_mm_maskz_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> <i32 0, i32 3>
+ // CHECK-NOT: fneg
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
@@ -3188,10 +3180,8 @@ __m128d test_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128
__m128d test_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
// CHECK-LABEL: @test_mm_maskz_fmsubadd_pd
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> <i32 0, i32 3>
+ // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}}
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
@@ -3200,10 +3190,8 @@ __m128d test_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128
__m256d test_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
// CHECK-LABEL: @test_mm256_mask_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
@@ -3212,10 +3200,8 @@ __m256d test_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m2
__m256d test_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
// CHECK-LABEL: @test_mm256_mask_fmsubadd_pd
- // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.*}}
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
@@ -3224,10 +3210,8 @@ __m256d test_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m2
__m256d test_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm256_mask3_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
@@ -3236,10 +3220,8 @@ __m256d test_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mm
__m256d test_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
// CHECK-LABEL: @test_mm256_maskz_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
@@ -3248,10 +3230,8 @@ __m256d test_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m
__m256d test_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
// CHECK-LABEL: @test_mm256_maskz_fmsubadd_pd
- // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.*}}
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
@@ -3260,10 +3240,8 @@ __m256d test_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m
__m128 test_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
// CHECK-LABEL: @test_mm_mask_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
@@ -3272,10 +3250,8 @@ __m128 test_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C
__m128 test_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
// CHECK-LABEL: @test_mm_mask_fmsubadd_ps
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}}
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
@@ -3284,10 +3260,8 @@ __m128 test_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C
__m128 test_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm_mask3_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
@@ -3296,10 +3270,8 @@ __m128 test_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __
__m128 test_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
// CHECK-LABEL: @test_mm_maskz_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
@@ -3308,10 +3280,8 @@ __m128 test_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __
__m128 test_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
// CHECK-LABEL: @test_mm_maskz_fmsubadd_ps
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}}
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
@@ -3320,10 +3290,8 @@ __m128 test_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __
__m256 test_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
// CHECK-LABEL: @test_mm256_mask_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK-NOT: fneg
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_fmaddsub_ps(__A, __U, __B, __C);
@@ -3332,9 +3300,7 @@ __m256 test_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256
__m256 test_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
// CHECK-LABEL: @test_mm256_mask_fmsubadd_ps
// CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_fmsubadd_ps(__A, __U, __B, __C);
@@ -3342,10 +3308,8 @@ __m256 test_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256
__m256 test_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm256_mask3_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK-NOT: fneg
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask3_fmaddsub_ps(__A, __B, __C, __U);
@@ -3353,10 +3317,8 @@ __m256 test_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8
__m256 test_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
// CHECK-LABEL: @test_mm256_maskz_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK-NOT: fneg
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_fmaddsub_ps(__U, __A, __B, __C);
@@ -3365,9 +3327,7 @@ __m256 test_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256
__m256 test_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
// CHECK-LABEL: @test_mm256_maskz_fmsubadd_ps
// CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_fmsubadd_ps(__U, __A, __B, __C);
@@ -3415,9 +3375,7 @@ __m256 test_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __
__m128d test_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm_mask3_fmsubadd_pd
// CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> <i32 0, i32 3>
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
@@ -3427,9 +3385,7 @@ __m128d test_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask
__m256d test_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm256_mask3_fmsubadd_pd
// CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
@@ -3439,9 +3395,7 @@ __m256d test_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mm
__m128 test_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm_mask3_fmsubadd_ps
// CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
@@ -3451,9 +3405,7 @@ __m128 test_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __
__m256 test_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
// CHECK-LABEL: @test_mm256_mask3_fmsubadd_ps
// CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask3_fmsubadd_ps(__A, __B, __C, __U);
diff --git a/clang/test/CodeGen/fma-builtins-constrained.c b/clang/test/CodeGen/fma-builtins-constrained.c
index fe5b946fe640..6b299278b108 100644
--- a/clang/test/CodeGen/fma-builtins-constrained.c
+++ b/clang/test/CodeGen/fma-builtins-constrained.c
@@ -184,53 +184,33 @@ __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) {
// COMMON-LABEL: test_mm_fmaddsub_ps
- // UNCONSTRAINED: [[ADD:%.+]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CONSTRAINED: [[ADD:%.+]] = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
- // COMMONIR: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // UNCONSTRAINED: [[SUB:%.+]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
- // CONSTRAINED: [[SUB:%.+]] = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]], metadata !{{.*}})
- // CHECK-ASM-UNCONSTRAINED: vfmaddsub213ps
- // CHECK-ASM-CONSTRAINED-NOT: vfmaddsub213ps
- // COMMONIR: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // COMMONIR-NOT: fneg
+ // COMMONIR: tail call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK-ASM: vfmaddsub213ps
return _mm_fmaddsub_ps(a, b, c);
}
__m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) {
// COMMON-LABEL: test_mm_fmaddsub_pd
- // UNCONSTRAINED: [[ADD:%.+]] = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CONSTRAINED: [[ADD:%.+]] = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}})
- // COMMONIR: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // UNCONSTRAINED: [[SUB:%.+]] = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
- // CONSTRAINED: [[SUB:%.+]] = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]], metadata !{{.*}})
- // CHECK-ASM-UNCONSTRAINED: vfmaddsub213pd
- // CHECK-ASM-CONSTRAINED-NOT: vfmaddsub213pd
- // COMMONIR: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> <i32 0, i32 3>
+ // COMMONIR-NOT: fneg
+ // COMMONIR: tail call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+ // CHECK-ASM: vfmaddsub213pd
return _mm_fmaddsub_pd(a, b, c);
}
__m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) {
// COMMON-LABEL: test_mm_fmsubadd_ps
- // COMMONIR: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // UNCONSTRAINED: [[SUB:%.+]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CONSTRAINED: [[SUB:%.+]] = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]], metadata !{{.*}})
- // UNCONSTRAINED: [[ADD:%.+]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CONSTRAINED: [[ADD:%.+]] = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
- // CHECK-ASM-UNCONSTRAINED: vfmsubadd213ps
- // CHECK-ASM-CONSTRAINED-NOT: vfmsubadd213ps
- // COMMONIR: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // COMMONIR: [[FNEG:%.+]] = fneg <4 x float> %{{.*}}
+ // COMMONIR: tail call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[FNEG]])
+ // CHECK-ASM: vfmsubadd213ps
return _mm_fmsubadd_ps(a, b, c);
}
__m128d test_mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) {
// COMMON-LABEL: test_mm_fmsubadd_pd
- // COMMONIR: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // UNCONSTRAINED: [[SUB:%.+]] = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
- // CONSTRAINED: [[SUB:%.+]] = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]], metadata !{{.*}})
- // UNCONSTRAINED: [[ADD:%.+]] = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CONSTRAINED: [[ADD:%.+]] = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}})
- // CHECK-ASM-UNCONSTRAINED: vfmsubadd213pd
- // CHECK-ASM-CONSTRAINED-NOT: vfmsubadd213pd
- // COMMONIR: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> <i32 0, i32 3>
+ // COMMONIR: [[FNEG:%.+]] = fneg <2 x double> %{{.*}}
+ // COMMONIR: tail call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[FNEG]])
+ // CHECK-ASM: vfmsubadd213pd
return _mm_fmsubadd_pd(a, b, c);
}
@@ -308,52 +288,32 @@ __m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) {
__m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) {
// COMMON-LABEL: test_mm256_fmaddsub_ps
- // UNCONSTRAINED: [[ADD:%.+]] = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CONSTRAINED: [[ADD:%.+]] = tail call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // COMMONIR: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // UNCONSTRAINED: [[SUB:%.+]] = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
- // CONSTRAINED: [[SUB:%.+]] = tail call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]], metadata !{{.*}})
- // CHECK-ASM-UNCONSTRAINED: vfmaddsub213ps
- // CHECK-ASM-CONSTRAINED-NOT: vfmaddsub213ps
- // COMMONIR: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // COMMONIR-NOT: fneg
+ // COMMONIR: tail call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
+ // CHECK-ASM: vfmaddsub213ps
return _mm256_fmaddsub_ps(a, b, c);
}
__m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) {
// COMMON-LABEL: test_mm256_fmaddsub_pd
- // UNCONSTRAINED: [[ADD:%.+]] = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CONSTRAINED: [[ADD:%.+]] = tail call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}})
- // COMMONIR: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // UNCONSTRAINED: [[SUB:%.+]] = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CONSTRAINED: [[SUB:%.+]] = tail call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}})
- // CHECK-ASM-UNCONSTRAINED: vfmaddsub213pd
- // CHECK-ASM-CONSTRAINED-NOT: vfmaddsub213pd
- // COMMONIR: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // COMMONIR-NOT: fneg
+ // COMMONIR: tail call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
+ // CHECK-ASM: vfmaddsub213pd
return _mm256_fmaddsub_pd(a, b, c);
}
__m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) {
// COMMON-LABEL: test_mm256_fmsubadd_ps
- // COMMONIR: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // UNCONSTRAINED: [[SUB:%.+]] = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
- // CONSTRAINED: [[SUB:%.+]] = tail call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]], metadata !{{.*}})
- // UNCONSTRAINED: [[ADD:%.+]] = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CONSTRAINED: [[ADD:%.+]] = tail call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}})
- // CHECK-ASM-UNCONSTRAINED: vfmsubadd213ps
- // CHECK-ASM-CONSTRAINED-NOT: vfmsubadd213ps
- // COMMONIR: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // COMMONIR: [[FNEG:%.+]] = fneg <8 x float> %{{.*}}
+ // COMMONIR: tail call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[FNEG]])
+ // CHECK-ASM: vfmsubadd213ps
return _mm256_fmsubadd_ps(a, b, c);
}
__m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) {
// COMMON-LABEL: test_mm256_fmsubadd_pd
- // COMMONIR: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // UNCONSTRAINED: [[SUB:%.+]] = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
- // CONSTRAINED: [[SUB:%.+]] = tail call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]], metadata !{{.*}})
- // UNCONSTRAINED: [[ADD:%.+]] = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CONSTRAINED: [[ADD:%.+]] = tail call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}})
- // CHECK-ASM-UNCONSTRAINED: vfmsubadd213pd
- // CHECK-ASM-CONSTRAINED-NOT: vfmsubadd213pd
- // COMMONIR: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // COMMONIR: [[FNEG:%.+]] = fneg <4 x double> %{{.*}}
+ // COMMONIR: tail call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[FNEG]])
+ // CHECK-ASM: vfmsubadd213pd
return _mm256_fmsubadd_pd(a, b, c);
}
diff --git a/clang/test/CodeGen/fma-builtins.c b/clang/test/CodeGen/fma-builtins.c
index a21f536c1ed9..4686b3686a4f 100644
--- a/clang/test/CodeGen/fma-builtins.c
+++ b/clang/test/CodeGen/fma-builtins.c
@@ -149,37 +149,29 @@ __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_fmaddsub_ps(a, b, c);
}
__m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> <i32 0, i32 3>
+ // CHECK-NOT: fneg
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_fmaddsub_pd(a, b, c);
}
__m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_fmsubadd_ps
// CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
return _mm_fmsubadd_ps(a, b, c);
}
__m128d test_mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_fmsubadd_pd
// CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> <i32 0, i32 3>
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
return _mm_fmsubadd_pd(a, b, c);
}
@@ -241,36 +233,28 @@ __m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) {
__m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) {
// CHECK-LABEL: test_mm256_fmaddsub_ps
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK-NOT: fneg
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_fmaddsub_ps(a, b, c);
}
__m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) {
// CHECK-LABEL: test_mm256_fmaddsub_pd
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_fmaddsub_pd(a, b, c);
}
__m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) {
// CHECK-LABEL: test_mm256_fmsubadd_ps
- // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.+}}
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
return _mm256_fmsubadd_ps(a, b, c);
}
__m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) {
// CHECK-LABEL: test_mm256_fmsubadd_pd
// CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
return _mm256_fmsubadd_pd(a, b, c);
}
diff --git a/clang/test/CodeGen/fma4-builtins.c b/clang/test/CodeGen/fma4-builtins.c
index 7dc769afbf72..274ca481fabf 100644
--- a/clang/test/CodeGen/fma4-builtins.c
+++ b/clang/test/CodeGen/fma4-builtins.c
@@ -149,37 +149,29 @@ __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
__m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_maddsub_ps
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: shufflevector <4 x float> [[SUB]], <4 x float> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_maddsub_ps(a, b, c);
}
__m128d test_mm_maddsub_pd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_maddsub_pd
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: shufflevector <2 x double> [[SUB]], <2 x double> [[ADD]], <2 x i32> <i32 0, i32 3>
+ // CHECK-NOT: fneg
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_maddsub_pd(a, b, c);
}
__m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) {
// CHECK-LABEL: test_mm_msubadd_ps
// CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
- // CHECK: shufflevector <4 x float> [[ADD]], <4 x float> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
return _mm_msubadd_ps(a, b, c);
}
__m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) {
// CHECK-LABEL: test_mm_msubadd_pd
// CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
- // CHECK: shufflevector <2 x double> [[ADD]], <2 x double> [[SUB]], <2 x i32> <i32 0, i32 3>
+ // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
return _mm_msubadd_pd(a, b, c);
}
@@ -241,36 +233,28 @@ __m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) {
__m256 test_mm256_maddsub_ps(__m256 a, __m256 b, __m256 c) {
// CHECK-LABEL: test_mm256_maddsub_ps
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: shufflevector <8 x float> [[SUB]], <8 x float> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK-NOT: fneg
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_maddsub_ps(a, b, c);
}
__m256d test_mm256_maddsub_pd(__m256d a, __m256d b, __m256d c) {
// CHECK-LABEL: test_mm256_maddsub_pd
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[SUB]], <4 x double> [[ADD]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK-NOT: fneg
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_maddsub_pd(a, b, c);
}
__m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) {
// CHECK-LABEL: test_mm256_msubadd_ps
// CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
- // CHECK: [[SUB:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
- // CHECK: shufflevector <8 x float> [[ADD]], <8 x float> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
return _mm256_msubadd_ps(a, b, c);
}
__m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) {
// CHECK-LABEL: test_mm256_msubadd_pd
// CHECK: [[NEG:%.+]] = fneg <4 x double> {{.+}}
- // CHECK: [[SUB:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]]
- // CHECK: [[ADD:%.+]] = call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
- // CHECK: shufflevector <4 x double> [[ADD]], <4 x double> [[SUB]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
return _mm256_msubadd_pd(a, b, c);
}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5796686dd79f..94a1ac987da2 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -1787,6 +1787,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// FMA3 and FMA4
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmaddsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmaddsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+
def int_x86_avx512_vfmadd_pd_512 :
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
@@ -1797,7 +1816,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<3>]>;
- // TODO: Can we use 2 vfmadds+shufflevector?
def int_x86_avx512_vfmaddsub_pd_512 :
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index faea5b047722..f6e4a6b85199 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -99,7 +99,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("fma4.vfmadd.s") || // Added in 7.0
Name.startswith("fma.vfmadd.") || // Added in 7.0
Name.startswith("fma.vfmsub.") || // Added in 7.0
- Name.startswith("fma.vfmaddsub.") || // Added in 7.0
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
Name.startswith("fma.vfnmadd.") || // Added in 7.0
Name.startswith("fma.vfnmsub.") || // Added in 7.0
@@ -3212,28 +3211,26 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
- } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
- Name.startswith("fma.vfmsubadd.p"))) {
- bool IsSubAdd = Name[7] == 's';
- int NumElts = CI->getType()->getVectorNumElements();
+ } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
+ unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
+ unsigned EltWidth = CI->getType()->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_fma_vfmaddsub_ps;
+ else if (VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
+ else if (VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_fma_vfmaddsub_pd;
+ else if (VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
+ else
+ llvm_unreachable("Unexpected intrinsic");
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
-
- Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
- Ops[0]->getType());
- Value *Odd = Builder.CreateCall(FMA, Ops);
Ops[2] = Builder.CreateFNeg(Ops[2]);
- Value *Even = Builder.CreateCall(FMA, Ops);
-
- if (IsSubAdd)
- std::swap(Even, Odd);
-
- SmallVector<uint32_t, 32> Idxs(NumElts);
- for (int i = 0; i != NumElts; ++i)
- Idxs[i] = i + (i % 2) * NumElts;
-
- Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ Ops);
} else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
Name.startswith("avx512.mask3.vfmaddsub.p") ||
Name.startswith("avx512.maskz.vfmaddsub.p") ||
@@ -3243,9 +3240,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Drop the "avx512.mask." to make it easier.
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
bool IsSubAdd = Name[3] == 's';
- if (CI->getNumArgOperands() == 5 &&
- (!isa<ConstantInt>(CI->getArgOperand(4)) ||
- cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
+ if (CI->getNumArgOperands() == 5) {
Intrinsic::ID IID;
// Check the character before ".512" in string.
if (Name[Name.size()-5] == 's')
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index b3080a178dd3..669b6889525a 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -997,6 +997,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
X86_INTRINSIC_DATA(bmi_bzhi_64, INTR_TYPE_2OP, X86ISD::BZHI, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
More information about the llvm-commits
mailing list