[llvm] r336383 - [X86] Remove the last of the 'x86.fma.' intrinsics and autoupgrade them to 'llvm.fma'. Add upgrade tests for all.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 5 11:43:58 PDT 2018
Author: ctopper
Date: Thu Jul 5 11:43:58 2018
New Revision: 336383
URL: http://llvm.org/viewvc/llvm-project?rev=336383&view=rev
Log:
[X86] Remove the last of the 'x86.fma.' intrinsics and autoupgrade them to 'llvm.fma'. Add upgrade tests for all.
The AVX512 masked versions still need to be removed.
Added:
llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86-upgrade.ll
- copied, changed from r336376, llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll
- copied, changed from r336376, llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=336383&r1=336382&r2=336383&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Thu Jul 5 11:43:58 2018
@@ -1912,23 +1912,6 @@ let TargetPrefix = "x86" in { // All in
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
- def int_x86_fma_vfmaddsub_ps : // FIXME: remove this intrinsic.
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmaddsub_pd : // FIXME: remove this intrinsic.
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmaddsub_ps_256 : // FIXME: remove this intrinsic.
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmaddsub_pd_256 : // FIXME: remove this intrinsic.
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
- [IntrNoMem]>;
-
def int_x86_avx512_mask_vfmadd_pd_128 : // FIXME: remove this intrinsic.
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=336383&r1=336382&r2=336383&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Thu Jul 5 11:43:58 2018
@@ -76,6 +76,7 @@ static bool ShouldUpgradeX86Intrinsic(Fu
Name=="ssse3.pabs.d.128" || // Added in 6.0
Name.startswith("fma.vfmadd.") || // Added in 7.0
Name.startswith("fma.vfmsub.") || // Added in 7.0
+ Name.startswith("fma.vfmaddsub.") || // Added in 7.0
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
Name.startswith("fma.vfnmadd.") || // Added in 7.0
Name.startswith("fma.vfnmsub.") || // Added in 7.0
@@ -2778,25 +2779,30 @@ void llvm::UpgradeIntrinsicCall(CallInst
if (IsScalar)
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
(uint64_t)0);
- } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
- // Handle FSUBADD.
- unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
- unsigned EltWidth = CI->getType()->getScalarSizeInBits();
- Intrinsic::ID IID;
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_fma_vfmaddsub_ps;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_fma_vfmaddsub_pd;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
- else
- llvm_unreachable("Unexpected intrinsic");
- Value *Arg2 = Builder.CreateFNeg(CI->getArgOperand(2));
- Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), Arg2 };
- Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
- Ops);
+ } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
+ Name.startswith("fma.vfmsubadd.p"))) {
+ bool IsSubAdd = Name[7] == 's';
+ int NumElts = CI->getType()->getVectorNumElements();
+
+
+
+ Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2) };
+
+ Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
+ Ops[0]->getType());
+ Value *Odd = Builder.CreateCall(FMA, Ops);
+ Ops[2] = Builder.CreateFNeg(Ops[2]);
+ Value *Even = Builder.CreateCall(FMA, Ops);
+
+ if (IsSubAdd)
+ std::swap(Even, Odd);
+
+ SmallVector<uint32_t, 32> Idxs(NumElts);
+ for (int i = 0; i != NumElts; ++i)
+ Idxs[i] = i + (i % 2) * NumElts;
+
+ Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
} else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
Name.startswith("avx512.maskz.pternlog."))) {
bool ZeroMask = Name[11] == 'z';
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=336383&r1=336382&r2=336383&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Thu Jul 5 11:43:58 2018
@@ -1249,10 +1249,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_vpshrd_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
- X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma4_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
X86_INTRINSIC_DATA(fma4_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
Copied: llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86-upgrade.ll (from r336376, llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86-upgrade.ll?p2=llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86-upgrade.ll&p1=llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll&r1=336376&r2=336383&rev=336383&view=diff
==============================================================================
(empty)
Modified: llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll?rev=336383&r1=336382&r2=336383&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll Thu Jul 5 11:43:58 2018
@@ -25,8 +25,12 @@ define <4 x float> @test_x86_fma_vfmadd_
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = extractelement <4 x float> %a0, i64 0
+ %2 = extractelement <4 x float> %a1, i64 0
+ %3 = extractelement <4 x float> %a2, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = insertelement <4 x float> %a0, float %4, i64 0
+ ret <4 x float> %5
}
define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -52,10 +56,13 @@ define <4 x float> @test_x86_fma_vfmadd_
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = extractelement <4 x float> %a1, i64 0
+ %2 = extractelement <4 x float> %a0, i64 0
+ %3 = extractelement <4 x float> %a2, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = insertelement <4 x float> %a1, float %4, i64 0
+ ret <4 x float> %5
}
-declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
@@ -78,8 +85,12 @@ define <2 x double> @test_x86_fma_vfmadd
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = extractelement <2 x double> %a0, i64 0
+ %2 = extractelement <2 x double> %a1, i64 0
+ %3 = extractelement <2 x double> %a2, i64 0
+ %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
+ %5 = insertelement <2 x double> %a0, double %4, i64 0
+ ret <2 x double> %5
}
define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
@@ -105,10 +116,13 @@ define <2 x double> @test_x86_fma_vfmadd
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = extractelement <2 x double> %a1, i64 0
+ %2 = extractelement <2 x double> %a0, i64 0
+ %3 = extractelement <2 x double> %a2, i64 0
+ %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
+ %5 = insertelement <2 x double> %a1, double %4, i64 0
+ ret <2 x double> %5
}
-declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
@@ -130,10 +144,9 @@ define <4 x float> @test_x86_fma_vfmadd_
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+ ret <4 x float> %1
}
-declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
@@ -155,10 +168,9 @@ define <2 x double> @test_x86_fma_vfmadd
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+ ret <2 x double> %1
}
-declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
@@ -180,10 +192,9 @@ define <8 x float> @test_x86_fma_vfmadd_
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+ ret <8 x float> %1
}
-declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
@@ -205,10 +216,9 @@ define <4 x double> @test_x86_fma_vfmadd
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+ ret <4 x double> %1
}
-declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFMSUB
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -232,8 +242,13 @@ define <4 x float> @test_x86_fma_vfmsub_
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = extractelement <4 x float> %a0, i64 0
+ %2 = extractelement <4 x float> %a1, i64 0
+ %3 = extractelement <4 x float> %a2, i64 0
+ %4 = fsub float -0.000000e+00, %3
+ %5 = call float @llvm.fma.f32(float %1, float %2, float %4)
+ %6 = insertelement <4 x float> %a0, float %5, i64 0
+ ret <4 x float> %6
}
define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -259,10 +274,14 @@ define <4 x float> @test_x86_fma_vfmsub_
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = extractelement <4 x float> %a1, i64 0
+ %2 = extractelement <4 x float> %a0, i64 0
+ %3 = extractelement <4 x float> %a2, i64 0
+ %4 = fsub float -0.000000e+00, %3
+ %5 = call float @llvm.fma.f32(float %1, float %2, float %4)
+ %6 = insertelement <4 x float> %a1, float %5, i64 0
+ ret <4 x float> %6
}
-declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
@@ -285,8 +304,13 @@ define <2 x double> @test_x86_fma_vfmsub
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = extractelement <2 x double> %a0, i64 0
+ %2 = extractelement <2 x double> %a1, i64 0
+ %3 = extractelement <2 x double> %a2, i64 0
+ %4 = fsub double -0.000000e+00, %3
+ %5 = call double @llvm.fma.f64(double %1, double %2, double %4)
+ %6 = insertelement <2 x double> %a0, double %5, i64 0
+ ret <2 x double> %6
}
define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
@@ -312,10 +336,14 @@ define <2 x double> @test_x86_fma_vfmsub
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = extractelement <2 x double> %a1, i64 0
+ %2 = extractelement <2 x double> %a0, i64 0
+ %3 = extractelement <2 x double> %a2, i64 0
+ %4 = fsub double -0.000000e+00, %3
+ %5 = call double @llvm.fma.f64(double %1, double %2, double %4)
+ %6 = insertelement <2 x double> %a1, double %5, i64 0
+ ret <2 x double> %6
}
-declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
@@ -337,10 +365,10 @@ define <4 x float> @test_x86_fma_vfmsub_
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
+ ret <4 x float> %2
}
-declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
@@ -362,10 +390,10 @@ define <2 x double> @test_x86_fma_vfmsub
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
+ %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
+ ret <2 x double> %2
}
-declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
@@ -387,10 +415,10 @@ define <8 x float> @test_x86_fma_vfmsub_
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
+ ret <8 x float> %2
}
-declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
@@ -412,10 +440,10 @@ define <4 x double> @test_x86_fma_vfmsub
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
+ %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
+ ret <4 x double> %2
}
-declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFNMADD
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -439,8 +467,13 @@ define <4 x float> @test_x86_fma_vfnmadd
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = extractelement <4 x float> %a0, i64 0
+ %2 = extractelement <4 x float> %a1, i64 0
+ %3 = extractelement <4 x float> %a2, i64 0
+ %4 = fsub float -0.000000e+00, %2
+ %5 = call float @llvm.fma.f32(float %1, float %4, float %3)
+ %6 = insertelement <4 x float> %a0, float %5, i64 0
+ ret <4 x float> %6
}
define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -466,10 +499,14 @@ define <4 x float> @test_x86_fma_vfnmadd
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = extractelement <4 x float> %a1, i64 0
+ %2 = extractelement <4 x float> %a0, i64 0
+ %3 = extractelement <4 x float> %a2, i64 0
+ %4 = fsub float -0.000000e+00, %2
+ %5 = call float @llvm.fma.f32(float %1, float %4, float %3)
+ %6 = insertelement <4 x float> %a1, float %5, i64 0
+ ret <4 x float> %6
}
-declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
@@ -492,8 +529,13 @@ define <2 x double> @test_x86_fma_vfnmad
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = extractelement <2 x double> %a0, i64 0
+ %2 = extractelement <2 x double> %a1, i64 0
+ %3 = extractelement <2 x double> %a2, i64 0
+ %4 = fsub double -0.000000e+00, %2
+ %5 = call double @llvm.fma.f64(double %1, double %4, double %3)
+ %6 = insertelement <2 x double> %a0, double %5, i64 0
+ ret <2 x double> %6
}
define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
@@ -519,10 +561,14 @@ define <2 x double> @test_x86_fma_vfnmad
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = extractelement <2 x double> %a1, i64 0
+ %2 = extractelement <2 x double> %a0, i64 0
+ %3 = extractelement <2 x double> %a2, i64 0
+ %4 = fsub double -0.000000e+00, %2
+ %5 = call double @llvm.fma.f64(double %1, double %4, double %3)
+ %6 = insertelement <2 x double> %a1, double %5, i64 0
+ ret <2 x double> %6
}
-declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
@@ -544,10 +590,10 @@ define <4 x float> @test_x86_fma_vfnmadd
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
+ %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
+ ret <4 x float> %2
}
-declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
@@ -569,10 +615,10 @@ define <2 x double> @test_x86_fma_vfnmad
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
+ %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
+ ret <2 x double> %2
}
-declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
@@ -594,10 +640,10 @@ define <8 x float> @test_x86_fma_vfnmadd
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
+ %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
+ ret <8 x float> %2
}
-declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
@@ -619,10 +665,10 @@ define <4 x double> @test_x86_fma_vfnmad
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
+ %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
+ ret <4 x double> %2
}
-declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFNMSUB
define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -646,8 +692,14 @@ define <4 x float> @test_x86_fma_vfnmsub
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = extractelement <4 x float> %a0, i64 0
+ %2 = extractelement <4 x float> %a1, i64 0
+ %3 = extractelement <4 x float> %a2, i64 0
+ %4 = fsub float -0.000000e+00, %2
+ %5 = fsub float -0.000000e+00, %3
+ %6 = call float @llvm.fma.f32(float %1, float %4, float %5)
+ %7 = insertelement <4 x float> %a0, float %6, i64 0
+ ret <4 x float> %7
}
define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -673,10 +725,15 @@ define <4 x float> @test_x86_fma_vfnmsub
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = extractelement <4 x float> %a1, i64 0
+ %2 = extractelement <4 x float> %a0, i64 0
+ %3 = extractelement <4 x float> %a2, i64 0
+ %4 = fsub float -0.000000e+00, %2
+ %5 = fsub float -0.000000e+00, %3
+ %6 = call float @llvm.fma.f32(float %1, float %4, float %5)
+ %7 = insertelement <4 x float> %a1, float %6, i64 0
+ ret <4 x float> %7
}
-declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd:
@@ -699,8 +756,14 @@ define <2 x double> @test_x86_fma_vfnmsu
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = extractelement <2 x double> %a0, i64 0
+ %2 = extractelement <2 x double> %a1, i64 0
+ %3 = extractelement <2 x double> %a2, i64 0
+ %4 = fsub double -0.000000e+00, %2
+ %5 = fsub double -0.000000e+00, %3
+ %6 = call double @llvm.fma.f64(double %1, double %4, double %5)
+ %7 = insertelement <2 x double> %a0, double %6, i64 0
+ ret <2 x double> %7
}
define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
@@ -726,10 +789,15 @@ define <2 x double> @test_x86_fma_vfnmsu
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = extractelement <2 x double> %a1, i64 0
+ %2 = extractelement <2 x double> %a0, i64 0
+ %3 = extractelement <2 x double> %a2, i64 0
+ %4 = fsub double -0.000000e+00, %2
+ %5 = fsub double -0.000000e+00, %3
+ %6 = call double @llvm.fma.f64(double %1, double %4, double %5)
+ %7 = insertelement <2 x double> %a1, double %6, i64 0
+ ret <2 x double> %7
}
-declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps:
@@ -751,10 +819,11 @@ define <4 x float> @test_x86_fma_vfnmsub
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
+ %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2)
+ ret <4 x float> %3
}
-declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd:
@@ -776,10 +845,11 @@ define <2 x double> @test_x86_fma_vfnmsu
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2)
+ ret <2 x double> %3
}
-declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256:
@@ -801,10 +871,11 @@ define <8 x float> @test_x86_fma_vfnmsub
; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
+ %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2)
+ ret <8 x float> %3
}
-declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256:
@@ -826,10 +897,11 @@ define <4 x double> @test_x86_fma_vfnmsu
; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
+ %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2)
+ ret <4 x double> %3
}
-declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFMADDSUB
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -852,10 +924,12 @@ define <4 x float> @test_x86_fma_vfmadds
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+ %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
+ %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x float> %4
}
-declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd:
@@ -877,10 +951,12 @@ define <2 x double> @test_x86_fma_vfmadd
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
+ %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %4
}
-declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256:
@@ -902,10 +978,12 @@ define <8 x float> @test_x86_fma_vfmadds
; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+ %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
+ %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ ret <8 x float> %4
}
-declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256:
@@ -927,10 +1005,12 @@ define <4 x double> @test_x86_fma_vfmadd
; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+ %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
+ %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x double> %4
}
-declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFMSUBADD
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -953,10 +1033,12 @@ define <4 x float> @test_x86_fma_vfmsuba
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+ %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
+ %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x float> %4
}
-declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd:
@@ -978,10 +1060,12 @@ define <2 x double> @test_x86_fma_vfmsub
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
+ %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %4
}
-declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256:
@@ -1003,10 +1087,12 @@ define <8 x float> @test_x86_fma_vfmsuba
; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+ %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
+ %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ ret <8 x float> %4
}
-declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256:
@@ -1028,9 +1114,18 @@ define <4 x double> @test_x86_fma_vfmsub
; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+ %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
+ %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x double> %4
}
-declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.fma.f64(double, double, double)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
attributes #0 = { nounwind }
Copied: llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll (from r336376, llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll?p2=llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll&p1=llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll&r1=336376&r2=336383&rev=336383&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86-upgrade.ll Thu Jul 5 11:43:58 2018
@@ -2,45 +2,6 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
-; VFMADD
-define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
-
-define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
-
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll?rev=336383&r1=336382&r2=336383&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll Thu Jul 5 11:43:58 2018
@@ -46,40 +46,36 @@ define <4 x float> @test_x86_fma_vfmadd_
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+ ret <4 x float> %1
}
-declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+ ret <2 x double> %1
}
-declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+ ret <8 x float> %1
}
-declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+ ret <4 x double> %1
}
-declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFMSUB
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -87,40 +83,40 @@ define <4 x float> @test_x86_fma_vfmsub_
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
+ ret <4 x float> %2
}
-declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
+ %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
+ ret <2 x double> %2
}
-declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
+ ret <8 x float> %2
}
-declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
+ %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
+ ret <4 x double> %2
}
-declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFNMADD
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -128,40 +124,40 @@ define <4 x float> @test_x86_fma_vfnmadd
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
+ %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
+ ret <4 x float> %2
}
-declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
+ %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
+ ret <2 x double> %2
}
-declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
+ %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
+ ret <8 x float> %2
}
-declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
+ %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
+ ret <4 x double> %2
}
-declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFNMSUB
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -169,40 +165,44 @@ define <4 x float> @test_x86_fma_vfnmsub
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
+ %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2)
+ ret <4 x float> %3
}
-declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2)
+ ret <2 x double> %3
}
-declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
+ %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2)
+ ret <8 x float> %3
}
-declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
+ %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2)
+ ret <4 x double> %3
}
-declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFMADDSUB
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -210,40 +210,48 @@ define <4 x float> @test_x86_fma_vfmadds
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+ %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
+ %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x float> %4
}
-declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
+ %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %4
}
-declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+ %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
+ %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ ret <8 x float> %4
}
-declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+ %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
+ %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x double> %4
}
-declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFMSUBADD
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -251,39 +259,52 @@ define <4 x float> @test_x86_fma_vfmsuba
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- ret <4 x float> %res
+ %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+ %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
+ %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x float> %4
}
-declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- ret <2 x double> %res
+ %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
+ %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %4
}
-declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+ %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
+ %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
+ %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ ret <8 x float> %4
}
-declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+ %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
+ %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
+ %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x double> %4
}
-declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #2
+declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #2
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #2
attributes #0 = { nounwind }
More information about the llvm-commits mailing list