[clang] b2497e5 - [PowerPC] Add generic fnmsub intrinsic
Qiu Chaofan via cfe-commits
cfe-commits at lists.llvm.org
Sun Mar 6 21:06:46 PST 2022
Author: Qiu Chaofan
Date: 2022-03-07T13:00:06+08:00
New Revision: b2497e54356d454d0e16d8f44086bf6db6aff0e3
URL: https://github.com/llvm/llvm-project/commit/b2497e54356d454d0e16d8f44086bf6db6aff0e3
DIFF: https://github.com/llvm/llvm-project/commit/b2497e54356d454d0e16d8f44086bf6db6aff0e3.diff
LOG: [PowerPC] Add generic fnmsub intrinsic
Currently in Clang we have two kinds of builtins for the fnmsub operation:
the float/double vector builtins are lowered to plain IR operations, while
the float/double scalar builtins emit the corresponding target intrinsics.
For the vector builtins, the resulting three-operation chain may be treated
as expensive by some passes (such as EarlyCSE), so we need a way to keep the
fnmsub form intact until code generation.
This patch introduces an overloaded ppc.fnmsub.* intrinsic to unify the four
fnmsub variants, as sketched below.
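As a rough sketch (illustrative IR only; the operand names here are made up,
and the exact output is what the updated tests below check), the v2f64
builtin previously expanded to a three-operation chain and now emits a
single intrinsic call:

  ; before: fneg + fma + fneg, which passes may break apart
  %neg = fneg <2 x double> %c
  %fma = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %neg)
  %res = fneg <2 x double> %fma

  ; after: one overloaded intrinsic, kept intact until instruction selection
  %res = call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)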
Reviewed By: shchenz
Differential Revision: https://reviews.llvm.org/D116015
Added:
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/PowerPC/builtins-ppc-fma.c
clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c
clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.td
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5b917ead9cd9c..acbeac326ece6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15778,6 +15778,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
}
// FMA variations
+ case PPC::BI__builtin_ppc_fnmsub:
+ case PPC::BI__builtin_ppc_fnmsubs:
case PPC::BI__builtin_vsx_xvmaddadp:
case PPC::BI__builtin_vsx_xvmaddasp:
case PPC::BI__builtin_vsx_xvnmaddadp:
@@ -15816,6 +15818,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
F, {X, Y, Builder.CreateFNeg(Z, "neg")});
else
return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
+ case PPC::BI__builtin_ppc_fnmsub:
+ case PPC::BI__builtin_ppc_fnmsubs:
case PPC::BI__builtin_vsx_xvnmsubadp:
case PPC::BI__builtin_vsx_xvnmsubasp:
if (Builder.getIsFPConstrained())
@@ -15824,10 +15828,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
"neg");
else
- return Builder.CreateFNeg(
- Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
- "neg");
- }
+ return Builder.CreateCall(
+ CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
+ }
llvm_unreachable("Unknown FMA operation");
return nullptr; // Suppress no-return warning
}
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c
index 3f124e8c8299c..111302337954b 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c
@@ -32,12 +32,8 @@ void test_fma(void) {
// CHECK: <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]])
vf = __builtin_vsx_xvnmsubasp(vf, vf, vf);
- // CHECK: [[RESULT:%[^ ]+]] = fneg <4 x float> %{{.*}}
- // CHECK: [[RESULT2:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT]])
- // CHECK: fneg <4 x float> [[RESULT2]]
+ // CHECK: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
vd = __builtin_vsx_xvnmsubadp(vd, vd, vd);
- // CHECK: [[RESULT:%[^ ]+]] = fneg <2 x double> %{{.*}}
- // CHECK: [[RESULT2:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]])
- // CHECK: fneg <2 x double> [[RESULT2]]
+ // CHECK: call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
}
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c
index 909210996064c..f09ba841e2202 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c
@@ -142,9 +142,7 @@ void test_float(void) {
vf = __builtin_vsx_xvnmsubasp(vf, vf, vf);
// CHECK-LABEL: try-xvnmsubasp
- // CHECK-UNCONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <4 x float> %{{.*}}
- // CHECK-UNCONSTRAINED: [[RESULT1:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT0]])
- // CHECK-UNCONSTRAINED: fneg <4 x float> [[RESULT1]]
+ // CHECK-UNCONSTRAINED: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
// CHECK-CONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <4 x float> %{{.*}}
// CHECK-CONSTRAINED: [[RESULT1:%[^ ]+]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT0]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-CONSTRAINED: fneg <4 x float> [[RESULT1]]
@@ -152,9 +150,7 @@ void test_float(void) {
vd = __builtin_vsx_xvnmsubadp(vd, vd, vd);
// CHECK-LABEL: try-xvnmsubadp
- // CHECK-UNCONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <2 x double> %{{.*}}
- // CHECK-UNCONSTRAINED: [[RESULT1:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT0]])
- // CHECK-UNCONSTRAINED: fneg <2 x double> [[RESULT1]]
+ // CHECK-UNCONSTRAINED: call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
// CHECK-CONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <2 x double> %{{.*}}
// CHECK-CONSTRAINED: [[RESULT1:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT0]], metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-CONSTRAINED: fneg <2 x double> [[RESULT1]]
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
index b0028e9737730..ea0e49ea7edc9 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
@@ -894,20 +894,12 @@ void test1() {
// CHECK-LE-NEXT: fneg <2 x double> %[[FM]]
res_vf = vec_nmsub(vf, vf, vf);
-// CHECK: fneg <4 x float> %{{[0-9]+}}
-// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
-// CHECK: fneg <4 x float> %{{[0-9]+}}
-// CHECK-LE: fneg <4 x float> %{{[0-9]+}}
-// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
-// CHECK-LE: fneg <4 x float> %{{[0-9]+}}
+// CHECK: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
+// CHECK-LE: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
res_vd = vec_nmsub(vd, vd, vd);
-// CHECK: fneg <2 x double> %{{[0-9]+}}
-// CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
-// CHECK-NEXT: fneg <2 x double> %[[FM]]
-// CHECK-LE: fneg <2 x double> %{{[0-9]+}}
-// CHECK-LE-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
-// CHECK-LE-NEXT: fneg <2 x double> %[[FM]]
+// CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
+// CHECK-LE: [[FM:[0-9]+]] = call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
/* vec_nor */
res_vsll = vec_nor(vsll, vsll);
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c
index 47b81660a91ef..0f9322bad456d 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c
@@ -95,10 +95,11 @@ float fnmadds (float f) {
// CHECK-LABEL: @fnmsub(
// CHECK: [[D_ADDR:%.*]] = alloca double, align 8
// CHECK-NEXT: store double [[D:%.*]], double* [[D_ADDR]], align 8
+// CHECK-COUNT-3: load double, double* [[D_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[D_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[D_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[D_ADDR]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.ppc.fnmsub(double [[TMP0]], double [[TMP1]], double [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.ppc.fnmsub.f64(double [[TMP0]], double [[TMP1]], double [[TMP2]])
// CHECK-NEXT: ret double [[TMP3]]
//
double fnmsub (double d) {
@@ -108,10 +109,11 @@ double fnmsub (double d) {
// CHECK-LABEL: @fnmsubs(
// CHECK: [[F_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT: store float [[F:%.*]], float* [[F_ADDR]], align 4
+// CHECK-COUNT-3: load float, float* [[F_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[F_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[F_ADDR]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.ppc.fnmsubs(float [[TMP0]], float [[TMP1]], float [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.ppc.fnmsub.f32(float [[TMP0]], float [[TMP1]], float [[TMP2]])
// CHECK-NEXT: ret float [[TMP3]]
//
float fnmsubs (float f) {
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index b01fa10763b83..44d2d52705d02 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1722,15 +1722,9 @@ let TargetPrefix = "ppc" in {
[llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_ppc_fnmsub
- : GCCBuiltin<"__builtin_ppc_fnmsub">,
- Intrinsic <[llvm_double_ty],
- [llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem]>;
- def int_ppc_fnmsubs
- : GCCBuiltin<"__builtin_ppc_fnmsubs">,
- Intrinsic <[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem]>;
+ : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
def int_ppc_fre
: GCCBuiltin<"__builtin_ppc_fre">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
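With the overloaded llvm_anyfloat_ty definition above, a single TableGen
entry covers all four variants. In IR terms, the instances are the
declarations exercised by the updated CodeGen test below:

  declare float        @llvm.ppc.fnmsub.f32(float, float, float)
  declare double       @llvm.ppc.fnmsub.f64(double, double, double)
  declare <4 x float>  @llvm.ppc.fnmsub.v4f32(<4 x float>, <4 x float>, <4 x float>)
  declare <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double>, <2 x double>, <2 x double>)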
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 7910ba899993b..7b44ec8b39b92 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -627,6 +627,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
@@ -10549,6 +10551,16 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
0);
}
+ case Intrinsic::ppc_fnmsub: {
+ EVT VT = Op.getOperand(1).getValueType();
+ if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
+ return DAG.getNode(
+ ISD::FNEG, dl, VT,
+ DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
+ DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
+ return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ }
case Intrinsic::ppc_convert_f128_to_ppcf128:
case Intrinsic::ppc_convert_ppcf128_to_f128: {
RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
@@ -11220,6 +11232,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
N->getOperand(2), N->getOperand(1)));
break;
+ case Intrinsic::ppc_fnmsub:
case Intrinsic::ppc_convert_f128_to_ppcf128:
Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
break;
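Behaviorally, the new lowering keeps fnmsub as a PPCISD::FNMSUB node when
VSX is available; otherwise (or for f128 without hardware f128 support) it
falls back to the equivalent fneg/fma/fneg expansion. A sketch of that
fallback, written as scalar IR for readability (the lowering itself builds
SelectionDAG nodes, not IR):

  ; fnmsub(a, b, c) == -(a * b - c), expanded when VSX is unavailable
  %negc = fneg double %c
  %fma  = call double @llvm.fma.f64(double %a, double %b, double %negc)
  %res  = fneg double %fma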
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 4cbcf8acbd59c..961f9e3d51c7e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3728,8 +3728,6 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
// XL Compat intrinsics.
def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (FMSUB $A, $B, $C)>;
def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (FMSUBS $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (FNMSUB $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (FNMSUBS $A, $B, $C)>;
def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (FNMADD $A, $B, $C)>;
def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (FNMADDS $A, $B, $C)>;
def : Pat<(int_ppc_fre f64:$A), (FRE $A)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 110f7d79fbc55..d33593365691f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2897,7 +2897,6 @@ def : Pat<(v2i64 (PPCvcmp_rec v2i64:$vA, v2i64:$vB, 199)),
// XL Compat builtins.
def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (XSMSUBMDP $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (XSNMSUBMDP $A, $B, $C)>;
def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (XSNMADDMDP $A, $B, $C)>;
def : Pat<(int_ppc_fre f64:$A), (XSREDP $A)>;
def : Pat<(int_ppc_frsqrte vsfrc:$XB), (XSRSQRTEDP $XB)>;
@@ -3311,7 +3310,6 @@ def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
// XL Compat builtins.
def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (XSMSUBMSP $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (XSNMSUBMSP $A, $B, $C)>;
def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (XSNMADDMSP $A, $B, $C)>;
def : Pat<(int_ppc_fres f32:$A), (XSRESP $A)>;
def : Pat<(i32 (int_ppc_extract_exp f64:$A)),
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll
index d67b8793871f4..443f65ce1cd6c 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll
@@ -98,49 +98,104 @@ entry:
declare float @llvm.ppc.fnmadds(float, float, float)
-define dso_local double @fnmsub_t0(double %d, double %d2, double %d3) {
-; CHECK-PWR8-LABEL: fnmsub_t0:
+define dso_local float @fnmsub_f32(float %f, float %f2, float %f3) {
+; CHECK-PWR8-LABEL: fnmsub_f32:
; CHECK-PWR8: # %bb.0: # %entry
-; CHECK-PWR8-NEXT: xsnmsubmdp 1, 2, 3
+; CHECK-PWR8-NEXT: xsnmsubasp 3, 1, 2
+; CHECK-PWR8-NEXT: fmr 1, 3
; CHECK-PWR8-NEXT: blr
;
-; CHECK-NOVSX-LABEL: fnmsub_t0:
+; CHECK-NOVSX-LABEL: fnmsub_f32:
+; CHECK-NOVSX: # %bb.0: # %entry
+; CHECK-NOVSX-NEXT: fnmsubs 1, 1, 2, 3
+; CHECK-NOVSX-NEXT: blr
+;
+; CHECK-PWR7-LABEL: fnmsub_f32:
+; CHECK-PWR7: # %bb.0: # %entry
+; CHECK-PWR7-NEXT: fnmsubs 1, 1, 2, 3
+; CHECK-PWR7-NEXT: blr
+entry:
+ %0 = tail call float @llvm.ppc.fnmsub.f32(float %f, float %f2, float %f3)
+ ret float %0
+}
+
+declare float @llvm.ppc.fnmsub.f32(float, float, float)
+
+define dso_local double @fnmsub_f64(double %f, double %f2, double %f3) {
+; CHECK-PWR8-LABEL: fnmsub_f64:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: xsnmsubadp 3, 1, 2
+; CHECK-PWR8-NEXT: fmr 1, 3
+; CHECK-PWR8-NEXT: blr
+;
+; CHECK-NOVSX-LABEL: fnmsub_f64:
; CHECK-NOVSX: # %bb.0: # %entry
; CHECK-NOVSX-NEXT: fnmsub 1, 1, 2, 3
; CHECK-NOVSX-NEXT: blr
;
-; CHECK-PWR7-LABEL: fnmsub_t0:
+; CHECK-PWR7-LABEL: fnmsub_f64:
; CHECK-PWR7: # %bb.0: # %entry
-; CHECK-PWR7-NEXT: xsnmsubmdp 1, 2, 3
+; CHECK-PWR7-NEXT: xsnmsubadp 3, 1, 2
+; CHECK-PWR7-NEXT: fmr 1, 3
; CHECK-PWR7-NEXT: blr
entry:
- %0 = tail call double @llvm.ppc.fnmsub(double %d, double %d2, double %d3)
+ %0 = tail call double @llvm.ppc.fnmsub.f64(double %f, double %f2, double %f3)
ret double %0
}
-declare double @llvm.ppc.fnmsub(double, double, double)
+declare double @llvm.ppc.fnmsub.f64(double, double, double)
-define dso_local float @fnmsubs_t0(float %f, float %f2, float %f3) {
-; CHECK-PWR8-LABEL: fnmsubs_t0:
+define dso_local <4 x float> @fnmsub_v4f32(<4 x float> %f, <4 x float> %f2, <4 x float> %f3) {
+; CHECK-PWR8-LABEL: fnmsub_v4f32:
; CHECK-PWR8: # %bb.0: # %entry
-; CHECK-PWR8-NEXT: xsnmsubmsp 1, 2, 3
+; CHECK-PWR8-NEXT: xvnmsubasp 36, 34, 35
+; CHECK-PWR8-NEXT: vmr 2, 4
; CHECK-PWR8-NEXT: blr
;
-; CHECK-NOVSX-LABEL: fnmsubs_t0:
+; CHECK-NOVSX-LABEL: fnmsub_v4f32:
; CHECK-NOVSX: # %bb.0: # %entry
-; CHECK-NOVSX-NEXT: fnmsubs 1, 1, 2, 3
+; CHECK-NOVSX-NEXT: fnmsubs 1, 1, 5, 9
+; CHECK-NOVSX-NEXT: fnmsubs 2, 2, 6, 10
+; CHECK-NOVSX-NEXT: fnmsubs 3, 3, 7, 11
+; CHECK-NOVSX-NEXT: fnmsubs 4, 4, 8, 12
; CHECK-NOVSX-NEXT: blr
;
-; CHECK-PWR7-LABEL: fnmsubs_t0:
+; CHECK-PWR7-LABEL: fnmsub_v4f32:
; CHECK-PWR7: # %bb.0: # %entry
-; CHECK-PWR7-NEXT: fnmsubs 1, 1, 2, 3
+; CHECK-PWR7-NEXT: xvnmsubasp 36, 34, 35
+; CHECK-PWR7-NEXT: vmr 2, 4
; CHECK-PWR7-NEXT: blr
entry:
- %0 = tail call float @llvm.ppc.fnmsubs(float %f, float %f2, float %f3)
- ret float %0
+ %0 = tail call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %f, <4 x float> %f2, <4 x float> %f3)
+ ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+define dso_local <2 x double> @fnmsub_v2f64(<2 x double> %f, <2 x double> %f2, <2 x double> %f3) {
+; CHECK-PWR8-LABEL: fnmsub_v2f64:
+; CHECK-PWR8: # %bb.0: # %entry
+; CHECK-PWR8-NEXT: xvnmsubadp 36, 34, 35
+; CHECK-PWR8-NEXT: vmr 2, 4
+; CHECK-PWR8-NEXT: blr
+;
+; CHECK-NOVSX-LABEL: fnmsub_v2f64:
+; CHECK-NOVSX: # %bb.0: # %entry
+; CHECK-NOVSX-NEXT: fnmsub 1, 1, 3, 5
+; CHECK-NOVSX-NEXT: fnmsub 2, 2, 4, 6
+; CHECK-NOVSX-NEXT: blr
+;
+; CHECK-PWR7-LABEL: fnmsub_v2f64:
+; CHECK-PWR7: # %bb.0: # %entry
+; CHECK-PWR7-NEXT: xvnmsubadp 36, 34, 35
+; CHECK-PWR7-NEXT: vmr 2, 4
+; CHECK-PWR7-NEXT: blr
+entry:
+ %0 = tail call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %f, <2 x double> %f2, <2 x double> %f3)
+ ret <2 x double> %0
}
-declare float @llvm.ppc.fnmsubs(float, float, float)
+declare <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double>, <2 x double>, <2 x double>)
define dso_local double @fre(double %d) {
; CHECK-PWR8-LABEL: fre: