[llvm] e8d2ff2 - [PowerPC] Add fma/fsqrt/fmax strict-fp intrinsics
Qiu Chaofan via llvm-commits
llvm-commits@lists.llvm.org
Mon May 11 22:44:24 PDT 2020
Author: Qiu Chaofan
Date: 2020-05-12T13:44:09+08:00
New Revision: e8d2ff22f09fea835ffc778e41a2acae4f4a5632
URL: https://github.com/llvm/llvm-project/commit/e8d2ff22f09fea835ffc778e41a2acae4f4a5632
DIFF: https://github.com/llvm/llvm-project/commit/e8d2ff22f09fea835ffc778e41a2acae4f4a5632.diff
LOG: [PowerPC] Add fma/fsqrt/fmax strict-fp intrinsics
This patch adds support for the strict-fp (constrained) intrinsics for fma,
fsqrt, fmaxnum, and fminnum on PowerPC.
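For reference, here is a minimal sketch of the kind of constrained call the
backend can now select directly (the function and value names below are
illustrative, not taken from the patch). Per the added fp-strict.ll test, a
call like this compiles with VSX to a single xsmaddadp:

declare double @llvm.experimental.constrained.fma.f64(double, double, double,
                                                      metadata, metadata)

; The rounding mode and exception behavior travel as metadata operands, so
; the call must not be constant-folded or moved past FP-environment changes.
define double @strict_fma_example(double %a, double %b, double %c) #0 {
  %r = call double @llvm.experimental.constrained.fma.f64(
                      double %a, double %b, double %c,
                      metadata !"round.dynamic",
                      metadata !"fpexcept.strict")
  ret double %r
}

attributes #0 = { strictfp }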
Reviewed By: hfinkel
Differential Revision: https://reviews.llvm.org/D72749
Added:
llvm/test/CodeGen/PowerPC/fp-strict-minmax.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.td
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
llvm/test/CodeGen/PowerPC/fp-strict.ll
llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 041be6fdb067..9d49b3a1e069 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -291,11 +291,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
+
+ if (Subtarget.hasFSQRT()) {
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
+ }
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -933,11 +940,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
@@ -1001,6 +1016,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
}
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 04c81159d657..a43c472f13ec 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2576,10 +2576,10 @@ let Uses = [RM] in {
defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
"fsqrt", "$frD, $frB", IIC_FPSqrtD,
- [(set f64:$frD, (fsqrt f64:$frB))]>;
+ [(set f64:$frD, (any_fsqrt f64:$frB))]>;
defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
"fsqrts", "$frD, $frB", IIC_FPSqrtS,
- [(set f32:$frD, (fsqrt f32:$frB))]>;
+ [(set f32:$frD, (any_fsqrt f32:$frB))]>;
}
}
}
@@ -3001,40 +3001,40 @@ let isCommutable = 1 in {
defm FMADD : AForm_1r<63, 29,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+ [(set f64:$FRT, (any_fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
defm FMADDS : AForm_1r<59, 29,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+ [(set f32:$FRT, (any_fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
defm FMSUB : AForm_1r<63, 28,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
- (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+ (any_fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
defm FMSUBS : AForm_1r<59, 28,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
- (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+ (any_fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
defm FNMADD : AForm_1r<63, 31,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
- (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+ (fneg (any_fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
defm FNMADDS : AForm_1r<59, 31,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
- (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+ (fneg (any_fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
defm FNMSUB : AForm_1r<63, 30,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ [(set f64:$FRT, (fneg (any_fma f64:$FRA, f64:$FRC,
(fneg f64:$FRB))))]>;
defm FNMSUBS : AForm_1r<59, 30,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ [(set f32:$FRT, (fneg (any_fma f32:$FRA, f32:$FRC,
(fneg f32:$FRB))))]>;
} // isCommutable
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 7621913780d1..419f3fcd13e6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -409,7 +409,7 @@ let hasSideEffects = 0 in {
def XSMADDADP : XX3Form<60, 33,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+ [(set f64:$XT, (any_fma f64:$XA, f64:$XB, f64:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -425,7 +425,7 @@ let hasSideEffects = 0 in {
def XSMSUBADP : XX3Form<60, 49,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+ [(set f64:$XT, (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -441,7 +441,7 @@ let hasSideEffects = 0 in {
def XSNMADDADP : XX3Form<60, 161,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+ [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -457,7 +457,7 @@ let hasSideEffects = 0 in {
def XSNMSUBADP : XX3Form<60, 177,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+ [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -473,7 +473,7 @@ let hasSideEffects = 0 in {
def XVMADDADP : XX3Form<60, 97,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+ [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -489,7 +489,7 @@ let hasSideEffects = 0 in {
def XVMADDASP : XX3Form<60, 65,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+ [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -505,7 +505,7 @@ let hasSideEffects = 0 in {
def XVMSUBADP : XX3Form<60, 113,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+ [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -521,7 +521,7 @@ let hasSideEffects = 0 in {
def XVMSUBASP : XX3Form<60, 81,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+ [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -537,7 +537,7 @@ let hasSideEffects = 0 in {
def XVNMADDADP : XX3Form<60, 225,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+ [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -569,7 +569,7 @@ let hasSideEffects = 0 in {
def XVNMSUBADP : XX3Form<60, 241,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+ [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -585,7 +585,7 @@ let hasSideEffects = 0 in {
def XVNMSUBASP : XX3Form<60, 209,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+ [(set v4f32:$XT, (fneg (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -604,7 +604,7 @@ let hasSideEffects = 0 in {
def XSSQRTDP : XX2Form<60, 75,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xssqrtdp $XT, $XB", IIC_FPSqrtD,
- [(set f64:$XT, (fsqrt f64:$XB))]>;
+ [(set f64:$XT, (any_fsqrt f64:$XB))]>;
def XSREDP : XX2Form<60, 90,
(outs vsfrc:$XT), (ins vsfrc:$XB),
@@ -634,11 +634,11 @@ let hasSideEffects = 0 in {
def XVSQRTDP : XX2Form<60, 203,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvsqrtdp $XT, $XB", IIC_FPSqrtD,
- [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fsqrt v2f64:$XB))]>;
def XVSQRTSP : XX2Form<60, 139,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvsqrtsp $XT, $XB", IIC_FPSqrtS,
- [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fsqrt v4f32:$XB))]>;
def XVTDIVDP : XX3Form_1<60, 125,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
@@ -1157,7 +1157,6 @@ let Predicates = [HasVSX, HasP8Vector] in {
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsdivsp $XT, $XA, $XB", IIC_FPDivS,
[(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>;
- } // mayRaiseFPException
def XSRESP : XX2Form<60, 26,
(outs vssrc:$XT), (ins vssrc:$XB),
@@ -1171,7 +1170,7 @@ let Predicates = [HasVSX, HasP8Vector] in {
def XSSQRTSP : XX2Form<60, 11,
(outs vssrc:$XT), (ins vssrc:$XB),
"xssqrtsp $XT, $XB", IIC_FPSqrtS,
- [(set f32:$XT, (fsqrt f32:$XB))]>;
+ [(set f32:$XT, (any_fsqrt f32:$XB))]>;
def XSRSQRTESP : XX2Form<60, 10,
(outs vssrc:$XT), (ins vssrc:$XB),
"xsrsqrtesp $XT, $XB", IIC_VecFP,
@@ -1184,7 +1183,7 @@ let Predicates = [HasVSX, HasP8Vector] in {
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+ [(set f32:$XT, (any_fma f32:$XA, f32:$XB, f32:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
@@ -1203,7 +1202,7 @@ let Predicates = [HasVSX, HasP8Vector] in {
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fma f32:$XA, f32:$XB,
+ [(set f32:$XT, (any_fma f32:$XA, f32:$XB,
(fneg f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
@@ -1223,7 +1222,7 @@ let Predicates = [HasVSX, HasP8Vector] in {
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
@@ -1243,7 +1242,7 @@ let Predicates = [HasVSX, HasP8Vector] in {
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
(fneg f32:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
@@ -1272,6 +1271,7 @@ let Predicates = [HasVSX, HasP8Vector] in {
"xscvdpspn $XT, $XB", IIC_VecFP, []>;
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
+ } // mayRaiseFPException
let Predicates = [HasVSX, HasDirectMove] in {
// VSX direct move instructions
@@ -1367,27 +1367,25 @@ let Predicates = [HasVSX, HasP9Vector] in {
[(set f128:$vT, (any_fsub f128:$vA, f128:$vB))]>;
def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
[(set f128:$vT, (any_fdiv f128:$vA, f128:$vB))]>;
- } // mayRaiseFPException
// Square-Root
def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
- [(set f128:$vT, (fsqrt f128:$vB))]>;
+ [(set f128:$vT, (any_fsqrt f128:$vB))]>;
// (Negative) Multiply-{Add/Subtract}
def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
[(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- f128:$vTi))]>;
+ (any_fma f128:$vA, f128:$vB, f128:$vTi))]>;
def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
[(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi)))]>;
+ (any_fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi)))]>;
def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
[(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- f128:$vTi)))]>;
+ (fneg (any_fma f128:$vA, f128:$vB,
+ f128:$vTi)))]>;
def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
[(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi))))]>;
+ (fneg (any_fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi))))]>;
let isCommutable = 1 in {
def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
@@ -1429,6 +1427,7 @@ let Predicates = [HasVSX, HasP9Vector] in {
[(set f128:$vT,
(fneg (int_ppc_fmaf128_round_to_odd
f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
+ } // mayRaiseFPException
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
// QP Compare Ordered/Unordered
@@ -2594,13 +2593,13 @@ def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
(XXSEL $vC, $vB, $vA)>;
-def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
+def : Pat<(v4f32 (any_fmaxnum v4f32:$src1, v4f32:$src2)),
(v4f32 (XVMAXSP $src1, $src2))>;
-def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
+def : Pat<(v4f32 (any_fminnum v4f32:$src1, v4f32:$src2)),
(v4f32 (XVMINSP $src1, $src2))>;
-def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
+def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
(v2f64 (XVMAXDP $src1, $src2))>;
-def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
+def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
(v2f64 (XVMINDP $src1, $src2))>;
// f32 Min.
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
index 80968ababd2b..2f92382f4709 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
@@ -6,6 +6,9 @@ declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, m
declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.fma.f128(fp128, fp128, fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+
define fp128 @fadd_f128(fp128 %f1, fp128 %f2) {
; CHECK-LABEL: fadd_f128:
; CHECK: # %bb.0:
@@ -53,3 +56,72 @@ define fp128 @fdiv_f128(fp128 %f1, fp128 %f2) {
metadata !"fpexcept.strict")
ret fp128 %res
}
+
+define fp128 @fmadd_f128(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: fmadd_f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsmaddqp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+ %res = call fp128 @llvm.experimental.constrained.fma.f128(
+ fp128 %f0, fp128 %f1, fp128 %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret fp128 %res
+}
+
+define fp128 @fmsub_f128(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: fmsub_f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsmsubqp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+ %neg = fneg fp128 %f2
+ %res = call fp128 @llvm.experimental.constrained.fma.f128(
+ fp128 %f0, fp128 %f1, fp128 %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret fp128 %res
+}
+
+define fp128 @fnmadd_f128(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: fnmadd_f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsnmaddqp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+ %fma = call fp128 @llvm.experimental.constrained.fma.f128(
+ fp128 %f0, fp128 %f1, fp128 %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg fp128 %fma
+ ret fp128 %res
+}
+
+define fp128 @fnmsub_f128(fp128 %f0, fp128 %f1, fp128 %f2) {
+; CHECK-LABEL: fnmsub_f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsnmsubqp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+ %neg = fneg fp128 %f2
+ %fma = call fp128 @llvm.experimental.constrained.fma.f128(
+ fp128 %f0, fp128 %f1, fp128 %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg fp128 %fma
+ ret fp128 %res
+}
+
+
+define fp128 @fsqrt_f128(fp128 %f1) {
+; CHECK-LABEL: fsqrt_f128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xssqrtqp v2, v2
+; CHECK-NEXT: blr
+ %res = call fp128 @llvm.experimental.constrained.sqrt.f128(
+ fp128 %f1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret fp128 %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-minmax.ll b/llvm/test/CodeGen/PowerPC/fp-strict-minmax.ll
new file mode 100644
index 000000000000..14e8be5d7d39
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-minmax.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
+
+declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata)
+declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata)
+
+define <4 x float> @fmaxnum_v4f32(<4 x float> %vf0, <4 x float> %vf1) {
+; CHECK-LABEL: fmaxnum_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmaxsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(
+ <4 x float> %vf0, <4 x float> %vf1,
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <2 x double> @fmaxnum_v2f64(<2 x double> %vf0, <2 x double> %vf1) {
+; CHECK-LABEL: fmaxnum_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmaxdp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+ <2 x double> %vf0, <2 x double> %vf1,
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
+
+
+define <4 x float> @fminnum_v4f32(<4 x float> %vf0, <4 x float> %vf1) {
+; CHECK-LABEL: fminnum_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvminsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = call <4 x float> @llvm.experimental.constrained.minnum.v4f32(
+ <4 x float> %vf0, <4 x float> %vf1,
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <2 x double> @fminnum_v2f64(<2 x double> %vf0, <2 x double> %vf1) {
+; CHECK-LABEL: fminnum_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmindp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+ <2 x double> %vf0, <2 x double> %vf1,
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll
index 2324a16b6ecd..743f68029be9 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll
@@ -23,6 +23,16 @@ declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata,
declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+
define float @fadd_f32(float %f1, float %f2) {
; CHECK-LABEL: fadd_f32:
; CHECK: # %bb.0:
@@ -411,3 +421,513 @@ define double @no_fma_fold(double %f1, double %f2, double %f3) {
metadata !"fpexcept.strict")
ret double %add
}
+
+define float @fmadd_f32(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: fmadd_f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsmaddasp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fmadd_f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fmadds f1, f1, f2, f3
+; NOVSX-NEXT: blr
+ %res = call float @llvm.experimental.constrained.fma.f32(
+ float %f0, float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define double @fmadd_f64(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: fmadd_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsmaddadp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fmadd_f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fmadd f1, f1, f2, f3
+; NOVSX-NEXT: blr
+ %res = call double @llvm.experimental.constrained.fma.f64(
+ double %f0, double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: fmadd_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmaddasp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fmadd_v4f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: addi r3, r1, -32
+; NOVSX-NEXT: addi r4, r1, -48
+; NOVSX-NEXT: stvx v4, 0, r3
+; NOVSX-NEXT: addi r3, r1, -64
+; NOVSX-NEXT: stvx v3, 0, r4
+; NOVSX-NEXT: stvx v2, 0, r3
+; NOVSX-NEXT: addi r3, r1, -16
+; NOVSX-NEXT: lfs f0, -20(r1)
+; NOVSX-NEXT: lfs f1, -36(r1)
+; NOVSX-NEXT: lfs f2, -52(r1)
+; NOVSX-NEXT: fmadds f0, f2, f1, f0
+; NOVSX-NEXT: lfs f1, -40(r1)
+; NOVSX-NEXT: lfs f2, -56(r1)
+; NOVSX-NEXT: stfs f0, -4(r1)
+; NOVSX-NEXT: lfs f0, -24(r1)
+; NOVSX-NEXT: fmadds f0, f2, f1, f0
+; NOVSX-NEXT: lfs f1, -44(r1)
+; NOVSX-NEXT: lfs f2, -60(r1)
+; NOVSX-NEXT: stfs f0, -8(r1)
+; NOVSX-NEXT: lfs f0, -28(r1)
+; NOVSX-NEXT: fmadds f0, f2, f1, f0
+; NOVSX-NEXT: lfs f1, -48(r1)
+; NOVSX-NEXT: lfs f2, -64(r1)
+; NOVSX-NEXT: stfs f0, -12(r1)
+; NOVSX-NEXT: lfs f0, -32(r1)
+; NOVSX-NEXT: fmadds f0, f2, f1, f0
+; NOVSX-NEXT: stfs f0, -16(r1)
+; NOVSX-NEXT: lvx v2, 0, r3
+; NOVSX-NEXT: blr
+ %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+ <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: fmadd_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmaddadp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fmadd_v2f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fmadd f2, f2, f4, f6
+; NOVSX-NEXT: fmadd f1, f1, f3, f5
+; NOVSX-NEXT: blr
+ %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+ <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
+
+define float @fmsub_f32(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: fmsub_f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsmsubasp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fmsub_f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fmsubs f1, f1, f2, f3
+; NOVSX-NEXT: blr
+ %neg = fneg float %f2
+ %res = call float @llvm.experimental.constrained.fma.f32(
+ float %f0, float %f1, float %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define double @fmsub_f64(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: fmsub_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsmsubadp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fmsub_f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fmsub f1, f1, f2, f3
+; NOVSX-NEXT: blr
+ %neg = fneg double %f2
+ %res = call double @llvm.experimental.constrained.fma.f64(
+ double %f0, double %f1, double %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: fmsub_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmsubasp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fmsub_v4f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: vspltisb v5, -1
+; NOVSX-NEXT: addi r3, r1, -48
+; NOVSX-NEXT: addi r4, r1, -64
+; NOVSX-NEXT: stvx v3, 0, r3
+; NOVSX-NEXT: addi r3, r1, -32
+; NOVSX-NEXT: stvx v2, 0, r4
+; NOVSX-NEXT: vslw v5, v5, v5
+; NOVSX-NEXT: vsubfp v4, v5, v4
+; NOVSX-NEXT: stvx v4, 0, r3
+; NOVSX-NEXT: addi r3, r1, -16
+; NOVSX-NEXT: lfs f0, -36(r1)
+; NOVSX-NEXT: lfs f1, -52(r1)
+; NOVSX-NEXT: lfs f2, -20(r1)
+; NOVSX-NEXT: fmadds f0, f1, f0, f2
+; NOVSX-NEXT: lfs f1, -56(r1)
+; NOVSX-NEXT: lfs f2, -24(r1)
+; NOVSX-NEXT: stfs f0, -4(r1)
+; NOVSX-NEXT: lfs f0, -40(r1)
+; NOVSX-NEXT: fmadds f0, f1, f0, f2
+; NOVSX-NEXT: lfs f1, -60(r1)
+; NOVSX-NEXT: lfs f2, -28(r1)
+; NOVSX-NEXT: stfs f0, -8(r1)
+; NOVSX-NEXT: lfs f0, -44(r1)
+; NOVSX-NEXT: fmadds f0, f1, f0, f2
+; NOVSX-NEXT: lfs f1, -64(r1)
+; NOVSX-NEXT: lfs f2, -32(r1)
+; NOVSX-NEXT: stfs f0, -12(r1)
+; NOVSX-NEXT: lfs f0, -48(r1)
+; NOVSX-NEXT: fmadds f0, f1, f0, f2
+; NOVSX-NEXT: stfs f0, -16(r1)
+; NOVSX-NEXT: lvx v2, 0, r3
+; NOVSX-NEXT: blr
+ %neg = fneg <4 x float> %vf2
+ %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+ <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: fmsub_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmsubadp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fmsub_v2f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fmsub f2, f2, f4, f6
+; NOVSX-NEXT: fmsub f1, f1, f3, f5
+; NOVSX-NEXT: blr
+ %neg = fneg <2 x double> %vf2
+ %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+ <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
+
+define float @fnmadd_f32(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: fnmadd_f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsnmaddasp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fnmadd_f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fnmadds f1, f1, f2, f3
+; NOVSX-NEXT: blr
+ %fma = call float @llvm.experimental.constrained.fma.f32(
+ float %f0, float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg float %fma
+ ret float %res
+}
+
+define double @fnmadd_f64(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: fnmadd_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsnmaddadp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fnmadd_f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fnmadd f1, f1, f2, f3
+; NOVSX-NEXT: blr
+ %fma = call double @llvm.experimental.constrained.fma.f64(
+ double %f0, double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg double %fma
+ ret double %res
+}
+
+define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: fnmadd_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmaddasp v4, v2, v3
+; CHECK-NEXT: xvnegsp v2, v4
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fnmadd_v4f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: addi r3, r1, -32
+; NOVSX-NEXT: addi r4, r1, -48
+; NOVSX-NEXT: stvx v4, 0, r3
+; NOVSX-NEXT: addi r3, r1, -64
+; NOVSX-NEXT: stvx v3, 0, r4
+; NOVSX-NEXT: stvx v2, 0, r3
+; NOVSX-NEXT: vspltisb v2, -1
+; NOVSX-NEXT: addi r3, r1, -16
+; NOVSX-NEXT: lfs f0, -20(r1)
+; NOVSX-NEXT: lfs f1, -36(r1)
+; NOVSX-NEXT: lfs f2, -52(r1)
+; NOVSX-NEXT: vslw v2, v2, v2
+; NOVSX-NEXT: fmadds f0, f2, f1, f0
+; NOVSX-NEXT: lfs f1, -40(r1)
+; NOVSX-NEXT: lfs f2, -56(r1)
+; NOVSX-NEXT: stfs f0, -4(r1)
+; NOVSX-NEXT: lfs f0, -24(r1)
+; NOVSX-NEXT: fmadds f0, f2, f1, f0
+; NOVSX-NEXT: lfs f1, -44(r1)
+; NOVSX-NEXT: lfs f2, -60(r1)
+; NOVSX-NEXT: stfs f0, -8(r1)
+; NOVSX-NEXT: lfs f0, -28(r1)
+; NOVSX-NEXT: fmadds f0, f2, f1, f0
+; NOVSX-NEXT: lfs f1, -48(r1)
+; NOVSX-NEXT: lfs f2, -64(r1)
+; NOVSX-NEXT: stfs f0, -12(r1)
+; NOVSX-NEXT: lfs f0, -32(r1)
+; NOVSX-NEXT: fmadds f0, f2, f1, f0
+; NOVSX-NEXT: stfs f0, -16(r1)
+; NOVSX-NEXT: lvx v3, 0, r3
+; NOVSX-NEXT: vsubfp v2, v2, v3
+; NOVSX-NEXT: blr
+ %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+ <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg <4 x float> %fma
+ ret <4 x float> %res
+}
+
+define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: fnmadd_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvnmaddadp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fnmadd_v2f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fnmadd f2, f2, f4, f6
+; NOVSX-NEXT: fnmadd f1, f1, f3, f5
+; NOVSX-NEXT: blr
+ %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+ <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg <2 x double> %fma
+ ret <2 x double> %res
+}
+
+define float @fnmsub_f32(float %f0, float %f1, float %f2) {
+; CHECK-LABEL: fnmsub_f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsnmsubasp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fnmsub_f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fnmsubs f1, f1, f2, f3
+; NOVSX-NEXT: blr
+ %neg = fneg float %f2
+ %fma = call float @llvm.experimental.constrained.fma.f32(
+ float %f0, float %f1, float %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg float %fma
+ ret float %res
+}
+
+define double @fnmsub_f64(double %f0, double %f1, double %f2) {
+; CHECK-LABEL: fnmsub_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xsnmsubadp f3, f1, f2
+; CHECK-NEXT: fmr f1, f3
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fnmsub_f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fnmsub f1, f1, f2, f3
+; NOVSX-NEXT: blr
+ %neg = fneg double %f2
+ %fma = call double @llvm.experimental.constrained.fma.f64(
+ double %f0, double %f1, double %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg double %fma
+ ret double %res
+}
+
+define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) {
+; CHECK-LABEL: fnmsub_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvnmsubasp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fnmsub_v4f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: vspltisb v5, -1
+; NOVSX-NEXT: addi r3, r1, -48
+; NOVSX-NEXT: addi r4, r1, -64
+; NOVSX-NEXT: stvx v3, 0, r3
+; NOVSX-NEXT: addi r3, r1, -32
+; NOVSX-NEXT: stvx v2, 0, r4
+; NOVSX-NEXT: vslw v5, v5, v5
+; NOVSX-NEXT: vsubfp v4, v5, v4
+; NOVSX-NEXT: stvx v4, 0, r3
+; NOVSX-NEXT: addi r3, r1, -16
+; NOVSX-NEXT: lfs f0, -36(r1)
+; NOVSX-NEXT: lfs f1, -52(r1)
+; NOVSX-NEXT: lfs f2, -20(r1)
+; NOVSX-NEXT: fmadds f0, f1, f0, f2
+; NOVSX-NEXT: lfs f1, -56(r1)
+; NOVSX-NEXT: lfs f2, -24(r1)
+; NOVSX-NEXT: stfs f0, -4(r1)
+; NOVSX-NEXT: lfs f0, -40(r1)
+; NOVSX-NEXT: fmadds f0, f1, f0, f2
+; NOVSX-NEXT: lfs f1, -60(r1)
+; NOVSX-NEXT: lfs f2, -28(r1)
+; NOVSX-NEXT: stfs f0, -8(r1)
+; NOVSX-NEXT: lfs f0, -44(r1)
+; NOVSX-NEXT: fmadds f0, f1, f0, f2
+; NOVSX-NEXT: lfs f1, -64(r1)
+; NOVSX-NEXT: lfs f2, -32(r1)
+; NOVSX-NEXT: stfs f0, -12(r1)
+; NOVSX-NEXT: lfs f0, -48(r1)
+; NOVSX-NEXT: fmadds f0, f1, f0, f2
+; NOVSX-NEXT: stfs f0, -16(r1)
+; NOVSX-NEXT: lvx v2, 0, r3
+; NOVSX-NEXT: vsubfp v2, v5, v2
+; NOVSX-NEXT: blr
+ %neg = fneg <4 x float> %vf2
+ %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+ <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg <4 x float> %fma
+ ret <4 x float> %res
+}
+
+define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) {
+; CHECK-LABEL: fnmsub_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvnmsubadp v4, v2, v3
+; CHECK-NEXT: vmr v2, v4
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fnmsub_v2f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fnmsub f2, f2, f4, f6
+; NOVSX-NEXT: fnmsub f1, f1, f3, f5
+; NOVSX-NEXT: blr
+ %neg = fneg <2 x double> %vf2
+ %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+ <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = fneg <2 x double> %fma
+ ret <2 x double> %res
+}
+
+define float @fsqrt_f32(float %f1) {
+; CHECK-LABEL: fsqrt_f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xssqrtsp f1, f1
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fsqrt_f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fsqrts f1, f1
+; NOVSX-NEXT: blr
+ %res = call float @llvm.experimental.constrained.sqrt.f32(
+ float %f1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define double @fsqrt_f64(double %f1) {
+; CHECK-LABEL: fsqrt_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xssqrtdp f1, f1
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fsqrt_f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fsqrt f1, f1
+; NOVSX-NEXT: blr
+ %res = call double @llvm.experimental.constrained.sqrt.f64(
+ double %f1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) {
+; CHECK-LABEL: fsqrt_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsqrtsp v2, v2
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fsqrt_v4f32:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: addi r3, r1, -32
+; NOVSX-NEXT: stvx v2, 0, r3
+; NOVSX-NEXT: addi r3, r1, -16
+; NOVSX-NEXT: lfs f0, -20(r1)
+; NOVSX-NEXT: fsqrts f0, f0
+; NOVSX-NEXT: stfs f0, -4(r1)
+; NOVSX-NEXT: lfs f0, -24(r1)
+; NOVSX-NEXT: fsqrts f0, f0
+; NOVSX-NEXT: stfs f0, -8(r1)
+; NOVSX-NEXT: lfs f0, -28(r1)
+; NOVSX-NEXT: fsqrts f0, f0
+; NOVSX-NEXT: stfs f0, -12(r1)
+; NOVSX-NEXT: lfs f0, -32(r1)
+; NOVSX-NEXT: fsqrts f0, f0
+; NOVSX-NEXT: stfs f0, -16(r1)
+; NOVSX-NEXT: lvx v2, 0, r3
+; NOVSX-NEXT: blr
+ %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
+ <4 x float> %vf1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) {
+; CHECK-LABEL: fsqrt_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsqrtdp v2, v2
+; CHECK-NEXT: blr
+;
+; NOVSX-LABEL: fsqrt_v2f64:
+; NOVSX: # %bb.0:
+; NOVSX-NEXT: fsqrt f2, f2
+; NOVSX-NEXT: fsqrt f1, f1
+; NOVSX-NEXT: blr
+ %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+ <2 x double> %vf1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 66d6f2368011..4117f279eb16 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -1445,8 +1445,8 @@ define <4 x double> @constrained_vector_sqrt_v4f64() #0 {
; PC64LE-NEXT: lxvd2x 1, 0, 4
; PC64LE-NEXT: xxswapd 0, 0
; PC64LE-NEXT: xxswapd 1, 1
-; PC64LE-NEXT: xvsqrtdp 34, 0
-; PC64LE-NEXT: xvsqrtdp 35, 1
+; PC64LE-NEXT: xvsqrtdp 35, 0
+; PC64LE-NEXT: xvsqrtdp 34, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_sqrt_v4f64:
@@ -1456,9 +1456,9 @@ define <4 x double> @constrained_vector_sqrt_v4f64() #0 {
; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI29_1@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI29_1@toc@l
-; PC64LE9-NEXT: xvsqrtdp 34, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: xvsqrtdp 35, 0
+; PC64LE9-NEXT: lxvx 0, 0, 3
+; PC64LE9-NEXT: xvsqrtdp 34, 0
; PC64LE9-NEXT: blr
entry:
%sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
@@ -5323,58 +5323,26 @@ entry:
define <2 x double> @constrained_vector_maxnum_v2f64() #0 {
; PC64LE-LABEL: constrained_vector_maxnum_v2f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -64(1)
; PC64LE-NEXT: addis 3, 2, .LCPI86_0@toc@ha
; PC64LE-NEXT: addis 4, 2, .LCPI86_1@toc@ha
-; PC64LE-NEXT: lfs 1, .LCPI86_0@toc@l(3)
-; PC64LE-NEXT: lfs 2, .LCPI86_1@toc@l(4)
-; PC64LE-NEXT: bl fmax
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: addis 4, 2, .LCPI86_3@toc@ha
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI86_2@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI86_3@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI86_2@toc@l(3)
-; PC64LE-NEXT: bl fmax
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: xxmrghd 34, 1, 0
-; PC64LE-NEXT: addi 1, 1, 64
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: mtlr 0
+; PC64LE-NEXT: addi 3, 3, .LCPI86_0@toc@l
+; PC64LE-NEXT: addi 4, 4, .LCPI86_1@toc@l
+; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: lxvd2x 1, 0, 4
+; PC64LE-NEXT: xxswapd 0, 0
+; PC64LE-NEXT: xxswapd 1, 1
+; PC64LE-NEXT: xvmaxdp 34, 1, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_maxnum_v2f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -48(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI86_0@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI86_0@toc@l(3)
+; PC64LE9-NEXT: addi 3, 3, .LCPI86_0@toc@l
+; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI86_1@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI86_1@toc@l(3)
-; PC64LE9-NEXT: bl fmax
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: addis 3, 2, .LCPI86_2@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI86_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI86_3@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI86_3@toc@l(3)
-; PC64LE9-NEXT: bl fmax
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 34, 1, 0
-; PC64LE9-NEXT: addi 1, 1, 48
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: mtlr 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI86_1@toc@l
+; PC64LE9-NEXT: lxvx 1, 0, 3
+; PC64LE9-NEXT: xvmaxdp 34, 1, 0
; PC64LE9-NEXT: blr
entry:
%max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
@@ -5491,41 +5459,27 @@ define <3 x double> @constrained_vector_max_v3f64() #0 {
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -80(1)
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: addis 4, 2, .LCPI88_1@toc@ha
-; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT: stdu 1, -32(1)
; PC64LE-NEXT: addis 3, 2, .LCPI88_0@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI88_1@toc@l(4)
+; PC64LE-NEXT: addis 4, 2, .LCPI88_1@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI88_0@toc@l(3)
+; PC64LE-NEXT: lfs 2, .LCPI88_1@toc@l(4)
; PC64LE-NEXT: bl fmax
; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: addis 4, 2, .LCPI88_3@toc@ha
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI88_2@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI88_3@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI88_2@toc@l(3)
-; PC64LE-NEXT: bl fmax
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: addis 4, 2, .LCPI88_5@toc@ha
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: addis 3, 2, .LCPI88_4@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI88_5@toc@l(4)
-; PC64LE-NEXT: xxmrghd 63, 1, 0
-; PC64LE-NEXT: lfs 1, .LCPI88_4@toc@l(3)
-; PC64LE-NEXT: bl fmax
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: xxswapd 0, 63
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: xxlor 2, 63, 63
-; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: addis 4, 2, .LCPI88_3@toc@ha
; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI88_2@toc@l
+; PC64LE-NEXT: addi 4, 4, .LCPI88_3@toc@l
+; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: lxvd2x 2, 0, 4
+; PC64LE-NEXT: xxswapd 0, 0
+; PC64LE-NEXT: xxswapd 2, 2
+; PC64LE-NEXT: xvmaxdp 2, 2, 0
+; PC64LE-NEXT: xxswapd 0, 2
+; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PC64LE-NEXT: fmr 1, 0
-; PC64LE-NEXT: addi 1, 1, 80
+; PC64LE-NEXT: addi 1, 1, 32
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
@@ -5534,37 +5488,25 @@ define <3 x double> @constrained_vector_max_v3f64() #0 {
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -64(1)
+; PC64LE9-NEXT: stdu 1, -32(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI88_0@toc@ha
; PC64LE9-NEXT: lfs 1, .LCPI88_0@toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI88_1@toc@ha
; PC64LE9-NEXT: lfs 2, .LCPI88_1@toc@l(3)
-; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
; PC64LE9-NEXT: bl fmax
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI88_2@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI88_2@toc@l(3)
+; PC64LE9-NEXT: addi 3, 3, .LCPI88_2@toc@l
+; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI88_3@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI88_3@toc@l(3)
-; PC64LE9-NEXT: bl fmax
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: addis 3, 2, .LCPI88_4@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 63, 1, 0
-; PC64LE9-NEXT: lfs 1, .LCPI88_4@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI88_5@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI88_5@toc@l(3)
-; PC64LE9-NEXT: bl fmax
-; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: addi 3, 3, .LCPI88_3@toc@l
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xxswapd 1, 63
-; PC64LE9-NEXT: xscpsgndp 2, 63, 63
-; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: lxvx 1, 0, 3
+; PC64LE9-NEXT: xvmaxdp 2, 1, 0
+; PC64LE9-NEXT: xxswapd 1, 2
; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: addi 1, 1, 64
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: addi 1, 1, 32
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
@@ -5579,102 +5521,42 @@ entry:
define <4 x double> @constrained_vector_maxnum_v4f64() #0 {
; PC64LE-LABEL: constrained_vector_maxnum_v4f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -80(1)
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: addis 4, 2, .LCPI89_1@toc@ha
-; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI89_0@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI89_1@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI89_0@toc@l(3)
-; PC64LE-NEXT: bl fmax
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: addis 4, 2, .LCPI89_3@toc@ha
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI89_2@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI89_3@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI89_2@toc@l(3)
-; PC64LE-NEXT: bl fmax
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: addis 4, 2, .LCPI89_5@toc@ha
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: addis 3, 2, .LCPI89_4@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI89_5@toc@l(4)
-; PC64LE-NEXT: xxmrghd 63, 1, 0
-; PC64LE-NEXT: lfs 1, .LCPI89_4@toc@l(3)
-; PC64LE-NEXT: bl fmax
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: addis 4, 2, .LCPI89_7@toc@ha
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI89_6@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI89_7@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI89_6@toc@l(3)
-; PC64LE-NEXT: bl fmax
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: vmr 2, 31
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: xxmrghd 35, 1, 0
-; PC64LE-NEXT: addi 1, 1, 80
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: mtlr 0
+; PC64LE-NEXT: addis 4, 2, .LCPI89_1@toc@ha
+; PC64LE-NEXT: addis 5, 2, .LCPI89_2@toc@ha
+; PC64LE-NEXT: addis 6, 2, .LCPI89_3@toc@ha
+; PC64LE-NEXT: addi 3, 3, .LCPI89_0@toc@l
+; PC64LE-NEXT: addi 4, 4, .LCPI89_1@toc@l
+; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: lxvd2x 1, 0, 4
+; PC64LE-NEXT: addi 3, 5, .LCPI89_2@toc@l
+; PC64LE-NEXT: addi 4, 6, .LCPI89_3@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 4
+; PC64LE-NEXT: xxswapd 0, 0
+; PC64LE-NEXT: xxswapd 1, 1
+; PC64LE-NEXT: xxswapd 2, 2
+; PC64LE-NEXT: xxswapd 3, 3
+; PC64LE-NEXT: xvmaxdp 34, 1, 0
+; PC64LE-NEXT: xvmaxdp 35, 3, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_maxnum_v4f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI89_0@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI89_0@toc@l(3)
+; PC64LE9-NEXT: addi 3, 3, .LCPI89_0@toc@l
+; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI89_1@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI89_1@toc@l(3)
-; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: bl fmax
-; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: addi 3, 3, .LCPI89_1@toc@l
+; PC64LE9-NEXT: lxvx 1, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI89_2@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI89_2@toc@l(3)
+; PC64LE9-NEXT: addi 3, 3, .LCPI89_2@toc@l
+; PC64LE9-NEXT: xvmaxdp 34, 1, 0
+; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI89_3@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI89_3@toc@l(3)
-; PC64LE9-NEXT: bl fmax
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: addis 3, 2, .LCPI89_4@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 63, 1, 0
-; PC64LE9-NEXT: lfs 1, .LCPI89_4@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI89_5@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI89_5@toc@l(3)
-; PC64LE9-NEXT: bl fmax
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: addis 3, 2, .LCPI89_6@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI89_6@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI89_7@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI89_7@toc@l(3)
-; PC64LE9-NEXT: bl fmax
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: vmr 2, 31
-; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 35, 1, 0
-; PC64LE9-NEXT: addi 1, 1, 64
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: mtlr 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI89_3@toc@l
+; PC64LE9-NEXT: lxvx 1, 0, 3
+; PC64LE9-NEXT: xvmaxdp 35, 1, 0
; PC64LE9-NEXT: blr
entry:
%max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
@@ -5732,58 +5614,26 @@ define <1 x float> @constrained_vector_minnum_v1f32() #0 {
define <2 x double> @constrained_vector_minnum_v2f64() #0 {
; PC64LE-LABEL: constrained_vector_minnum_v2f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -64(1)
; PC64LE-NEXT: addis 3, 2, .LCPI91_0@toc@ha
; PC64LE-NEXT: addis 4, 2, .LCPI91_1@toc@ha
-; PC64LE-NEXT: lfs 1, .LCPI91_0@toc@l(3)
-; PC64LE-NEXT: lfs 2, .LCPI91_1@toc@l(4)
-; PC64LE-NEXT: bl fmin
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: addis 4, 2, .LCPI91_3@toc@ha
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI91_2@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI91_3@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI91_2@toc@l(3)
-; PC64LE-NEXT: bl fmin
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: xxmrghd 34, 1, 0
-; PC64LE-NEXT: addi 1, 1, 64
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: mtlr 0
+; PC64LE-NEXT: addi 3, 3, .LCPI91_0@toc@l
+; PC64LE-NEXT: addi 4, 4, .LCPI91_1@toc@l
+; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: lxvd2x 1, 0, 4
+; PC64LE-NEXT: xxswapd 0, 0
+; PC64LE-NEXT: xxswapd 1, 1
+; PC64LE-NEXT: xvmindp 34, 1, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_minnum_v2f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -48(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI91_0@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI91_0@toc@l(3)
+; PC64LE9-NEXT: addi 3, 3, .LCPI91_0@toc@l
+; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI91_1@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI91_1@toc@l(3)
-; PC64LE9-NEXT: bl fmin
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: addis 3, 2, .LCPI91_2@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI91_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI91_3@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI91_3@toc@l(3)
-; PC64LE9-NEXT: bl fmin
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 34, 1, 0
-; PC64LE9-NEXT: addi 1, 1, 48
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: mtlr 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI91_1@toc@l
+; PC64LE9-NEXT: lxvx 1, 0, 3
+; PC64LE9-NEXT: xvmindp 34, 1, 0
; PC64LE9-NEXT: blr
entry:
%min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
@@ -5900,41 +5750,27 @@ define <3 x double> @constrained_vector_min_v3f64() #0 {
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -80(1)
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: addis 4, 2, .LCPI93_1@toc@ha
-; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT: stdu 1, -32(1)
; PC64LE-NEXT: addis 3, 2, .LCPI93_0@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI93_1@toc@l(4)
+; PC64LE-NEXT: addis 4, 2, .LCPI93_1@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI93_0@toc@l(3)
+; PC64LE-NEXT: lfs 2, .LCPI93_1@toc@l(4)
; PC64LE-NEXT: bl fmin
; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: addis 4, 2, .LCPI93_3@toc@ha
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI93_2@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI93_3@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI93_2@toc@l(3)
-; PC64LE-NEXT: bl fmin
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: addis 4, 2, .LCPI93_5@toc@ha
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: addis 3, 2, .LCPI93_4@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI93_5@toc@l(4)
-; PC64LE-NEXT: xxmrghd 63, 1, 0
-; PC64LE-NEXT: lfs 1, .LCPI93_4@toc@l(3)
-; PC64LE-NEXT: bl fmin
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: xxswapd 0, 63
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: xxlor 2, 63, 63
-; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: addis 4, 2, .LCPI93_3@toc@ha
; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI93_2@toc@l
+; PC64LE-NEXT: addi 4, 4, .LCPI93_3@toc@l
+; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: lxvd2x 2, 0, 4
+; PC64LE-NEXT: xxswapd 0, 0
+; PC64LE-NEXT: xxswapd 2, 2
+; PC64LE-NEXT: xvmindp 2, 2, 0
+; PC64LE-NEXT: xxswapd 0, 2
+; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
; PC64LE-NEXT: fmr 1, 0
-; PC64LE-NEXT: addi 1, 1, 80
+; PC64LE-NEXT: addi 1, 1, 32
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
@@ -5943,37 +5779,25 @@ define <3 x double> @constrained_vector_min_v3f64() #0 {
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -64(1)
+; PC64LE9-NEXT: stdu 1, -32(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI93_0@toc@ha
; PC64LE9-NEXT: lfs 1, .LCPI93_0@toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI93_1@toc@ha
; PC64LE9-NEXT: lfs 2, .LCPI93_1@toc@l(3)
-; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
; PC64LE9-NEXT: bl fmin
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI93_2@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI93_2@toc@l(3)
+; PC64LE9-NEXT: addi 3, 3, .LCPI93_2@toc@l
+; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI93_3@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI93_3@toc@l(3)
-; PC64LE9-NEXT: bl fmin
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: addis 3, 2, .LCPI93_4@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 63, 1, 0
-; PC64LE9-NEXT: lfs 1, .LCPI93_4@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI93_5@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI93_5@toc@l(3)
-; PC64LE9-NEXT: bl fmin
-; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: addi 3, 3, .LCPI93_3@toc@l
; PC64LE9-NEXT: fmr 3, 1
-; PC64LE9-NEXT: xxswapd 1, 63
-; PC64LE9-NEXT: xscpsgndp 2, 63, 63
-; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: lxvx 1, 0, 3
+; PC64LE9-NEXT: xvmindp 2, 1, 0
+; PC64LE9-NEXT: xxswapd 1, 2
; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: addi 1, 1, 64
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: addi 1, 1, 32
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
@@ -5988,102 +5812,42 @@ entry:
define <4 x double> @constrained_vector_minnum_v4f64() #0 {
; PC64LE-LABEL: constrained_vector_minnum_v4f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -80(1)
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: addis 4, 2, .LCPI94_1@toc@ha
-; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI94_0@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI94_1@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI94_0@toc@l(3)
-; PC64LE-NEXT: bl fmin
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: addis 4, 2, .LCPI94_3@toc@ha
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI94_2@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI94_3@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI94_2@toc@l(3)
-; PC64LE-NEXT: bl fmin
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: addis 4, 2, .LCPI94_5@toc@ha
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: addis 3, 2, .LCPI94_4@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI94_5@toc@l(4)
-; PC64LE-NEXT: xxmrghd 63, 1, 0
-; PC64LE-NEXT: lfs 1, .LCPI94_4@toc@l(3)
-; PC64LE-NEXT: bl fmin
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: addis 4, 2, .LCPI94_7@toc@ha
-; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT: addis 3, 2, .LCPI94_6@toc@ha
-; PC64LE-NEXT: lfs 2, .LCPI94_7@toc@l(4)
-; PC64LE-NEXT: lfs 1, .LCPI94_6@toc@l(3)
-; PC64LE-NEXT: bl fmin
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: li 3, 48
-; PC64LE-NEXT: vmr 2, 31
-; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: li 3, 64
-; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT: xxmrghd 35, 1, 0
-; PC64LE-NEXT: addi 1, 1, 80
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: mtlr 0
+; PC64LE-NEXT: addis 4, 2, .LCPI94_1@toc@ha
+; PC64LE-NEXT: addis 5, 2, .LCPI94_2@toc@ha
+; PC64LE-NEXT: addis 6, 2, .LCPI94_3@toc@ha
+; PC64LE-NEXT: addi 3, 3, .LCPI94_0@toc@l
+; PC64LE-NEXT: addi 4, 4, .LCPI94_1@toc@l
+; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: lxvd2x 1, 0, 4
+; PC64LE-NEXT: addi 3, 5, .LCPI94_2@toc@l
+; PC64LE-NEXT: addi 4, 6, .LCPI94_3@toc@l
+; PC64LE-NEXT: lxvd2x 2, 0, 3
+; PC64LE-NEXT: lxvd2x 3, 0, 4
+; PC64LE-NEXT: xxswapd 0, 0
+; PC64LE-NEXT: xxswapd 1, 1
+; PC64LE-NEXT: xxswapd 2, 2
+; PC64LE-NEXT: xxswapd 3, 3
+; PC64LE-NEXT: xvmindp 34, 1, 0
+; PC64LE-NEXT: xvmindp 35, 3, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_minnum_v4f64:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI94_0@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI94_0@toc@l(3)
+; PC64LE9-NEXT: addi 3, 3, .LCPI94_0@toc@l
+; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI94_1@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI94_1@toc@l(3)
-; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: bl fmin
-; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: addi 3, 3, .LCPI94_1@toc@l
+; PC64LE9-NEXT: lxvx 1, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI94_2@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI94_2@toc@l(3)
+; PC64LE9-NEXT: addi 3, 3, .LCPI94_2@toc@l
+; PC64LE9-NEXT: xvmindp 34, 1, 0
+; PC64LE9-NEXT: lxvx 0, 0, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI94_3@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI94_3@toc@l(3)
-; PC64LE9-NEXT: bl fmin
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: addis 3, 2, .LCPI94_4@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 63, 1, 0
-; PC64LE9-NEXT: lfs 1, .LCPI94_4@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI94_5@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI94_5@toc@l(3)
-; PC64LE9-NEXT: bl fmin
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: addis 3, 2, .LCPI94_6@toc@ha
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT: lfs 1, .LCPI94_6@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI94_7@toc@ha
-; PC64LE9-NEXT: lfs 2, .LCPI94_7@toc@l(3)
-; PC64LE9-NEXT: bl fmin
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: vmr 2, 31
-; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT: xxmrghd 35, 1, 0
-; PC64LE9-NEXT: addi 1, 1, 64
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: mtlr 0
+; PC64LE9-NEXT: addi 3, 3, .LCPI94_3@toc@l
+; PC64LE9-NEXT: lxvx 1, 0, 3
+; PC64LE9-NEXT: xvmindp 35, 1, 0
; PC64LE9-NEXT: blr
entry:
%min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(