[libcxx-commits] [llvm] [flang] [lld] [clang-tools-extra] [libcxx] [clang] [libc] [compiler-rt] [lldb] [Legalizer] Expand fmaximum and fminimum (PR #67301)
Qiu Chaofan via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Jan 9 22:44:31 PST 2024
================
@@ -8262,6 +8262,64 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
+SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
+ SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned Opc = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ bool IsMax = Opc == ISD::FMAXIMUM;
+
+ if (VT.isVector() &&
+ isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType()))
+ return SDValue();
+
+ // First, implement comparison not propagating NaN. If no native fmin or fmax
+ // available, use plain select with setcc instead.
+ SDValue MinMax;
+ if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE,
+ VT)) {
+ MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, DL, VT,
+ LHS, RHS);
+ } else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM,
+ VT)) {
+ MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, DL, VT, LHS, RHS);
+ } else {
+ SDValue Compare =
+ DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
+ MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS);
+ }
+
+ // Propagate any NaN of both operands
+ if (!N->getFlags().hasNoNaNs() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ ConstantFP *FPNaN = ConstantFP::get(
+ *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
+ MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
+ DAG.getConstantFP(*FPNaN, DL, VT), MinMax);
+ }
+
+ // fminimum/fmaximum requires -0.0 less than +0.0
+ if (!N->getFlags().hasNoSignedZeros() && !DAG.isKnownNeverZeroFloat(LHS) &&
+ !DAG.isKnownNeverZeroFloat(RHS)) {
+ SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+ DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+ SDValue TestZero =
+ DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
+ SDValue LCmp = DAG.getSelect(
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+ MinMax);
+ SDValue RCmp = DAG.getSelect(
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
+ LCmp);
+ MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax);
----------------
ecnelises wrote:
PowerPC implements `fmaxnum_ieee` into `xsmaxdp` which repsects zero signs. I did not see AMDGPU backend implements it.
While LoongArch (using `fmax.d`) ISA just says _following the specification of maxNum(x,y) operation in the IEEE 754-2008 standard_. I think it's unspecified. (maybe @llvm/pr-subscribers-loongarch )
https://github.com/llvm/llvm-project/pull/67301
More information about the libcxx-commits
mailing list