[lldb] [clang] [libcxx] [lld] [compiler-rt] [libc] [clang-tools-extra] [llvm] [flang] [Legalizer] Expand fmaximum and fminimum (PR #67301)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 8 23:58:42 PST 2024
================
@@ -8262,6 +8262,64 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
+SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
+ SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned Opc = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ bool IsMax = Opc == ISD::FMAXIMUM;
+
+ if (VT.isVector() &&
+ isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType()))
+ return SDValue();
+
+ // First, implement comparison not propagating NaN. If no native fmin or fmax
+ // available, use plain select with setcc instead.
+ SDValue MinMax;
+ if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE,
+ VT)) {
+ MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, DL, VT,
+ LHS, RHS);
+ } else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM,
+ VT)) {
+ MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, DL, VT, LHS, RHS);
+ } else {
+ SDValue Compare =
+ DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
+ MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS);
+ }
+
+ // Propagate any NaN of both operands
+ if (!N->getFlags().hasNoNaNs() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ ConstantFP *FPNaN = ConstantFP::get(
+ *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
+ MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
+ DAG.getConstantFP(*FPNaN, DL, VT), MinMax);
+ }
+
+ // fminimum/fmaximum requires -0.0 less than +0.0
+ if (!N->getFlags().hasNoSignedZeros() && !DAG.isKnownNeverZeroFloat(LHS) &&
+ !DAG.isKnownNeverZeroFloat(RHS)) {
+ SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+ DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+ SDValue TestZero =
+ DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
+ SDValue LCmp = DAG.getSelect(
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+ MinMax);
+ SDValue RCmp = DAG.getSelect(
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
+ LCmp);
+ MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax);
----------------
arsenm wrote:
I think we should try to avoid using is_fpclass here. Additionally, I think we have under-defined the internally used IEEE nodes. As currently defined, minnum_ieee/maxnum_ieee have an unspecified ordering of signed zeros. However, for AMDGPU at least, the actual hardware instructions have always ordered zeros appropriately. One option is to refine the _IEEE node definitions to follow IEEE 754-2019 and require ordered-zero behavior, in which case this fixup would not be needed.
https://github.com/llvm/llvm-project/pull/67301
More information about the cfe-commits
mailing list