[llvm] [Legalizer] Expand fmaximum and fminimum (PR #67301)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 25 02:13:11 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
<details>
<summary>Changes</summary>
According to langref, llvm.maximum/minimum has -0.0 < +0.0 semantics and propagates NaN.
Expand the nodes on targets not supporting the operation, by adding extra check for NaN and using is_fpclass to check zero signs.
Migrated from https://reviews.llvm.org/D158053
---
Patch is 39.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67301.diff
9 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+3)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+6)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (+7)
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+58)
- (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+5-9)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+6-4)
- (modified) llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll (+10-18)
- (added) llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll (+97)
- (added) llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll (+847)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index c6a7aa17146dd4f..429cfd72af2e6e0 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5089,6 +5089,9 @@ class TargetLowering : public TargetLoweringBase {
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
+ /// Expand fminimum/fmaximum into multiple comparison with selects.
+ SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const;
+
/// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
/// \param N Node to expand
/// \returns The expansion result
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f19beea3a3ed8b7..33f6354d5584540 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3540,6 +3540,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Expanded);
break;
}
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+ if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ }
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index dec81475f3a88fc..db132035adcf293 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -949,6 +949,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 39489e0bf142eb2..23de9829b5e9ffd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8177,6 +8177,64 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
+SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
+ SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned Opc = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ bool NoNaN = (N->getFlags().hasNoNaNs() ||
+ (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)));
+ bool NoZeroSign =
+ (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
+ DAG.isKnownNeverZeroFloat(RHS));
+ bool IsMax = Opc == ISD::FMAXIMUM;
+
+ if (VT.isVector() &&
+ isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType()))
+ return SDValue();
+
+ SDValue MinMax;
+ if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE,
+ VT))
+ MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, DL, VT,
+ LHS, RHS);
+ else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, VT))
+ MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, DL, VT, LHS, RHS);
+ else
+ MinMax = DAG.getSelect(
+ DL, VT,
+ DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT), LHS,
+ RHS);
+
+ // Propagate any NaN of both operands
+ if (!NoNaN) {
+ ConstantFP *FPNaN = ConstantFP::get(
+ *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
+ MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
+ DAG.getConstantFP(*FPNaN, DL, VT), MinMax);
+ }
+
+ // fminimum/fmaximum requires -0.0 less than +0.0
+ if (!NoZeroSign) {
+ SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+ DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+ SDValue TestZero =
+ DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
+ SDValue LCmp = DAG.getSelect(
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+ MinMax);
+ SDValue RCmp = DAG.getSelect(
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
+ LCmp);
+ MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax);
+ }
+
+ return MinMax;
+}
+
/// Returns a true value if if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 69ef942df1f6e78..9eac62175ee5fc4 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1549,15 +1549,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON()) {
// vmin and vmax aren't available in a scalar form, so we can use
- // a NEON instruction with an undef lane instead. This has a performance
- // penalty on some cores, so we don't do this unless we have been
- // asked to by the core tuning model.
- if (Subtarget->useNEONForSinglePrecisionFP()) {
- setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
- }
+ // a NEON instruction with an undef lane instead.
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8e6644821031c17..2c2f3926fc93c58 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -440,8 +440,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit())
setOperationAction(ISD::FPOWI, MVT::i32, Custom);
- if (!Subtarget.hasStdExtZfa())
- setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
+ Subtarget.hasStdExtZfa() ? Legal : Custom);
}
if (Subtarget.hasStdExtFOrZfinx()) {
@@ -464,9 +464,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
- if (Subtarget.hasStdExtZfa())
+ if (Subtarget.hasStdExtZfa()) {
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- else
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
+ } else
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
}
@@ -481,6 +482,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Custom);
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
} else {
if (Subtarget.is64Bit())
setOperationAction(FPRndMode, MVT::f64, Custom);
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index be741f536ac757f..528bfe0411730ab 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -46,12 +46,10 @@ define float @fminnum32_intrinsic(float %x, float %y) {
define float @fminnum32_nsz_intrinsic(float %x, float %y) {
; ARMV7-LABEL: fminnum32_nsz_intrinsic:
; ARMV7: @ %bb.0:
-; ARMV7-NEXT: vmov s0, r0
-; ARMV7-NEXT: vmov s2, r1
-; ARMV7-NEXT: vcmp.f32 s0, s2
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmovlt.f32 s2, s0
-; ARMV7-NEXT: vmov r0, s2
+; ARMV7-NEXT: vmov s0, r1
+; ARMV7-NEXT: vmov s2, r0
+; ARMV7-NEXT: vmin.f32 d0, d1, d0
+; ARMV7-NEXT: vmov r0, s0
; ARMV7-NEXT: bx lr
;
; ARMV8-LABEL: fminnum32_nsz_intrinsic:
@@ -78,9 +76,7 @@ define float @fminnum32_non_zero_intrinsic(float %x) {
; ARMV7: @ %bb.0:
; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00
; ARMV7-NEXT: vmov s2, r0
-; ARMV7-NEXT: vcmp.f32 s2, s0
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmovlt.f32 s0, s2
+; ARMV7-NEXT: vmin.f32 d0, d1, d0
; ARMV7-NEXT: vmov r0, s0
; ARMV7-NEXT: bx lr
;
@@ -136,12 +132,10 @@ define float @fmaxnum32_intrinsic(float %x, float %y) {
define float @fmaxnum32_nsz_intrinsic(float %x, float %y) {
; ARMV7-LABEL: fmaxnum32_nsz_intrinsic:
; ARMV7: @ %bb.0:
-; ARMV7-NEXT: vmov s0, r0
-; ARMV7-NEXT: vmov s2, r1
-; ARMV7-NEXT: vcmp.f32 s0, s2
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmovgt.f32 s2, s0
-; ARMV7-NEXT: vmov r0, s2
+; ARMV7-NEXT: vmov s0, r1
+; ARMV7-NEXT: vmov s2, r0
+; ARMV7-NEXT: vmax.f32 d0, d1, d0
+; ARMV7-NEXT: vmov r0, s0
; ARMV7-NEXT: bx lr
;
; ARMV8-LABEL: fmaxnum32_nsz_intrinsic:
@@ -210,9 +204,7 @@ define float @fmaxnum32_non_zero_intrinsic(float %x) {
; ARMV7: @ %bb.0:
; ARMV7-NEXT: vmov.f32 s0, #1.000000e+00
; ARMV7-NEXT: vmov s2, r0
-; ARMV7-NEXT: vcmp.f32 s2, s0
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmovgt.f32 s0, s2
+; ARMV7-NEXT: vmax.f32 d0, d1, d0
; ARMV7-NEXT: vmov r0, s0
; ARMV7-NEXT: bx lr
;
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
new file mode 100644
index 000000000000000..6d9eb1337682740
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
+
+define fp128 @f128_minimum(fp128 %a, fp128 %b) {
+; CHECK-LABEL: f128_minimum:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscmpuqp 0, 2, 3
+; CHECK-NEXT: vmr 4, 2
+; CHECK-NEXT: bge 0, .LBB0_8
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bun 0, .LBB0_9
+; CHECK-NEXT: .LBB0_2: # %entry
+; CHECK-NEXT: xststdcqp 0, 2, 4
+; CHECK-NEXT: bc 4, 2, .LBB0_10
+; CHECK-NEXT: .LBB0_3: # %entry
+; CHECK-NEXT: xststdcqp 0, 3, 4
+; CHECK-NEXT: bc 12, 2, .LBB0_5
+; CHECK-NEXT: .LBB0_4: # %entry
+; CHECK-NEXT: vmr 3, 2
+; CHECK-NEXT: .LBB0_5: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI0_1 at toc@l
+; CHECK-NEXT: lxv 34, 0(3)
+; CHECK-NEXT: xscmpuqp 0, 4, 2
+; CHECK-NEXT: beq 0, .LBB0_7
+; CHECK-NEXT: # %bb.6: # %entry
+; CHECK-NEXT: vmr 3, 4
+; CHECK-NEXT: .LBB0_7: # %entry
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB0_8: # %entry
+; CHECK-NEXT: vmr 4, 3
+; CHECK-NEXT: bnu 0, .LBB0_2
+; CHECK-NEXT: .LBB0_9:
+; CHECK-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI0_0 at toc@l
+; CHECK-NEXT: lxv 36, 0(3)
+; CHECK-NEXT: xststdcqp 0, 2, 4
+; CHECK-NEXT: bc 12, 2, .LBB0_3
+; CHECK-NEXT: .LBB0_10: # %entry
+; CHECK-NEXT: vmr 2, 4
+; CHECK-NEXT: xststdcqp 0, 3, 4
+; CHECK-NEXT: bc 4, 2, .LBB0_4
+; CHECK-NEXT: b .LBB0_5
+entry:
+ %m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b)
+ ret fp128 %m
+}
+
+define fp128 @f128_maximum(fp128 %a, fp128 %b) {
+; CHECK-LABEL: f128_maximum:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscmpuqp 0, 2, 3
+; CHECK-NEXT: vmr 4, 2
+; CHECK-NEXT: ble 0, .LBB1_8
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bun 0, .LBB1_9
+; CHECK-NEXT: .LBB1_2: # %entry
+; CHECK-NEXT: xststdcqp 0, 2, 8
+; CHECK-NEXT: bc 4, 2, .LBB1_10
+; CHECK-NEXT: .LBB1_3: # %entry
+; CHECK-NEXT: xststdcqp 0, 3, 8
+; CHECK-NEXT: bc 12, 2, .LBB1_5
+; CHECK-NEXT: .LBB1_4: # %entry
+; CHECK-NEXT: vmr 3, 2
+; CHECK-NEXT: .LBB1_5: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI1_1 at toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI1_1 at toc@l
+; CHECK-NEXT: lxv 34, 0(3)
+; CHECK-NEXT: xscmpuqp 0, 4, 2
+; CHECK-NEXT: beq 0, .LBB1_7
+; CHECK-NEXT: # %bb.6: # %entry
+; CHECK-NEXT: vmr 3, 4
+; CHECK-NEXT: .LBB1_7: # %entry
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB1_8: # %entry
+; CHECK-NEXT: vmr 4, 3
+; CHECK-NEXT: bnu 0, .LBB1_2
+; CHECK-NEXT: .LBB1_9:
+; CHECK-NEXT: addis 3, 2, .LCPI1_0 at toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI1_0 at toc@l
+; CHECK-NEXT: lxv 36, 0(3)
+; CHECK-NEXT: xststdcqp 0, 2, 8
+; CHECK-NEXT: bc 12, 2, .LBB1_3
+; CHECK-NEXT: .LBB1_10: # %entry
+; CHECK-NEXT: vmr 2, 4
+; CHECK-NEXT: xststdcqp 0, 3, 8
+; CHECK-NEXT: bc 4, 2, .LBB1_4
+; CHECK-NEXT: b .LBB1_5
+entry:
+ %m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b)
+ ret fp128 %m
+}
+
+declare fp128 @llvm.minimum.f128(fp128, fp128)
+declare fp128 @llvm.maximum.f128(fp128, fp128)
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
new file mode 100644
index 000000000000000..24fa7c716ea2951
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
@@ -0,0 +1,847 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s --check-prefix=NOVSX
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s --check-prefix=VSX
+; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr8 < %s | FileCheck %s --check-prefix=AIX
+
+define float @f32_minimum(float %a, float %b) {
+; NOVSX-LABEL: f32_minimum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: fmr 0, 1
+; NOVSX-NEXT: stfs 2, -8(1)
+; NOVSX-NEXT: stfs 1, -4(1)
+; NOVSX-NEXT: bc 12, 0, .LBB0_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 0, 2
+; NOVSX-NEXT: .LBB0_2: # %entry
+; NOVSX-NEXT: lwz 3, -4(1)
+; NOVSX-NEXT: bc 4, 3, .LBB0_4
+; NOVSX-NEXT: # %bb.3:
+; NOVSX-NEXT: addis 4, 2, .LCPI0_0 at toc@ha
+; NOVSX-NEXT: lfs 0, .LCPI0_0 at toc@l(4)
+; NOVSX-NEXT: .LBB0_4: # %entry
+; NOVSX-NEXT: xoris 3, 3, 32768
+; NOVSX-NEXT: cmplwi 3, 0
+; NOVSX-NEXT: lwz 3, -8(1)
+; NOVSX-NEXT: bc 12, 2, .LBB0_6
+; NOVSX-NEXT: # %bb.5: # %entry
+; NOVSX-NEXT: fmr 1, 0
+; NOVSX-NEXT: .LBB0_6: # %entry
+; NOVSX-NEXT: xoris 3, 3, 32768
+; NOVSX-NEXT: cmplwi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB0_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 2, 1
+; NOVSX-NEXT: .LBB0_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI0_1 at toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB0_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: .LBB0_10: # %entry
+; NOVSX-NEXT: fmr 1, 2
+; NOVSX-NEXT: blr
+;
+; VSX-LABEL: f32_minimum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: fcmpu 0, 1, 2
+; VSX-NEXT: xscvdpspn 0, 1
+; VSX-NEXT: xscvdpspn 3, 2
+; VSX-NEXT: mffprwz 3, 0
+; VSX-NEXT: bc 12, 3, .LBB0_2
+; VSX-NEXT: # %bb.1: # %entry
+; VSX-NEXT: xsmindp 0, 1, 2
+; VSX-NEXT: b .LBB0_3
+; VSX-NEXT: .LBB0_2:
+; VSX-NEXT: addis 4, 2, .LCPI0_0 at toc@ha
+; VSX-NEXT: lfs 0, .LCPI0_0 at toc@l(4)
+; VSX-NEXT: .LBB0_3: # %entry
+; VSX-NEXT: xoris 3, 3, 32768
+; VSX-NEXT: cmplwi 3, 0
+; VSX-NEXT: mffprwz 3, 3
+; VSX-NEXT: bc 12, 2, .LBB0_5
+; VSX-NEXT: # %bb.4: # %entry
+; VSX-NEXT: fmr 1, 0
+; VSX-NEXT: .LBB0_5: # %entry
+; VSX-NEXT: xoris 3, 3, 32768
+; VSX-NEXT: cmplwi 3, 0
+; VSX-NEXT: bc 12, 2, .LBB0_7
+; VSX-NEXT: # %bb.6: # %entry
+; VSX-NEXT: fmr 2, 1
+; VSX-NEXT: .LBB0_7: # %entry
+; VSX-NEXT: xxlxor 1, 1, 1
+; VSX-NEXT: fcmpu 0, 0, 1
+; VSX-NEXT: bc 12, 2, .LBB0_9
+; VSX-NEXT: # %bb.8: # %entry
+; VSX-NEXT: fmr 2, 0
+; VSX-NEXT: .LBB0_9: # %entry
+; VSX-NEXT: fmr 1, 2
+; VSX-NEXT: blr
+;
+; AIX-LABEL: f32_minimum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: fcmpu 0, 1, 2
+; AIX-NEXT: xscvdpspn 0, 1
+; AIX-NEXT: xscvdpspn 3, 2
+; AIX-NEXT: mffprwz 3, 0
+; AIX-NEXT: bc 12, 3, L..BB0_2
+; AIX-NEXT: # %bb.1: # %entry
+; AIX-NEXT: xsmindp 0, 1, 2
+; AIX-NEXT: b L..BB0_3
+; AIX-NEXT: L..BB0_2:
+; AIX-NEXT: ld 4, L..C0(2) # %const.0
+; AIX-NEXT: lfs 0, 0(4)
+; AIX-NEXT: L..BB0_3: # %entry
+; AIX-NEXT: xoris 3, 3, 32768
+; AIX-NEXT: cmplwi 3, 0
+; AIX-NEXT: mffprwz 3, 3
+; AIX-NEXT: bc 12, 2, L..BB0_5
+; AIX-NEXT: # %bb.4: # %entry
+; AIX-NEXT: fmr 1, 0
+; AIX-NEXT: L..BB0_5: # %entry
+; AIX-NEXT: xoris 3, 3, 32768
+; AIX-NEXT: cmplwi 3, 0
+; AIX-NEXT: bc 12, 2, L..BB0_7
+; AIX-NEXT: # %bb.6: # %entry
+; AIX-NEXT: fmr 2, 1
+; AIX-NEXT: L..BB0_7: # %entry
+; AIX-NEXT: xxlxor 1, 1, 1
+; AIX-NEXT: fcmpu 0, 0, 1
+; AIX-NEXT: bc 12, 2, L..BB0_9
+; AIX-NEXT: # %bb.8: # %entry
+; AIX-NEXT: fmr 2, 0
+; AIX-NEXT: L..BB0_9: # %entry
+; AIX-NEXT: fmr 1, 2
+; AIX-NEXT: blr
+entry:
+ %m = call float @llvm.minimum.f32(float %a, float %b)
+ ret float %m
+}
+
+define float @f32_maximum(float %a, float %b) {
+; NOVSX-LABEL: f32_maximum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: fmr 0, 1
+; NOVSX-NEXT: stfs 2, -8(1)
+; NOVSX-NEXT: stfs 1, -4(1)
+; NOVSX-NEXT: bc 12, 1, .LBB1_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 0, 2
+; NOVSX-NEXT: .LBB1_2: # %entry
+; NOVSX-NEXT: lwz 3, -4(1)
+; NOVSX-NEXT: bc 4, 3, .LBB1_4
+; NOVSX-NEXT: # %bb.3:
+; NOVSX-NEXT: addis 4, 2, .LCPI1_0 at toc@ha
+; NOVSX-NEXT: lfs 0, .LCPI1_0 at toc@l(4)
+; NOVSX-NEXT: .LBB1_4: # %entry
+; NOVSX-NEXT: cmpwi 3, 0
+; NOVSX-NEXT: lwz 3, -8(1)
+; NOVSX-NEXT: bc 12, 2, .LBB1_6
+; NOVSX-NEXT: # %bb.5: # %entry
+; NOVSX-NEXT: fmr 1, 0
+; NOVSX-NEXT: .LBB1_6: # %entry
+; NOVSX-NEXT: cmpwi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB1_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 2, 1
+; NOVSX-NEXT: .LBB1_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI1_1 at toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI1_1 at toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB1_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: .LBB1_10: # %entry
+; NOVSX-NEXT: fmr 1, 2
+; NOVSX-NEXT: blr
+;
+; VSX-LABEL: f32_maximum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: fcmpu 0, 1, 2
+; VSX-NEXT: xscvdpspn 0, 1
+; VSX-NEXT: xscvdpspn 3, 2
+; VSX-NEXT: mffprwz 3, 0
+; VSX-NEXT: bc 12, 3, .LBB1_2
+; VSX-NEXT: # %bb.1: # %entry
+; VSX-NEXT: xsmaxdp 0, 1, 2
+; VSX-NEXT: b .LBB1_3
+; VSX-NEXT: .LBB1_2:
+; VSX-NEXT: addis 4, 2, .LCPI1_0 at toc@ha
+; VSX-NEXT: lfs 0, .LCPI1_0 at toc@l(4)
+; VSX-NEXT: .LBB1_3: # %entry
+; VSX-NEXT: cmpwi 3, 0
+; VSX-NEXT: mffprwz 3, 3
+; VSX-NEXT: bc 12, 2, .LBB1_5
+; VSX-NEXT: # %bb.4: # %entry
+; VSX-NEXT: fmr 1, 0
+; VSX-NEXT: .LBB1_5: # %entry
+; VSX-NEXT: cmpwi 3, 0
+; VSX-NEXT: bc 12, 2, .LBB1_7
+; VSX-NEXT: # %bb.6: # %entry
+; VSX-NEXT: fmr 2, 1
+; VSX-NEXT: .LBB1_7: # %entry
+; VSX-NEXT: xxlxor 1, 1, 1
+; VSX-NEXT: fcmpu 0, 0, 1
+; VSX-NEXT: bc 12, 2, .LBB1_9
+; VSX-NEXT: # %bb.8: # %entry
+; VSX-NEXT: fmr 2, 0
+; VSX-NEXT: .LBB1_9: # %entry
+; VSX-NEXT: fmr 1, 2
+; VSX-NEXT: blr
+;
+; AIX-LABEL: f32_maximum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: fcmpu 0, 1, 2
+; AIX-NEXT: xscvdpspn 0, 1
+; AIX-NEXT: xscvdpspn 3, 2
+; AIX-NEXT: mffprwz 3, 0
+; AIX-NEXT: bc 12, 3, L..BB1_2
+; AIX-NEXT: # %bb.1: # %entry
+; AI...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/67301
More information about the llvm-commits
mailing list