[flang-commits] [flang] [clang-tools-extra] [compiler-rt] [clang] [llvm] [Legalizer] Expand fmaximum and fminimum (PR #67301)
Qiu Chaofan via flang-commits
flang-commits at lists.llvm.org
Mon Nov 6 01:23:28 PST 2023
https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/67301
>From 92abb76631594dfc2ca586c46c38031610be0548 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Mon, 25 Sep 2023 17:08:59 +0800
Subject: [PATCH 1/3] [Legalizer] Expand fmaximum and fminimum
According to the LangRef, llvm.maximum/llvm.minimum treat -0.0 as less than
+0.0 and propagate NaN.
Expand the nodes on targets that do not support the operation by adding an
extra check for NaN and using is_fpclass to check the sign of zeros.
---
llvm/include/llvm/CodeGen/TargetLowering.h | 3 +
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 6 +
.../SelectionDAG/LegalizeVectorOps.cpp | 7 +
.../CodeGen/SelectionDAG/TargetLowering.cpp | 58 ++
llvm/lib/Target/ARM/ARMISelLowering.cpp | 14 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 +-
.../CodeGen/ARM/minnum-maxnum-intrinsics.ll | 28 +-
.../CodeGen/PowerPC/fminimum-fmaximum-f128.ll | 97 ++
.../test/CodeGen/PowerPC/fminimum-fmaximum.ll | 847 ++++++++++++++++++
9 files changed, 1039 insertions(+), 31 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
create mode 100644 llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index c6a7aa17146dd4f..429cfd72af2e6e0 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5089,6 +5089,9 @@ class TargetLowering : public TargetLoweringBase {
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
+ /// Expand fminimum/fmaximum into multiple comparisons with selects.
+ SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const;
+
/// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
/// \param N Node to expand
/// \returns The expansion result
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f19beea3a3ed8b7..33f6354d5584540 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3540,6 +3540,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Expanded);
break;
}
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+ if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ }
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index dec81475f3a88fc..db132035adcf293 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -949,6 +949,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 39489e0bf142eb2..23de9829b5e9ffd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8177,6 +8177,64 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
+SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
+ SelectionDAG &DAG) const { // Returns the expanded value, or an empty SDValue on the vector bail-out below.
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned Opc = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ bool NoNaN = (N->getFlags().hasNoNaNs() || // True with the no-NaNs flag, or when both operands are known never NaN.
+ (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)));
+ bool NoZeroSign = // True with the no-signed-zeros flag, or when either operand is known never zero.
+ (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
+ DAG.isKnownNeverZeroFloat(RHS));
+ bool IsMax = Opc == ISD::FMAXIMUM;
+ // Decline to expand a vector whose scalar type has Opc legal, custom or promoted. NOTE(review): presumably the caller then unrolls to scalars - confirm.
+ if (VT.isVector() &&
+ isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType()))
+ return SDValue();
+ // Pick the base min/max: FMAXNUM_IEEE/FMINNUM_IEEE if legal or custom, else FMAXNUM/FMINNUM if legal or custom, else a setcc + select.
+ SDValue MinMax;
+ if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE,
+ VT))
+ MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, DL, VT,
+ LHS, RHS);
+ else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, VT))
+ MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, DL, VT, LHS, RHS);
+ else
+ MinMax = DAG.getSelect(
+ DL, VT,
+ DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT), LHS,
+ RHS);
+
+ // Propagate NaN from either operand: when LHS and RHS compare unordered (SETUO), replace the result with a NaN constant.
+ if (!NoNaN) {
+ ConstantFP *FPNaN = ConstantFP::get(
+ *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
+ MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
+ DAG.getConstantFP(*FPNaN, DL, VT), MinMax);
+ }
+
+ // fminimum/fmaximum requires -0.0 less than +0.0: when MinMax compares equal to 0.0, prefer an operand that is +0.0 (max) or -0.0 (min).
+ if (!NoZeroSign) {
+ SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+ DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+ SDValue TestZero = // Class mask for the preferred zero: +0.0 for max, -0.0 for min.
+ DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
+ SDValue LCmp = DAG.getSelect( // LHS if it is the preferred zero, otherwise MinMax.
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+ MinMax);
+ SDValue RCmp = DAG.getSelect( // RHS if it is the preferred zero, otherwise LCmp; RHS is tested last and so takes priority.
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
+ LCmp);
+ MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax); // Apply the zero fix-up only when the result compares equal to zero.
+ }
+
+ return MinMax;
+}
+
/// Returns a true value if if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 69ef942df1f6e78..9eac62175ee5fc4 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1549,15 +1549,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON()) {
// vmin and vmax aren't available in a scalar form, so we can use
- // a NEON instruction with an undef lane instead. This has a performance
- // penalty on some cores, so we don't do this unless we have been
- // asked to by the core tuning model.
- if (Subtarget->useNEONForSinglePrecisionFP()) {
- setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
- }
+ // a NEON instruction with an undef lane instead.
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8e6644821031c17..2c2f3926fc93c58 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -440,8 +440,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit())
setOperationAction(ISD::FPOWI, MVT::i32, Custom);
- if (!Subtarget.hasStdExtZfa())
- setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
+ Subtarget.hasStdExtZfa() ? Legal : Custom);
}
if (Subtarget.hasStdExtFOrZfinx()) {
@@ -464,9 +464,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
- if (Subtarget.hasStdExtZfa())
+ if (Subtarget.hasStdExtZfa()) {
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- else
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
+ } else
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
}
@@ -481,6 +482,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Custom);
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
} else {
if (Subtarget.is64Bit())
setOperationAction(FPRndMode, MVT::f64, Custom);
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index be741f536ac757f..528bfe0411730ab 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -46,12 +46,10 @@ define float @fminnum32_intrinsic(float %x, float %y) {
define float @fminnum32_nsz_intrinsic(float %x, float %y) {
; ARMV7-LABEL: fminnum32_nsz_intrinsic:
; ARMV7: @ %bb.0:
-; ARMV7-NEXT: vmov s0, r0
-; ARMV7-NEXT: vmov s2, r1
-; ARMV7-NEXT: vcmp.f32 s0, s2
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmovlt.f32 s2, s0
-; ARMV7-NEXT: vmov r0, s2
+; ARMV7-NEXT: vmov s0, r1
+; ARMV7-NEXT: vmov s2, r0
+; ARMV7-NEXT: vmin.f32 d0, d1, d0
+; ARMV7-NEXT: vmov r0, s0
; ARMV7-NEXT: bx lr
;
; ARMV8-LABEL: fminnum32_nsz_intrinsic:
@@ -78,9 +76,7 @@ define float @fminnum32_non_zero_intrinsic(float %x) {
; ARMV7: @ %bb.0:
; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00
; ARMV7-NEXT: vmov s2, r0
-; ARMV7-NEXT: vcmp.f32 s2, s0
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmovlt.f32 s0, s2
+; ARMV7-NEXT: vmin.f32 d0, d1, d0
; ARMV7-NEXT: vmov r0, s0
; ARMV7-NEXT: bx lr
;
@@ -136,12 +132,10 @@ define float @fmaxnum32_intrinsic(float %x, float %y) {
define float @fmaxnum32_nsz_intrinsic(float %x, float %y) {
; ARMV7-LABEL: fmaxnum32_nsz_intrinsic:
; ARMV7: @ %bb.0:
-; ARMV7-NEXT: vmov s0, r0
-; ARMV7-NEXT: vmov s2, r1
-; ARMV7-NEXT: vcmp.f32 s0, s2
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmovgt.f32 s2, s0
-; ARMV7-NEXT: vmov r0, s2
+; ARMV7-NEXT: vmov s0, r1
+; ARMV7-NEXT: vmov s2, r0
+; ARMV7-NEXT: vmax.f32 d0, d1, d0
+; ARMV7-NEXT: vmov r0, s0
; ARMV7-NEXT: bx lr
;
; ARMV8-LABEL: fmaxnum32_nsz_intrinsic:
@@ -210,9 +204,7 @@ define float @fmaxnum32_non_zero_intrinsic(float %x) {
; ARMV7: @ %bb.0:
; ARMV7-NEXT: vmov.f32 s0, #1.000000e+00
; ARMV7-NEXT: vmov s2, r0
-; ARMV7-NEXT: vcmp.f32 s2, s0
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmovgt.f32 s0, s2
+; ARMV7-NEXT: vmax.f32 d0, d1, d0
; ARMV7-NEXT: vmov r0, s0
; ARMV7-NEXT: bx lr
;
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
new file mode 100644
index 000000000000000..6d9eb1337682740
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
+
+define fp128 @f128_minimum(fp128 %a, fp128 %b) {
+; CHECK-LABEL: f128_minimum:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscmpuqp 0, 2, 3
+; CHECK-NEXT: vmr 4, 2
+; CHECK-NEXT: bge 0, .LBB0_8
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bun 0, .LBB0_9
+; CHECK-NEXT: .LBB0_2: # %entry
+; CHECK-NEXT: xststdcqp 0, 2, 4
+; CHECK-NEXT: bc 4, 2, .LBB0_10
+; CHECK-NEXT: .LBB0_3: # %entry
+; CHECK-NEXT: xststdcqp 0, 3, 4
+; CHECK-NEXT: bc 12, 2, .LBB0_5
+; CHECK-NEXT: .LBB0_4: # %entry
+; CHECK-NEXT: vmr 3, 2
+; CHECK-NEXT: .LBB0_5: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI0_1 at toc@l
+; CHECK-NEXT: lxv 34, 0(3)
+; CHECK-NEXT: xscmpuqp 0, 4, 2
+; CHECK-NEXT: beq 0, .LBB0_7
+; CHECK-NEXT: # %bb.6: # %entry
+; CHECK-NEXT: vmr 3, 4
+; CHECK-NEXT: .LBB0_7: # %entry
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB0_8: # %entry
+; CHECK-NEXT: vmr 4, 3
+; CHECK-NEXT: bnu 0, .LBB0_2
+; CHECK-NEXT: .LBB0_9:
+; CHECK-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI0_0 at toc@l
+; CHECK-NEXT: lxv 36, 0(3)
+; CHECK-NEXT: xststdcqp 0, 2, 4
+; CHECK-NEXT: bc 12, 2, .LBB0_3
+; CHECK-NEXT: .LBB0_10: # %entry
+; CHECK-NEXT: vmr 2, 4
+; CHECK-NEXT: xststdcqp 0, 3, 4
+; CHECK-NEXT: bc 4, 2, .LBB0_4
+; CHECK-NEXT: b .LBB0_5
+entry:
+ %m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b)
+ ret fp128 %m
+}
+
+define fp128 @f128_maximum(fp128 %a, fp128 %b) {
+; CHECK-LABEL: f128_maximum:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscmpuqp 0, 2, 3
+; CHECK-NEXT: vmr 4, 2
+; CHECK-NEXT: ble 0, .LBB1_8
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bun 0, .LBB1_9
+; CHECK-NEXT: .LBB1_2: # %entry
+; CHECK-NEXT: xststdcqp 0, 2, 8
+; CHECK-NEXT: bc 4, 2, .LBB1_10
+; CHECK-NEXT: .LBB1_3: # %entry
+; CHECK-NEXT: xststdcqp 0, 3, 8
+; CHECK-NEXT: bc 12, 2, .LBB1_5
+; CHECK-NEXT: .LBB1_4: # %entry
+; CHECK-NEXT: vmr 3, 2
+; CHECK-NEXT: .LBB1_5: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI1_1 at toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI1_1 at toc@l
+; CHECK-NEXT: lxv 34, 0(3)
+; CHECK-NEXT: xscmpuqp 0, 4, 2
+; CHECK-NEXT: beq 0, .LBB1_7
+; CHECK-NEXT: # %bb.6: # %entry
+; CHECK-NEXT: vmr 3, 4
+; CHECK-NEXT: .LBB1_7: # %entry
+; CHECK-NEXT: vmr 2, 3
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB1_8: # %entry
+; CHECK-NEXT: vmr 4, 3
+; CHECK-NEXT: bnu 0, .LBB1_2
+; CHECK-NEXT: .LBB1_9:
+; CHECK-NEXT: addis 3, 2, .LCPI1_0 at toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI1_0 at toc@l
+; CHECK-NEXT: lxv 36, 0(3)
+; CHECK-NEXT: xststdcqp 0, 2, 8
+; CHECK-NEXT: bc 12, 2, .LBB1_3
+; CHECK-NEXT: .LBB1_10: # %entry
+; CHECK-NEXT: vmr 2, 4
+; CHECK-NEXT: xststdcqp 0, 3, 8
+; CHECK-NEXT: bc 4, 2, .LBB1_4
+; CHECK-NEXT: b .LBB1_5
+entry:
+ %m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b)
+ ret fp128 %m
+}
+
+declare fp128 @llvm.minimum.f128(fp128, fp128)
+declare fp128 @llvm.maximum.f128(fp128, fp128)
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
new file mode 100644
index 000000000000000..24fa7c716ea2951
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
@@ -0,0 +1,847 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s --check-prefix=NOVSX
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s --check-prefix=VSX
+; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr8 < %s | FileCheck %s --check-prefix=AIX
+
+define float @f32_minimum(float %a, float %b) {
+; NOVSX-LABEL: f32_minimum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: fmr 0, 1
+; NOVSX-NEXT: stfs 2, -8(1)
+; NOVSX-NEXT: stfs 1, -4(1)
+; NOVSX-NEXT: bc 12, 0, .LBB0_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 0, 2
+; NOVSX-NEXT: .LBB0_2: # %entry
+; NOVSX-NEXT: lwz 3, -4(1)
+; NOVSX-NEXT: bc 4, 3, .LBB0_4
+; NOVSX-NEXT: # %bb.3:
+; NOVSX-NEXT: addis 4, 2, .LCPI0_0 at toc@ha
+; NOVSX-NEXT: lfs 0, .LCPI0_0 at toc@l(4)
+; NOVSX-NEXT: .LBB0_4: # %entry
+; NOVSX-NEXT: xoris 3, 3, 32768
+; NOVSX-NEXT: cmplwi 3, 0
+; NOVSX-NEXT: lwz 3, -8(1)
+; NOVSX-NEXT: bc 12, 2, .LBB0_6
+; NOVSX-NEXT: # %bb.5: # %entry
+; NOVSX-NEXT: fmr 1, 0
+; NOVSX-NEXT: .LBB0_6: # %entry
+; NOVSX-NEXT: xoris 3, 3, 32768
+; NOVSX-NEXT: cmplwi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB0_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 2, 1
+; NOVSX-NEXT: .LBB0_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI0_1 at toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB0_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: .LBB0_10: # %entry
+; NOVSX-NEXT: fmr 1, 2
+; NOVSX-NEXT: blr
+;
+; VSX-LABEL: f32_minimum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: fcmpu 0, 1, 2
+; VSX-NEXT: xscvdpspn 0, 1
+; VSX-NEXT: xscvdpspn 3, 2
+; VSX-NEXT: mffprwz 3, 0
+; VSX-NEXT: bc 12, 3, .LBB0_2
+; VSX-NEXT: # %bb.1: # %entry
+; VSX-NEXT: xsmindp 0, 1, 2
+; VSX-NEXT: b .LBB0_3
+; VSX-NEXT: .LBB0_2:
+; VSX-NEXT: addis 4, 2, .LCPI0_0 at toc@ha
+; VSX-NEXT: lfs 0, .LCPI0_0 at toc@l(4)
+; VSX-NEXT: .LBB0_3: # %entry
+; VSX-NEXT: xoris 3, 3, 32768
+; VSX-NEXT: cmplwi 3, 0
+; VSX-NEXT: mffprwz 3, 3
+; VSX-NEXT: bc 12, 2, .LBB0_5
+; VSX-NEXT: # %bb.4: # %entry
+; VSX-NEXT: fmr 1, 0
+; VSX-NEXT: .LBB0_5: # %entry
+; VSX-NEXT: xoris 3, 3, 32768
+; VSX-NEXT: cmplwi 3, 0
+; VSX-NEXT: bc 12, 2, .LBB0_7
+; VSX-NEXT: # %bb.6: # %entry
+; VSX-NEXT: fmr 2, 1
+; VSX-NEXT: .LBB0_7: # %entry
+; VSX-NEXT: xxlxor 1, 1, 1
+; VSX-NEXT: fcmpu 0, 0, 1
+; VSX-NEXT: bc 12, 2, .LBB0_9
+; VSX-NEXT: # %bb.8: # %entry
+; VSX-NEXT: fmr 2, 0
+; VSX-NEXT: .LBB0_9: # %entry
+; VSX-NEXT: fmr 1, 2
+; VSX-NEXT: blr
+;
+; AIX-LABEL: f32_minimum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: fcmpu 0, 1, 2
+; AIX-NEXT: xscvdpspn 0, 1
+; AIX-NEXT: xscvdpspn 3, 2
+; AIX-NEXT: mffprwz 3, 0
+; AIX-NEXT: bc 12, 3, L..BB0_2
+; AIX-NEXT: # %bb.1: # %entry
+; AIX-NEXT: xsmindp 0, 1, 2
+; AIX-NEXT: b L..BB0_3
+; AIX-NEXT: L..BB0_2:
+; AIX-NEXT: ld 4, L..C0(2) # %const.0
+; AIX-NEXT: lfs 0, 0(4)
+; AIX-NEXT: L..BB0_3: # %entry
+; AIX-NEXT: xoris 3, 3, 32768
+; AIX-NEXT: cmplwi 3, 0
+; AIX-NEXT: mffprwz 3, 3
+; AIX-NEXT: bc 12, 2, L..BB0_5
+; AIX-NEXT: # %bb.4: # %entry
+; AIX-NEXT: fmr 1, 0
+; AIX-NEXT: L..BB0_5: # %entry
+; AIX-NEXT: xoris 3, 3, 32768
+; AIX-NEXT: cmplwi 3, 0
+; AIX-NEXT: bc 12, 2, L..BB0_7
+; AIX-NEXT: # %bb.6: # %entry
+; AIX-NEXT: fmr 2, 1
+; AIX-NEXT: L..BB0_7: # %entry
+; AIX-NEXT: xxlxor 1, 1, 1
+; AIX-NEXT: fcmpu 0, 0, 1
+; AIX-NEXT: bc 12, 2, L..BB0_9
+; AIX-NEXT: # %bb.8: # %entry
+; AIX-NEXT: fmr 2, 0
+; AIX-NEXT: L..BB0_9: # %entry
+; AIX-NEXT: fmr 1, 2
+; AIX-NEXT: blr
+entry:
+ %m = call float @llvm.minimum.f32(float %a, float %b)
+ ret float %m
+}
+
+define float @f32_maximum(float %a, float %b) {
+; NOVSX-LABEL: f32_maximum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: fmr 0, 1
+; NOVSX-NEXT: stfs 2, -8(1)
+; NOVSX-NEXT: stfs 1, -4(1)
+; NOVSX-NEXT: bc 12, 1, .LBB1_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 0, 2
+; NOVSX-NEXT: .LBB1_2: # %entry
+; NOVSX-NEXT: lwz 3, -4(1)
+; NOVSX-NEXT: bc 4, 3, .LBB1_4
+; NOVSX-NEXT: # %bb.3:
+; NOVSX-NEXT: addis 4, 2, .LCPI1_0 at toc@ha
+; NOVSX-NEXT: lfs 0, .LCPI1_0 at toc@l(4)
+; NOVSX-NEXT: .LBB1_4: # %entry
+; NOVSX-NEXT: cmpwi 3, 0
+; NOVSX-NEXT: lwz 3, -8(1)
+; NOVSX-NEXT: bc 12, 2, .LBB1_6
+; NOVSX-NEXT: # %bb.5: # %entry
+; NOVSX-NEXT: fmr 1, 0
+; NOVSX-NEXT: .LBB1_6: # %entry
+; NOVSX-NEXT: cmpwi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB1_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 2, 1
+; NOVSX-NEXT: .LBB1_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI1_1 at toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI1_1 at toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB1_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: .LBB1_10: # %entry
+; NOVSX-NEXT: fmr 1, 2
+; NOVSX-NEXT: blr
+;
+; VSX-LABEL: f32_maximum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: fcmpu 0, 1, 2
+; VSX-NEXT: xscvdpspn 0, 1
+; VSX-NEXT: xscvdpspn 3, 2
+; VSX-NEXT: mffprwz 3, 0
+; VSX-NEXT: bc 12, 3, .LBB1_2
+; VSX-NEXT: # %bb.1: # %entry
+; VSX-NEXT: xsmaxdp 0, 1, 2
+; VSX-NEXT: b .LBB1_3
+; VSX-NEXT: .LBB1_2:
+; VSX-NEXT: addis 4, 2, .LCPI1_0 at toc@ha
+; VSX-NEXT: lfs 0, .LCPI1_0 at toc@l(4)
+; VSX-NEXT: .LBB1_3: # %entry
+; VSX-NEXT: cmpwi 3, 0
+; VSX-NEXT: mffprwz 3, 3
+; VSX-NEXT: bc 12, 2, .LBB1_5
+; VSX-NEXT: # %bb.4: # %entry
+; VSX-NEXT: fmr 1, 0
+; VSX-NEXT: .LBB1_5: # %entry
+; VSX-NEXT: cmpwi 3, 0
+; VSX-NEXT: bc 12, 2, .LBB1_7
+; VSX-NEXT: # %bb.6: # %entry
+; VSX-NEXT: fmr 2, 1
+; VSX-NEXT: .LBB1_7: # %entry
+; VSX-NEXT: xxlxor 1, 1, 1
+; VSX-NEXT: fcmpu 0, 0, 1
+; VSX-NEXT: bc 12, 2, .LBB1_9
+; VSX-NEXT: # %bb.8: # %entry
+; VSX-NEXT: fmr 2, 0
+; VSX-NEXT: .LBB1_9: # %entry
+; VSX-NEXT: fmr 1, 2
+; VSX-NEXT: blr
+;
+; AIX-LABEL: f32_maximum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: fcmpu 0, 1, 2
+; AIX-NEXT: xscvdpspn 0, 1
+; AIX-NEXT: xscvdpspn 3, 2
+; AIX-NEXT: mffprwz 3, 0
+; AIX-NEXT: bc 12, 3, L..BB1_2
+; AIX-NEXT: # %bb.1: # %entry
+; AIX-NEXT: xsmaxdp 0, 1, 2
+; AIX-NEXT: b L..BB1_3
+; AIX-NEXT: L..BB1_2:
+; AIX-NEXT: ld 4, L..C1(2) # %const.0
+; AIX-NEXT: lfs 0, 0(4)
+; AIX-NEXT: L..BB1_3: # %entry
+; AIX-NEXT: cmpwi 3, 0
+; AIX-NEXT: mffprwz 3, 3
+; AIX-NEXT: bc 12, 2, L..BB1_5
+; AIX-NEXT: # %bb.4: # %entry
+; AIX-NEXT: fmr 1, 0
+; AIX-NEXT: L..BB1_5: # %entry
+; AIX-NEXT: cmpwi 3, 0
+; AIX-NEXT: bc 12, 2, L..BB1_7
+; AIX-NEXT: # %bb.6: # %entry
+; AIX-NEXT: fmr 2, 1
+; AIX-NEXT: L..BB1_7: # %entry
+; AIX-NEXT: xxlxor 1, 1, 1
+; AIX-NEXT: fcmpu 0, 0, 1
+; AIX-NEXT: bc 12, 2, L..BB1_9
+; AIX-NEXT: # %bb.8: # %entry
+; AIX-NEXT: fmr 2, 0
+; AIX-NEXT: L..BB1_9: # %entry
+; AIX-NEXT: fmr 1, 2
+; AIX-NEXT: blr
+entry:
+ %m = call float @llvm.maximum.f32(float %a, float %b)
+ ret float %m
+}
+
+define double @f64_minimum(double %a, double %b) {
+; NOVSX-LABEL: f64_minimum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: fmr 0, 1
+; NOVSX-NEXT: stfd 2, -16(1)
+; NOVSX-NEXT: stfd 1, -8(1)
+; NOVSX-NEXT: bc 12, 0, .LBB2_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 0, 2
+; NOVSX-NEXT: .LBB2_2: # %entry
+; NOVSX-NEXT: ld 3, -8(1)
+; NOVSX-NEXT: bc 4, 3, .LBB2_4
+; NOVSX-NEXT: # %bb.3:
+; NOVSX-NEXT: addis 4, 2, .LCPI2_0 at toc@ha
+; NOVSX-NEXT: lfs 0, .LCPI2_0 at toc@l(4)
+; NOVSX-NEXT: .LBB2_4: # %entry
+; NOVSX-NEXT: li 4, 1
+; NOVSX-NEXT: rldic 4, 4, 63, 0
+; NOVSX-NEXT: cmpd 3, 4
+; NOVSX-NEXT: ld 3, -16(1)
+; NOVSX-NEXT: bc 12, 2, .LBB2_6
+; NOVSX-NEXT: # %bb.5: # %entry
+; NOVSX-NEXT: fmr 1, 0
+; NOVSX-NEXT: .LBB2_6: # %entry
+; NOVSX-NEXT: cmpd 3, 4
+; NOVSX-NEXT: bc 12, 2, .LBB2_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 2, 1
+; NOVSX-NEXT: .LBB2_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI2_1 at toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI2_1 at toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB2_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: .LBB2_10: # %entry
+; NOVSX-NEXT: fmr 1, 2
+; NOVSX-NEXT: blr
+;
+; VSX-LABEL: f64_minimum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: fcmpu 0, 1, 2
+; VSX-NEXT: mffprd 3, 1
+; VSX-NEXT: bc 12, 3, .LBB2_2
+; VSX-NEXT: # %bb.1: # %entry
+; VSX-NEXT: xsmindp 0, 1, 2
+; VSX-NEXT: b .LBB2_3
+; VSX-NEXT: .LBB2_2:
+; VSX-NEXT: addis 4, 2, .LCPI2_0 at toc@ha
+; VSX-NEXT: lfs 0, .LCPI2_0 at toc@l(4)
+; VSX-NEXT: .LBB2_3: # %entry
+; VSX-NEXT: li 4, 1
+; VSX-NEXT: rldic 4, 4, 63, 0
+; VSX-NEXT: cmpd 3, 4
+; VSX-NEXT: mffprd 3, 2
+; VSX-NEXT: bc 12, 2, .LBB2_5
+; VSX-NEXT: # %bb.4: # %entry
+; VSX-NEXT: fmr 1, 0
+; VSX-NEXT: .LBB2_5: # %entry
+; VSX-NEXT: cmpd 3, 4
+; VSX-NEXT: bc 12, 2, .LBB2_7
+; VSX-NEXT: # %bb.6: # %entry
+; VSX-NEXT: fmr 2, 1
+; VSX-NEXT: .LBB2_7: # %entry
+; VSX-NEXT: xxlxor 1, 1, 1
+; VSX-NEXT: fcmpu 0, 0, 1
+; VSX-NEXT: bc 12, 2, .LBB2_9
+; VSX-NEXT: # %bb.8: # %entry
+; VSX-NEXT: fmr 2, 0
+; VSX-NEXT: .LBB2_9: # %entry
+; VSX-NEXT: fmr 1, 2
+; VSX-NEXT: blr
+;
+; AIX-LABEL: f64_minimum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: fcmpu 0, 1, 2
+; AIX-NEXT: mffprd 3, 1
+; AIX-NEXT: bc 12, 3, L..BB2_2
+; AIX-NEXT: # %bb.1: # %entry
+; AIX-NEXT: xsmindp 0, 1, 2
+; AIX-NEXT: b L..BB2_3
+; AIX-NEXT: L..BB2_2:
+; AIX-NEXT: ld 4, L..C2(2) # %const.0
+; AIX-NEXT: lfs 0, 0(4)
+; AIX-NEXT: L..BB2_3: # %entry
+; AIX-NEXT: li 4, 1
+; AIX-NEXT: rldic 4, 4, 63, 0
+; AIX-NEXT: cmpd 3, 4
+; AIX-NEXT: mffprd 3, 2
+; AIX-NEXT: bc 12, 2, L..BB2_5
+; AIX-NEXT: # %bb.4: # %entry
+; AIX-NEXT: fmr 1, 0
+; AIX-NEXT: L..BB2_5: # %entry
+; AIX-NEXT: cmpd 3, 4
+; AIX-NEXT: bc 12, 2, L..BB2_7
+; AIX-NEXT: # %bb.6: # %entry
+; AIX-NEXT: fmr 2, 1
+; AIX-NEXT: L..BB2_7: # %entry
+; AIX-NEXT: xxlxor 1, 1, 1
+; AIX-NEXT: fcmpu 0, 0, 1
+; AIX-NEXT: bc 12, 2, L..BB2_9
+; AIX-NEXT: # %bb.8: # %entry
+; AIX-NEXT: fmr 2, 0
+; AIX-NEXT: L..BB2_9: # %entry
+; AIX-NEXT: fmr 1, 2
+; AIX-NEXT: blr
+entry:
+ %m = call double @llvm.minimum.f64(double %a, double %b)
+ ret double %m
+}
+
+define double @f64_maximum(double %a, double %b) {
+; NOVSX-LABEL: f64_maximum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: fcmpu 0, 1, 2
+; NOVSX-NEXT: fmr 0, 1
+; NOVSX-NEXT: stfd 2, -16(1)
+; NOVSX-NEXT: stfd 1, -8(1)
+; NOVSX-NEXT: bc 12, 1, .LBB3_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 0, 2
+; NOVSX-NEXT: .LBB3_2: # %entry
+; NOVSX-NEXT: ld 3, -8(1)
+; NOVSX-NEXT: bc 4, 3, .LBB3_4
+; NOVSX-NEXT: # %bb.3:
+; NOVSX-NEXT: addis 4, 2, .LCPI3_0 at toc@ha
+; NOVSX-NEXT: lfs 0, .LCPI3_0 at toc@l(4)
+; NOVSX-NEXT: .LBB3_4: # %entry
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: ld 3, -16(1)
+; NOVSX-NEXT: bc 12, 2, .LBB3_6
+; NOVSX-NEXT: # %bb.5: # %entry
+; NOVSX-NEXT: fmr 1, 0
+; NOVSX-NEXT: .LBB3_6: # %entry
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB3_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 2, 1
+; NOVSX-NEXT: .LBB3_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI3_1 at toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI3_1 at toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB3_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: .LBB3_10: # %entry
+; NOVSX-NEXT: fmr 1, 2
+; NOVSX-NEXT: blr
+;
+; VSX-LABEL: f64_maximum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: fcmpu 0, 1, 2
+; VSX-NEXT: mffprd 3, 1
+; VSX-NEXT: bc 12, 3, .LBB3_2
+; VSX-NEXT: # %bb.1: # %entry
+; VSX-NEXT: xsmaxdp 0, 1, 2
+; VSX-NEXT: b .LBB3_3
+; VSX-NEXT: .LBB3_2:
+; VSX-NEXT: addis 4, 2, .LCPI3_0 at toc@ha
+; VSX-NEXT: lfs 0, .LCPI3_0 at toc@l(4)
+; VSX-NEXT: .LBB3_3: # %entry
+; VSX-NEXT: cmpdi 3, 0
+; VSX-NEXT: mffprd 3, 2
+; VSX-NEXT: bc 12, 2, .LBB3_5
+; VSX-NEXT: # %bb.4: # %entry
+; VSX-NEXT: fmr 1, 0
+; VSX-NEXT: .LBB3_5: # %entry
+; VSX-NEXT: cmpdi 3, 0
+; VSX-NEXT: bc 12, 2, .LBB3_7
+; VSX-NEXT: # %bb.6: # %entry
+; VSX-NEXT: fmr 2, 1
+; VSX-NEXT: .LBB3_7: # %entry
+; VSX-NEXT: xxlxor 1, 1, 1
+; VSX-NEXT: fcmpu 0, 0, 1
+; VSX-NEXT: bc 12, 2, .LBB3_9
+; VSX-NEXT: # %bb.8: # %entry
+; VSX-NEXT: fmr 2, 0
+; VSX-NEXT: .LBB3_9: # %entry
+; VSX-NEXT: fmr 1, 2
+; VSX-NEXT: blr
+;
+; AIX-LABEL: f64_maximum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: fcmpu 0, 1, 2
+; AIX-NEXT: mffprd 3, 1
+; AIX-NEXT: bc 12, 3, L..BB3_2
+; AIX-NEXT: # %bb.1: # %entry
+; AIX-NEXT: xsmaxdp 0, 1, 2
+; AIX-NEXT: b L..BB3_3
+; AIX-NEXT: L..BB3_2:
+; AIX-NEXT: ld 4, L..C3(2) # %const.0
+; AIX-NEXT: lfs 0, 0(4)
+; AIX-NEXT: L..BB3_3: # %entry
+; AIX-NEXT: cmpdi 3, 0
+; AIX-NEXT: mffprd 3, 2
+; AIX-NEXT: bc 12, 2, L..BB3_5
+; AIX-NEXT: # %bb.4: # %entry
+; AIX-NEXT: fmr 1, 0
+; AIX-NEXT: L..BB3_5: # %entry
+; AIX-NEXT: cmpdi 3, 0
+; AIX-NEXT: bc 12, 2, L..BB3_7
+; AIX-NEXT: # %bb.6: # %entry
+; AIX-NEXT: fmr 2, 1
+; AIX-NEXT: L..BB3_7: # %entry
+; AIX-NEXT: xxlxor 1, 1, 1
+; AIX-NEXT: fcmpu 0, 0, 1
+; AIX-NEXT: bc 12, 2, L..BB3_9
+; AIX-NEXT: # %bb.8: # %entry
+; AIX-NEXT: fmr 2, 0
+; AIX-NEXT: L..BB3_9: # %entry
+; AIX-NEXT: fmr 1, 2
+; AIX-NEXT: blr
+entry:
+ %m = call double @llvm.maximum.f64(double %a, double %b)
+ ret double %m
+}
+
+define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
+; NOVSX-LABEL: v4f32_minimum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: vcmpeqfp 5, 3, 3
+; NOVSX-NEXT: vspltisb 4, -1
+; NOVSX-NEXT: addis 3, 2, .LCPI4_0 at toc@ha
+; NOVSX-NEXT: vcmpeqfp 0, 2, 2
+; NOVSX-NEXT: addi 3, 3, .LCPI4_0 at toc@l
+; NOVSX-NEXT: vcmpgtfp 1, 3, 2
+; NOVSX-NEXT: vslw 4, 4, 4
+; NOVSX-NEXT: vnot 5, 5
+; NOVSX-NEXT: vnot 0, 0
+; NOVSX-NEXT: vsel 1, 3, 2, 1
+; NOVSX-NEXT: vor 5, 0, 5
+; NOVSX-NEXT: lvx 0, 0, 3
+; NOVSX-NEXT: vsel 5, 1, 0, 5
+; NOVSX-NEXT: vcmpequw 0, 2, 4
+; NOVSX-NEXT: vcmpequw 4, 3, 4
+; NOVSX-NEXT: vsel 2, 5, 2, 0
+; NOVSX-NEXT: vxor 0, 0, 0
+; NOVSX-NEXT: vsel 2, 2, 3, 4
+; NOVSX-NEXT: vcmpeqfp 3, 5, 0
+; NOVSX-NEXT: vsel 2, 5, 2, 3
+; NOVSX-NEXT: blr
+;
+; VSX-LABEL: v4f32_minimum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: xxleqv 36, 36, 36
+; VSX-NEXT: addis 3, 2, .LCPI4_0 at toc@ha
+; VSX-NEXT: xvcmpeqsp 0, 35, 35
+; VSX-NEXT: addi 3, 3, .LCPI4_0 at toc@l
+; VSX-NEXT: xvcmpeqsp 1, 34, 34
+; VSX-NEXT: lxvd2x 3, 0, 3
+; VSX-NEXT: vslw 4, 4, 4
+; VSX-NEXT: xvminsp 2, 34, 35
+; VSX-NEXT: xxlnor 0, 0, 0
+; VSX-NEXT: xxlnor 1, 1, 1
+; VSX-NEXT: vcmpequw 5, 2, 4
+; VSX-NEXT: xxlor 0, 1, 0
+; VSX-NEXT: vcmpequw 4, 3, 4
+; VSX-NEXT: xxsel 0, 2, 3, 0
+; VSX-NEXT: xxlxor 1, 1, 1
+; VSX-NEXT: xxsel 2, 0, 34, 37
+; VSX-NEXT: xvcmpeqsp 1, 0, 1
+; VSX-NEXT: xxsel 2, 2, 35, 36
+; VSX-NEXT: xxsel 34, 0, 2, 1
+; VSX-NEXT: blr
+;
+; AIX-LABEL: v4f32_minimum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: xxleqv 36, 36, 36
+; AIX-NEXT: ld 3, L..C4(2) # %const.0
+; AIX-NEXT: xvcmpeqsp 0, 35, 35
+; AIX-NEXT: xvcmpeqsp 1, 34, 34
+; AIX-NEXT: vslw 4, 4, 4
+; AIX-NEXT: lxvw4x 3, 0, 3
+; AIX-NEXT: xvminsp 2, 34, 35
+; AIX-NEXT: xxlnor 0, 0, 0
+; AIX-NEXT: xxlnor 1, 1, 1
+; AIX-NEXT: vcmpequw 5, 2, 4
+; AIX-NEXT: xxlor 0, 1, 0
+; AIX-NEXT: vcmpequw 4, 3, 4
+; AIX-NEXT: xxsel 0, 2, 3, 0
+; AIX-NEXT: xxlxor 1, 1, 1
+; AIX-NEXT: xxsel 2, 0, 34, 37
+; AIX-NEXT: xvcmpeqsp 1, 0, 1
+; AIX-NEXT: xxsel 2, 2, 35, 36
+; AIX-NEXT: xxsel 34, 0, 2, 1
+; AIX-NEXT: blr
+entry:
+ %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %m
+}
+
+define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
+; NOVSX-LABEL: v4f32_maximum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: vcmpeqfp 4, 3, 3
+; NOVSX-NEXT: addis 3, 2, .LCPI5_0 at toc@ha
+; NOVSX-NEXT: vcmpeqfp 5, 2, 2
+; NOVSX-NEXT: addi 3, 3, .LCPI5_0 at toc@l
+; NOVSX-NEXT: vcmpgtfp 0, 2, 3
+; NOVSX-NEXT: lvx 1, 0, 3
+; NOVSX-NEXT: vnot 4, 4
+; NOVSX-NEXT: vnot 5, 5
+; NOVSX-NEXT: vsel 0, 3, 2, 0
+; NOVSX-NEXT: vor 4, 5, 4
+; NOVSX-NEXT: vxor 5, 5, 5
+; NOVSX-NEXT: vsel 4, 0, 1, 4
+; NOVSX-NEXT: vcmpequw 0, 2, 5
+; NOVSX-NEXT: vsel 2, 4, 2, 0
+; NOVSX-NEXT: vcmpequw 0, 3, 5
+; NOVSX-NEXT: vsel 2, 2, 3, 0
+; NOVSX-NEXT: vcmpeqfp 3, 4, 5
+; NOVSX-NEXT: vsel 2, 4, 2, 3
+; NOVSX-NEXT: blr
+;
+; VSX-LABEL: v4f32_maximum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: xvcmpeqsp 0, 35, 35
+; VSX-NEXT: addis 3, 2, .LCPI5_0 at toc@ha
+; VSX-NEXT: xvcmpeqsp 1, 34, 34
+; VSX-NEXT: addi 3, 3, .LCPI5_0 at toc@l
+; VSX-NEXT: xvmaxsp 2, 34, 35
+; VSX-NEXT: lxvd2x 3, 0, 3
+; VSX-NEXT: xxlxor 36, 36, 36
+; VSX-NEXT: vcmpequw 5, 2, 4
+; VSX-NEXT: xxlnor 0, 0, 0
+; VSX-NEXT: xxlnor 1, 1, 1
+; VSX-NEXT: vcmpequw 0, 3, 4
+; VSX-NEXT: xxlor 0, 1, 0
+; VSX-NEXT: xxsel 0, 2, 3, 0
+; VSX-NEXT: xxsel 1, 0, 34, 37
+; VSX-NEXT: xvcmpeqsp 2, 0, 36
+; VSX-NEXT: xxsel 1, 1, 35, 32
+; VSX-NEXT: xxsel 34, 0, 1, 2
+; VSX-NEXT: blr
+;
+; AIX-LABEL: v4f32_maximum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: xvcmpeqsp 0, 35, 35
+; AIX-NEXT: ld 3, L..C5(2) # %const.0
+; AIX-NEXT: xvcmpeqsp 1, 34, 34
+; AIX-NEXT: xvmaxsp 2, 34, 35
+; AIX-NEXT: xxlxor 36, 36, 36
+; AIX-NEXT: lxvw4x 3, 0, 3
+; AIX-NEXT: vcmpequw 5, 2, 4
+; AIX-NEXT: xxlnor 0, 0, 0
+; AIX-NEXT: xxlnor 1, 1, 1
+; AIX-NEXT: vcmpequw 0, 3, 4
+; AIX-NEXT: xxlor 0, 1, 0
+; AIX-NEXT: xxsel 0, 2, 3, 0
+; AIX-NEXT: xxsel 1, 0, 34, 37
+; AIX-NEXT: xvcmpeqsp 2, 0, 36
+; AIX-NEXT: xxsel 1, 1, 35, 32
+; AIX-NEXT: xxsel 34, 0, 1, 2
+; AIX-NEXT: blr
+entry:
+ %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %m
+}
+
+define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
+; NOVSX-LABEL: v2f64_minimum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: fcmpu 0, 1, 3
+; NOVSX-NEXT: fmr 6, 1
+; NOVSX-NEXT: stfd 4, -16(1)
+; NOVSX-NEXT: stfd 2, -8(1)
+; NOVSX-NEXT: stfd 3, -32(1)
+; NOVSX-NEXT: stfd 1, -24(1)
+; NOVSX-NEXT: bc 12, 0, .LBB6_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 6, 3
+; NOVSX-NEXT: .LBB6_2: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha
+; NOVSX-NEXT: ld 4, -24(1)
+; NOVSX-NEXT: lfs 0, .LCPI6_0@toc@l(3)
+; NOVSX-NEXT: fmr 5, 0
+; NOVSX-NEXT: bc 12, 3, .LBB6_4
+; NOVSX-NEXT: # %bb.3: # %entry
+; NOVSX-NEXT: fmr 5, 6
+; NOVSX-NEXT: .LBB6_4: # %entry
+; NOVSX-NEXT: li 3, 1
+; NOVSX-NEXT: rldic 3, 3, 63, 0
+; NOVSX-NEXT: cmpd 4, 3
+; NOVSX-NEXT: ld 4, -32(1)
+; NOVSX-NEXT: bc 12, 2, .LBB6_6
+; NOVSX-NEXT: # %bb.5: # %entry
+; NOVSX-NEXT: fmr 1, 5
+; NOVSX-NEXT: .LBB6_6: # %entry
+; NOVSX-NEXT: cmpd 4, 3
+; NOVSX-NEXT: bc 12, 2, .LBB6_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 3, 1
+; NOVSX-NEXT: .LBB6_8: # %entry
+; NOVSX-NEXT: addis 4, 2, .LCPI6_1@toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI6_1@toc@l(4)
+; NOVSX-NEXT: fcmpu 0, 5, 1
+; NOVSX-NEXT: bc 12, 2, .LBB6_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 3, 5
+; NOVSX-NEXT: .LBB6_10: # %entry
+; NOVSX-NEXT: fcmpu 0, 2, 4
+; NOVSX-NEXT: fmr 5, 2
+; NOVSX-NEXT: bc 12, 0, .LBB6_12
+; NOVSX-NEXT: # %bb.11: # %entry
+; NOVSX-NEXT: fmr 5, 4
+; NOVSX-NEXT: .LBB6_12: # %entry
+; NOVSX-NEXT: ld 4, -8(1)
+; NOVSX-NEXT: bc 12, 3, .LBB6_14
+; NOVSX-NEXT: # %bb.13: # %entry
+; NOVSX-NEXT: fmr 0, 5
+; NOVSX-NEXT: .LBB6_14: # %entry
+; NOVSX-NEXT: cmpd 4, 3
+; NOVSX-NEXT: ld 4, -16(1)
+; NOVSX-NEXT: bc 4, 2, .LBB6_19
+; NOVSX-NEXT: # %bb.15: # %entry
+; NOVSX-NEXT: cmpd 4, 3
+; NOVSX-NEXT: bc 4, 2, .LBB6_20
+; NOVSX-NEXT: .LBB6_16: # %entry
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB6_18
+; NOVSX-NEXT: .LBB6_17: # %entry
+; NOVSX-NEXT: fmr 4, 0
+; NOVSX-NEXT: .LBB6_18: # %entry
+; NOVSX-NEXT: fmr 1, 3
+; NOVSX-NEXT: fmr 2, 4
+; NOVSX-NEXT: blr
+; NOVSX-NEXT: .LBB6_19: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: cmpd 4, 3
+; NOVSX-NEXT: bc 12, 2, .LBB6_16
+; NOVSX-NEXT: .LBB6_20: # %entry
+; NOVSX-NEXT: fmr 4, 2
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 4, 2, .LBB6_17
+; NOVSX-NEXT: b .LBB6_18
+;
+; VSX-LABEL: v2f64_minimum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: xvcmpeqdp 36, 35, 35
+; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha
+; VSX-NEXT: xvcmpeqdp 37, 34, 34
+; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l
+; VSX-NEXT: xvmindp 0, 34, 35
+; VSX-NEXT: lxvd2x 32, 0, 3
+; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha
+; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l
+; VSX-NEXT: lxvd2x 1, 0, 3
+; VSX-NEXT: vcmpequd 1, 2, 0
+; VSX-NEXT: xxlnor 36, 36, 36
+; VSX-NEXT: xxlnor 37, 37, 37
+; VSX-NEXT: xxlor 2, 37, 36
+; VSX-NEXT: vcmpequd 4, 3, 0
+; VSX-NEXT: xxsel 0, 0, 1, 2
+; VSX-NEXT: xxlxor 1, 1, 1
+; VSX-NEXT: xxsel 2, 0, 34, 33
+; VSX-NEXT: xvcmpeqdp 34, 0, 1
+; VSX-NEXT: xxsel 1, 2, 35, 36
+; VSX-NEXT: xxsel 34, 0, 1, 34
+; VSX-NEXT: blr
+;
+; AIX-LABEL: v2f64_minimum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: xvcmpeqdp 36, 35, 35
+; AIX-NEXT: ld 3, L..C6(2) # %const.1
+; AIX-NEXT: xvcmpeqdp 37, 34, 34
+; AIX-NEXT: xvmindp 0, 34, 35
+; AIX-NEXT: lxvd2x 32, 0, 3
+; AIX-NEXT: ld 3, L..C7(2) # %const.0
+; AIX-NEXT: xxlnor 36, 36, 36
+; AIX-NEXT: lxvd2x 1, 0, 3
+; AIX-NEXT: xxlnor 37, 37, 37
+; AIX-NEXT: vcmpequd 1, 2, 0
+; AIX-NEXT: xxlor 2, 37, 36
+; AIX-NEXT: vcmpequd 4, 3, 0
+; AIX-NEXT: xxsel 0, 0, 1, 2
+; AIX-NEXT: xxlxor 1, 1, 1
+; AIX-NEXT: xxsel 2, 0, 34, 33
+; AIX-NEXT: xvcmpeqdp 34, 0, 1
+; AIX-NEXT: xxsel 1, 2, 35, 36
+; AIX-NEXT: xxsel 34, 0, 1, 34
+; AIX-NEXT: blr
+entry:
+ %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %m
+}
+
+define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
+; NOVSX-LABEL: v2f64_maximum:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: fcmpu 0, 1, 3
+; NOVSX-NEXT: fmr 6, 1
+; NOVSX-NEXT: stfd 4, -16(1)
+; NOVSX-NEXT: stfd 2, -8(1)
+; NOVSX-NEXT: stfd 3, -32(1)
+; NOVSX-NEXT: stfd 1, -24(1)
+; NOVSX-NEXT: bc 12, 1, .LBB7_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr 6, 3
+; NOVSX-NEXT: .LBB7_2: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha
+; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(3)
+; NOVSX-NEXT: ld 3, -24(1)
+; NOVSX-NEXT: fmr 5, 0
+; NOVSX-NEXT: bc 12, 3, .LBB7_4
+; NOVSX-NEXT: # %bb.3: # %entry
+; NOVSX-NEXT: fmr 5, 6
+; NOVSX-NEXT: .LBB7_4: # %entry
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: ld 3, -32(1)
+; NOVSX-NEXT: bc 12, 2, .LBB7_6
+; NOVSX-NEXT: # %bb.5: # %entry
+; NOVSX-NEXT: fmr 1, 5
+; NOVSX-NEXT: .LBB7_6: # %entry
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB7_8
+; NOVSX-NEXT: # %bb.7: # %entry
+; NOVSX-NEXT: fmr 3, 1
+; NOVSX-NEXT: .LBB7_8: # %entry
+; NOVSX-NEXT: addis 3, 2, .LCPI7_1@toc@ha
+; NOVSX-NEXT: lfs 1, .LCPI7_1@toc@l(3)
+; NOVSX-NEXT: fcmpu 0, 5, 1
+; NOVSX-NEXT: bc 12, 2, .LBB7_10
+; NOVSX-NEXT: # %bb.9: # %entry
+; NOVSX-NEXT: fmr 3, 5
+; NOVSX-NEXT: .LBB7_10: # %entry
+; NOVSX-NEXT: fcmpu 0, 2, 4
+; NOVSX-NEXT: fmr 5, 2
+; NOVSX-NEXT: bc 12, 1, .LBB7_12
+; NOVSX-NEXT: # %bb.11: # %entry
+; NOVSX-NEXT: fmr 5, 4
+; NOVSX-NEXT: .LBB7_12: # %entry
+; NOVSX-NEXT: ld 3, -8(1)
+; NOVSX-NEXT: bc 12, 3, .LBB7_14
+; NOVSX-NEXT: # %bb.13: # %entry
+; NOVSX-NEXT: fmr 0, 5
+; NOVSX-NEXT: .LBB7_14: # %entry
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: ld 3, -16(1)
+; NOVSX-NEXT: bc 4, 2, .LBB7_19
+; NOVSX-NEXT: # %bb.15: # %entry
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 4, 2, .LBB7_20
+; NOVSX-NEXT: .LBB7_16: # %entry
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 12, 2, .LBB7_18
+; NOVSX-NEXT: .LBB7_17: # %entry
+; NOVSX-NEXT: fmr 4, 0
+; NOVSX-NEXT: .LBB7_18: # %entry
+; NOVSX-NEXT: fmr 1, 3
+; NOVSX-NEXT: fmr 2, 4
+; NOVSX-NEXT: blr
+; NOVSX-NEXT: .LBB7_19: # %entry
+; NOVSX-NEXT: fmr 2, 0
+; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: bc 12, 2, .LBB7_16
+; NOVSX-NEXT: .LBB7_20: # %entry
+; NOVSX-NEXT: fmr 4, 2
+; NOVSX-NEXT: fcmpu 0, 0, 1
+; NOVSX-NEXT: bc 4, 2, .LBB7_17
+; NOVSX-NEXT: b .LBB7_18
+;
+; VSX-LABEL: v2f64_maximum:
+; VSX: # %bb.0: # %entry
+; VSX-NEXT: xvcmpeqdp 37, 35, 35
+; VSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha
+; VSX-NEXT: xvcmpeqdp 32, 34, 34
+; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l
+; VSX-NEXT: xvmaxdp 0, 34, 35
+; VSX-NEXT: lxvd2x 1, 0, 3
+; VSX-NEXT: xxlxor 36, 36, 36
+; VSX-NEXT: vcmpequd 1, 2, 4
+; VSX-NEXT: xxlnor 37, 37, 37
+; VSX-NEXT: xxlnor 32, 32, 32
+; VSX-NEXT: xxlor 2, 32, 37
+; VSX-NEXT: vcmpequd 5, 3, 4
+; VSX-NEXT: xxsel 0, 0, 1, 2
+; VSX-NEXT: xxsel 1, 0, 34, 33
+; VSX-NEXT: xvcmpeqdp 34, 0, 36
+; VSX-NEXT: xxsel 1, 1, 35, 37
+; VSX-NEXT: xxsel 34, 0, 1, 34
+; VSX-NEXT: blr
+;
+; AIX-LABEL: v2f64_maximum:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: xvcmpeqdp 36, 35, 35
+; AIX-NEXT: ld 3, L..C8(2) # %const.0
+; AIX-NEXT: xvcmpeqdp 37, 34, 34
+; AIX-NEXT: xvmaxdp 0, 34, 35
+; AIX-NEXT: xxlxor 32, 32, 32
+; AIX-NEXT: lxvd2x 1, 0, 3
+; AIX-NEXT: vcmpequd 1, 2, 0
+; AIX-NEXT: xxlnor 36, 36, 36
+; AIX-NEXT: xxlnor 37, 37, 37
+; AIX-NEXT: xxlor 2, 37, 36
+; AIX-NEXT: vcmpequd 4, 3, 0
+; AIX-NEXT: xxsel 0, 0, 1, 2
+; AIX-NEXT: xxsel 1, 0, 34, 33
+; AIX-NEXT: xvcmpeqdp 34, 0, 32
+; AIX-NEXT: xxsel 1, 1, 35, 36
+; AIX-NEXT: xxsel 34, 0, 1, 34
+; AIX-NEXT: blr
+entry:
+ %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %m
+}
+
+declare float @llvm.maximum.f32(float, float)
+declare double @llvm.maximum.f64(double, double)
+declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
+
+declare float @llvm.minimum.f32(float, float)
+declare double @llvm.minimum.f64(double, double)
+declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
>From 4b7e71bbb003c96e09ecae35d4bfde2cf73740e4 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Tue, 10 Oct 2023 13:31:26 +0800
Subject: [PATCH 2/3] Add small comments and update test
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 2 +
.../test/CodeGen/PowerPC/fminimum-fmaximum.ll | 334 +++++++++---------
2 files changed, 169 insertions(+), 167 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f49390ac29c0d06..4a1eae5d656d9f4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8220,6 +8220,8 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType()))
return SDValue();
+ // First, implement comparison not propagating NaN. If no native fmin or fmax
+ // available, use plain select with setcc instead.
SDValue MinMax;
if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE,
VT))
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
index 24fa7c716ea2951..c33875dbfee4641 100644
--- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
@@ -21,13 +21,13 @@ define float @f32_minimum(float %a, float %b) {
; NOVSX-NEXT: lfs 0, .LCPI0_0@toc@l(4)
; NOVSX-NEXT: .LBB0_4: # %entry
; NOVSX-NEXT: xoris 3, 3, 32768
+; NOVSX-NEXT: lwz 4, -8(1)
; NOVSX-NEXT: cmplwi 3, 0
-; NOVSX-NEXT: lwz 3, -8(1)
; NOVSX-NEXT: bc 12, 2, .LBB0_6
; NOVSX-NEXT: # %bb.5: # %entry
; NOVSX-NEXT: fmr 1, 0
; NOVSX-NEXT: .LBB0_6: # %entry
-; NOVSX-NEXT: xoris 3, 3, 32768
+; NOVSX-NEXT: xoris 3, 4, 32768
; NOVSX-NEXT: cmplwi 3, 0
; NOVSX-NEXT: bc 12, 2, .LBB0_8
; NOVSX-NEXT: # %bb.7: # %entry
@@ -45,8 +45,8 @@ define float @f32_minimum(float %a, float %b) {
;
; VSX-LABEL: f32_minimum:
; VSX: # %bb.0: # %entry
-; VSX-NEXT: fcmpu 0, 1, 2
; VSX-NEXT: xscvdpspn 0, 1
+; VSX-NEXT: fcmpu 0, 1, 2
; VSX-NEXT: xscvdpspn 3, 2
; VSX-NEXT: mffprwz 3, 0
; VSX-NEXT: bc 12, 3, .LBB0_2
@@ -58,13 +58,13 @@ define float @f32_minimum(float %a, float %b) {
; VSX-NEXT: lfs 0, .LCPI0_0@toc@l(4)
; VSX-NEXT: .LBB0_3: # %entry
; VSX-NEXT: xoris 3, 3, 32768
+; VSX-NEXT: mffprwz 4, 3
; VSX-NEXT: cmplwi 3, 0
-; VSX-NEXT: mffprwz 3, 3
; VSX-NEXT: bc 12, 2, .LBB0_5
; VSX-NEXT: # %bb.4: # %entry
; VSX-NEXT: fmr 1, 0
; VSX-NEXT: .LBB0_5: # %entry
-; VSX-NEXT: xoris 3, 3, 32768
+; VSX-NEXT: xoris 3, 4, 32768
; VSX-NEXT: cmplwi 3, 0
; VSX-NEXT: bc 12, 2, .LBB0_7
; VSX-NEXT: # %bb.6: # %entry
@@ -81,8 +81,8 @@ define float @f32_minimum(float %a, float %b) {
;
; AIX-LABEL: f32_minimum:
; AIX: # %bb.0: # %entry
-; AIX-NEXT: fcmpu 0, 1, 2
; AIX-NEXT: xscvdpspn 0, 1
+; AIX-NEXT: fcmpu 0, 1, 2
; AIX-NEXT: xscvdpspn 3, 2
; AIX-NEXT: mffprwz 3, 0
; AIX-NEXT: bc 12, 3, L..BB0_2
@@ -94,13 +94,13 @@ define float @f32_minimum(float %a, float %b) {
; AIX-NEXT: lfs 0, 0(4)
; AIX-NEXT: L..BB0_3: # %entry
; AIX-NEXT: xoris 3, 3, 32768
+; AIX-NEXT: mffprwz 4, 3
; AIX-NEXT: cmplwi 3, 0
-; AIX-NEXT: mffprwz 3, 3
; AIX-NEXT: bc 12, 2, L..BB0_5
; AIX-NEXT: # %bb.4: # %entry
; AIX-NEXT: fmr 1, 0
; AIX-NEXT: L..BB0_5: # %entry
-; AIX-NEXT: xoris 3, 3, 32768
+; AIX-NEXT: xoris 3, 4, 32768
; AIX-NEXT: cmplwi 3, 0
; AIX-NEXT: bc 12, 2, L..BB0_7
; AIX-NEXT: # %bb.6: # %entry
@@ -137,12 +137,12 @@ define float @f32_maximum(float %a, float %b) {
; NOVSX-NEXT: lfs 0, .LCPI1_0@toc@l(4)
; NOVSX-NEXT: .LBB1_4: # %entry
; NOVSX-NEXT: cmpwi 3, 0
-; NOVSX-NEXT: lwz 3, -8(1)
+; NOVSX-NEXT: lwz 4, -8(1)
; NOVSX-NEXT: bc 12, 2, .LBB1_6
; NOVSX-NEXT: # %bb.5: # %entry
; NOVSX-NEXT: fmr 1, 0
; NOVSX-NEXT: .LBB1_6: # %entry
-; NOVSX-NEXT: cmpwi 3, 0
+; NOVSX-NEXT: cmpwi 4, 0
; NOVSX-NEXT: bc 12, 2, .LBB1_8
; NOVSX-NEXT: # %bb.7: # %entry
; NOVSX-NEXT: fmr 2, 1
@@ -159,8 +159,8 @@ define float @f32_maximum(float %a, float %b) {
;
; VSX-LABEL: f32_maximum:
; VSX: # %bb.0: # %entry
-; VSX-NEXT: fcmpu 0, 1, 2
; VSX-NEXT: xscvdpspn 0, 1
+; VSX-NEXT: fcmpu 0, 1, 2
; VSX-NEXT: xscvdpspn 3, 2
; VSX-NEXT: mffprwz 3, 0
; VSX-NEXT: bc 12, 3, .LBB1_2
@@ -171,13 +171,13 @@ define float @f32_maximum(float %a, float %b) {
; VSX-NEXT: addis 4, 2, .LCPI1_0@toc@ha
; VSX-NEXT: lfs 0, .LCPI1_0@toc@l(4)
; VSX-NEXT: .LBB1_3: # %entry
+; VSX-NEXT: mffprwz 4, 3
; VSX-NEXT: cmpwi 3, 0
-; VSX-NEXT: mffprwz 3, 3
; VSX-NEXT: bc 12, 2, .LBB1_5
; VSX-NEXT: # %bb.4: # %entry
; VSX-NEXT: fmr 1, 0
; VSX-NEXT: .LBB1_5: # %entry
-; VSX-NEXT: cmpwi 3, 0
+; VSX-NEXT: cmpwi 4, 0
; VSX-NEXT: bc 12, 2, .LBB1_7
; VSX-NEXT: # %bb.6: # %entry
; VSX-NEXT: fmr 2, 1
@@ -193,8 +193,8 @@ define float @f32_maximum(float %a, float %b) {
;
; AIX-LABEL: f32_maximum:
; AIX: # %bb.0: # %entry
-; AIX-NEXT: fcmpu 0, 1, 2
; AIX-NEXT: xscvdpspn 0, 1
+; AIX-NEXT: fcmpu 0, 1, 2
; AIX-NEXT: xscvdpspn 3, 2
; AIX-NEXT: mffprwz 3, 0
; AIX-NEXT: bc 12, 3, L..BB1_2
@@ -205,13 +205,13 @@ define float @f32_maximum(float %a, float %b) {
; AIX-NEXT: ld 4, L..C1(2) # %const.0
; AIX-NEXT: lfs 0, 0(4)
; AIX-NEXT: L..BB1_3: # %entry
+; AIX-NEXT: mffprwz 4, 3
; AIX-NEXT: cmpwi 3, 0
-; AIX-NEXT: mffprwz 3, 3
; AIX-NEXT: bc 12, 2, L..BB1_5
; AIX-NEXT: # %bb.4: # %entry
; AIX-NEXT: fmr 1, 0
; AIX-NEXT: L..BB1_5: # %entry
-; AIX-NEXT: cmpwi 3, 0
+; AIX-NEXT: cmpwi 4, 0
; AIX-NEXT: bc 12, 2, L..BB1_7
; AIX-NEXT: # %bb.6: # %entry
; AIX-NEXT: fmr 2, 1
@@ -246,15 +246,15 @@ define double @f64_minimum(double %a, double %b) {
; NOVSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha
; NOVSX-NEXT: lfs 0, .LCPI2_0@toc@l(4)
; NOVSX-NEXT: .LBB2_4: # %entry
-; NOVSX-NEXT: li 4, 1
-; NOVSX-NEXT: rldic 4, 4, 63, 0
-; NOVSX-NEXT: cmpd 3, 4
-; NOVSX-NEXT: ld 3, -16(1)
+; NOVSX-NEXT: li 5, 1
+; NOVSX-NEXT: ld 4, -16(1)
+; NOVSX-NEXT: rldic 5, 5, 63, 0
+; NOVSX-NEXT: cmpd 3, 5
; NOVSX-NEXT: bc 12, 2, .LBB2_6
; NOVSX-NEXT: # %bb.5: # %entry
; NOVSX-NEXT: fmr 1, 0
; NOVSX-NEXT: .LBB2_6: # %entry
-; NOVSX-NEXT: cmpd 3, 4
+; NOVSX-NEXT: cmpd 4, 5
; NOVSX-NEXT: bc 12, 2, .LBB2_8
; NOVSX-NEXT: # %bb.7: # %entry
; NOVSX-NEXT: fmr 2, 1
@@ -281,15 +281,15 @@ define double @f64_minimum(double %a, double %b) {
; VSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha
; VSX-NEXT: lfs 0, .LCPI2_0@toc@l(4)
; VSX-NEXT: .LBB2_3: # %entry
-; VSX-NEXT: li 4, 1
-; VSX-NEXT: rldic 4, 4, 63, 0
-; VSX-NEXT: cmpd 3, 4
-; VSX-NEXT: mffprd 3, 2
+; VSX-NEXT: li 5, 1
+; VSX-NEXT: mffprd 4, 2
+; VSX-NEXT: rldic 5, 5, 63, 0
+; VSX-NEXT: cmpd 3, 5
; VSX-NEXT: bc 12, 2, .LBB2_5
; VSX-NEXT: # %bb.4: # %entry
; VSX-NEXT: fmr 1, 0
; VSX-NEXT: .LBB2_5: # %entry
-; VSX-NEXT: cmpd 3, 4
+; VSX-NEXT: cmpd 4, 5
; VSX-NEXT: bc 12, 2, .LBB2_7
; VSX-NEXT: # %bb.6: # %entry
; VSX-NEXT: fmr 2, 1
@@ -315,15 +315,15 @@ define double @f64_minimum(double %a, double %b) {
; AIX-NEXT: ld 4, L..C2(2) # %const.0
; AIX-NEXT: lfs 0, 0(4)
; AIX-NEXT: L..BB2_3: # %entry
-; AIX-NEXT: li 4, 1
-; AIX-NEXT: rldic 4, 4, 63, 0
-; AIX-NEXT: cmpd 3, 4
-; AIX-NEXT: mffprd 3, 2
+; AIX-NEXT: li 5, 1
+; AIX-NEXT: mffprd 4, 2
+; AIX-NEXT: rldic 5, 5, 63, 0
+; AIX-NEXT: cmpd 3, 5
; AIX-NEXT: bc 12, 2, L..BB2_5
; AIX-NEXT: # %bb.4: # %entry
; AIX-NEXT: fmr 1, 0
; AIX-NEXT: L..BB2_5: # %entry
-; AIX-NEXT: cmpd 3, 4
+; AIX-NEXT: cmpd 4, 5
; AIX-NEXT: bc 12, 2, L..BB2_7
; AIX-NEXT: # %bb.6: # %entry
; AIX-NEXT: fmr 2, 1
@@ -359,12 +359,12 @@ define double @f64_maximum(double %a, double %b) {
; NOVSX-NEXT: lfs 0, .LCPI3_0@toc@l(4)
; NOVSX-NEXT: .LBB3_4: # %entry
; NOVSX-NEXT: cmpdi 3, 0
-; NOVSX-NEXT: ld 3, -16(1)
+; NOVSX-NEXT: ld 4, -16(1)
; NOVSX-NEXT: bc 12, 2, .LBB3_6
; NOVSX-NEXT: # %bb.5: # %entry
; NOVSX-NEXT: fmr 1, 0
; NOVSX-NEXT: .LBB3_6: # %entry
-; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: cmpdi 4, 0
; NOVSX-NEXT: bc 12, 2, .LBB3_8
; NOVSX-NEXT: # %bb.7: # %entry
; NOVSX-NEXT: fmr 2, 1
@@ -391,13 +391,13 @@ define double @f64_maximum(double %a, double %b) {
; VSX-NEXT: addis 4, 2, .LCPI3_0@toc@ha
; VSX-NEXT: lfs 0, .LCPI3_0@toc@l(4)
; VSX-NEXT: .LBB3_3: # %entry
+; VSX-NEXT: mffprd 4, 2
; VSX-NEXT: cmpdi 3, 0
-; VSX-NEXT: mffprd 3, 2
; VSX-NEXT: bc 12, 2, .LBB3_5
; VSX-NEXT: # %bb.4: # %entry
; VSX-NEXT: fmr 1, 0
; VSX-NEXT: .LBB3_5: # %entry
-; VSX-NEXT: cmpdi 3, 0
+; VSX-NEXT: cmpdi 4, 0
; VSX-NEXT: bc 12, 2, .LBB3_7
; VSX-NEXT: # %bb.6: # %entry
; VSX-NEXT: fmr 2, 1
@@ -423,13 +423,13 @@ define double @f64_maximum(double %a, double %b) {
; AIX-NEXT: ld 4, L..C3(2) # %const.0
; AIX-NEXT: lfs 0, 0(4)
; AIX-NEXT: L..BB3_3: # %entry
+; AIX-NEXT: mffprd 4, 2
; AIX-NEXT: cmpdi 3, 0
-; AIX-NEXT: mffprd 3, 2
; AIX-NEXT: bc 12, 2, L..BB3_5
; AIX-NEXT: # %bb.4: # %entry
; AIX-NEXT: fmr 1, 0
; AIX-NEXT: L..BB3_5: # %entry
-; AIX-NEXT: cmpdi 3, 0
+; AIX-NEXT: cmpdi 4, 0
; AIX-NEXT: bc 12, 2, L..BB3_7
; AIX-NEXT: # %bb.6: # %entry
; AIX-NEXT: fmr 2, 1
@@ -450,71 +450,71 @@ entry:
define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
; NOVSX-LABEL: v4f32_minimum:
; NOVSX: # %bb.0: # %entry
-; NOVSX-NEXT: vcmpeqfp 5, 3, 3
-; NOVSX-NEXT: vspltisb 4, -1
+; NOVSX-NEXT: vcmpeqfp 0, 3, 3
+; NOVSX-NEXT: vcmpeqfp 1, 2, 2
; NOVSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha
-; NOVSX-NEXT: vcmpeqfp 0, 2, 2
; NOVSX-NEXT: addi 3, 3, .LCPI4_0@toc@l
-; NOVSX-NEXT: vcmpgtfp 1, 3, 2
-; NOVSX-NEXT: vslw 4, 4, 4
-; NOVSX-NEXT: vnot 5, 5
; NOVSX-NEXT: vnot 0, 0
-; NOVSX-NEXT: vsel 1, 3, 2, 1
-; NOVSX-NEXT: vor 5, 0, 5
-; NOVSX-NEXT: lvx 0, 0, 3
-; NOVSX-NEXT: vsel 5, 1, 0, 5
+; NOVSX-NEXT: vnot 1, 1
+; NOVSX-NEXT: vspltisb 4, -1
+; NOVSX-NEXT: vcmpgtfp 5, 3, 2
+; NOVSX-NEXT: vslw 4, 4, 4
+; NOVSX-NEXT: vor 0, 1, 0
+; NOVSX-NEXT: lvx 1, 0, 3
+; NOVSX-NEXT: vsel 5, 3, 2, 5
+; NOVSX-NEXT: vsel 5, 5, 1, 0
; NOVSX-NEXT: vcmpequw 0, 2, 4
; NOVSX-NEXT: vcmpequw 4, 3, 4
; NOVSX-NEXT: vsel 2, 5, 2, 0
-; NOVSX-NEXT: vxor 0, 0, 0
; NOVSX-NEXT: vsel 2, 2, 3, 4
-; NOVSX-NEXT: vcmpeqfp 3, 5, 0
+; NOVSX-NEXT: vxor 3, 3, 3
+; NOVSX-NEXT: vcmpeqfp 3, 5, 3
; NOVSX-NEXT: vsel 2, 5, 2, 3
; NOVSX-NEXT: blr
;
; VSX-LABEL: v4f32_minimum:
; VSX: # %bb.0: # %entry
-; VSX-NEXT: xxleqv 36, 36, 36
+; VSX-NEXT: xvcmpeqsp 1, 35, 35
+; VSX-NEXT: xvcmpeqsp 2, 34, 34
; VSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha
-; VSX-NEXT: xvcmpeqsp 0, 35, 35
-; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l
-; VSX-NEXT: xvcmpeqsp 1, 34, 34
-; VSX-NEXT: lxvd2x 3, 0, 3
+; VSX-NEXT: xxleqv 36, 36, 36
+; VSX-NEXT: xvminsp 0, 34, 35
; VSX-NEXT: vslw 4, 4, 4
-; VSX-NEXT: xvminsp 2, 34, 35
-; VSX-NEXT: xxlnor 0, 0, 0
+; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l
; VSX-NEXT: xxlnor 1, 1, 1
+; VSX-NEXT: xxlnor 2, 2, 2
; VSX-NEXT: vcmpequw 5, 2, 4
-; VSX-NEXT: xxlor 0, 1, 0
-; VSX-NEXT: vcmpequw 4, 3, 4
-; VSX-NEXT: xxsel 0, 2, 3, 0
-; VSX-NEXT: xxlxor 1, 1, 1
-; VSX-NEXT: xxsel 2, 0, 34, 37
-; VSX-NEXT: xvcmpeqsp 1, 0, 1
-; VSX-NEXT: xxsel 2, 2, 35, 36
-; VSX-NEXT: xxsel 34, 0, 2, 1
+; VSX-NEXT: xxlor 1, 2, 1
+; VSX-NEXT: lxvd2x 2, 0, 3
+; VSX-NEXT: xxsel 0, 0, 2, 1
+; VSX-NEXT: xxlxor 2, 2, 2
+; VSX-NEXT: xvcmpeqsp 2, 0, 2
+; VSX-NEXT: xxsel 1, 0, 34, 37
+; VSX-NEXT: vcmpequw 2, 3, 4
+; VSX-NEXT: xxsel 1, 1, 35, 34
+; VSX-NEXT: xxsel 34, 0, 1, 2
; VSX-NEXT: blr
;
; AIX-LABEL: v4f32_minimum:
; AIX: # %bb.0: # %entry
-; AIX-NEXT: xxleqv 36, 36, 36
+; AIX-NEXT: xvcmpeqsp 1, 35, 35
+; AIX-NEXT: xvcmpeqsp 2, 34, 34
; AIX-NEXT: ld 3, L..C4(2) # %const.0
-; AIX-NEXT: xvcmpeqsp 0, 35, 35
-; AIX-NEXT: xvcmpeqsp 1, 34, 34
+; AIX-NEXT: xxleqv 36, 36, 36
+; AIX-NEXT: xvminsp 0, 34, 35
; AIX-NEXT: vslw 4, 4, 4
-; AIX-NEXT: lxvw4x 3, 0, 3
-; AIX-NEXT: xvminsp 2, 34, 35
-; AIX-NEXT: xxlnor 0, 0, 0
; AIX-NEXT: xxlnor 1, 1, 1
+; AIX-NEXT: xxlnor 2, 2, 2
; AIX-NEXT: vcmpequw 5, 2, 4
-; AIX-NEXT: xxlor 0, 1, 0
-; AIX-NEXT: vcmpequw 4, 3, 4
-; AIX-NEXT: xxsel 0, 2, 3, 0
-; AIX-NEXT: xxlxor 1, 1, 1
-; AIX-NEXT: xxsel 2, 0, 34, 37
-; AIX-NEXT: xvcmpeqsp 1, 0, 1
-; AIX-NEXT: xxsel 2, 2, 35, 36
-; AIX-NEXT: xxsel 34, 0, 2, 1
+; AIX-NEXT: xxlor 1, 2, 1
+; AIX-NEXT: lxvw4x 2, 0, 3
+; AIX-NEXT: xxsel 0, 0, 2, 1
+; AIX-NEXT: xxlxor 2, 2, 2
+; AIX-NEXT: xvcmpeqsp 2, 0, 2
+; AIX-NEXT: xxsel 1, 0, 34, 37
+; AIX-NEXT: vcmpequw 2, 3, 4
+; AIX-NEXT: xxsel 1, 1, 35, 34
+; AIX-NEXT: xxsel 34, 0, 1, 2
; AIX-NEXT: blr
entry:
%m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -524,18 +524,18 @@ entry:
define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
; NOVSX-LABEL: v4f32_maximum:
; NOVSX: # %bb.0: # %entry
-; NOVSX-NEXT: vcmpeqfp 4, 3, 3
+; NOVSX-NEXT: vcmpeqfp 5, 3, 3
+; NOVSX-NEXT: vcmpeqfp 0, 2, 2
; NOVSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha
-; NOVSX-NEXT: vcmpeqfp 5, 2, 2
; NOVSX-NEXT: addi 3, 3, .LCPI5_0@toc@l
-; NOVSX-NEXT: vcmpgtfp 0, 2, 3
-; NOVSX-NEXT: lvx 1, 0, 3
-; NOVSX-NEXT: vnot 4, 4
; NOVSX-NEXT: vnot 5, 5
-; NOVSX-NEXT: vsel 0, 3, 2, 0
-; NOVSX-NEXT: vor 4, 5, 4
+; NOVSX-NEXT: vnot 0, 0
+; NOVSX-NEXT: vcmpgtfp 4, 2, 3
+; NOVSX-NEXT: vor 5, 0, 5
+; NOVSX-NEXT: lvx 0, 0, 3
+; NOVSX-NEXT: vsel 4, 3, 2, 4
+; NOVSX-NEXT: vsel 4, 4, 0, 5
; NOVSX-NEXT: vxor 5, 5, 5
-; NOVSX-NEXT: vsel 4, 0, 1, 4
; NOVSX-NEXT: vcmpequw 0, 2, 5
; NOVSX-NEXT: vsel 2, 4, 2, 0
; NOVSX-NEXT: vcmpequw 0, 3, 5
@@ -546,42 +546,42 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
;
; VSX-LABEL: v4f32_maximum:
; VSX: # %bb.0: # %entry
-; VSX-NEXT: xvcmpeqsp 0, 35, 35
+; VSX-NEXT: xvcmpeqsp 1, 35, 35
+; VSX-NEXT: xvcmpeqsp 2, 34, 34
; VSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha
-; VSX-NEXT: xvcmpeqsp 1, 34, 34
; VSX-NEXT: addi 3, 3, .LCPI5_0@toc@l
-; VSX-NEXT: xvmaxsp 2, 34, 35
-; VSX-NEXT: lxvd2x 3, 0, 3
+; VSX-NEXT: xxlnor 1, 1, 1
+; VSX-NEXT: xxlnor 2, 2, 2
+; VSX-NEXT: xvmaxsp 0, 34, 35
; VSX-NEXT: xxlxor 36, 36, 36
; VSX-NEXT: vcmpequw 5, 2, 4
-; VSX-NEXT: xxlnor 0, 0, 0
-; VSX-NEXT: xxlnor 1, 1, 1
-; VSX-NEXT: vcmpequw 0, 3, 4
-; VSX-NEXT: xxlor 0, 1, 0
-; VSX-NEXT: xxsel 0, 2, 3, 0
-; VSX-NEXT: xxsel 1, 0, 34, 37
+; VSX-NEXT: xxlor 1, 2, 1
+; VSX-NEXT: lxvd2x 2, 0, 3
+; VSX-NEXT: xxsel 0, 0, 2, 1
; VSX-NEXT: xvcmpeqsp 2, 0, 36
-; VSX-NEXT: xxsel 1, 1, 35, 32
+; VSX-NEXT: xxsel 1, 0, 34, 37
+; VSX-NEXT: vcmpequw 2, 3, 4
+; VSX-NEXT: xxsel 1, 1, 35, 34
; VSX-NEXT: xxsel 34, 0, 1, 2
; VSX-NEXT: blr
;
; AIX-LABEL: v4f32_maximum:
; AIX: # %bb.0: # %entry
-; AIX-NEXT: xvcmpeqsp 0, 35, 35
+; AIX-NEXT: xvcmpeqsp 1, 35, 35
+; AIX-NEXT: xvcmpeqsp 2, 34, 34
; AIX-NEXT: ld 3, L..C5(2) # %const.0
-; AIX-NEXT: xvcmpeqsp 1, 34, 34
-; AIX-NEXT: xvmaxsp 2, 34, 35
+; AIX-NEXT: xvmaxsp 0, 34, 35
; AIX-NEXT: xxlxor 36, 36, 36
-; AIX-NEXT: lxvw4x 3, 0, 3
-; AIX-NEXT: vcmpequw 5, 2, 4
-; AIX-NEXT: xxlnor 0, 0, 0
; AIX-NEXT: xxlnor 1, 1, 1
-; AIX-NEXT: vcmpequw 0, 3, 4
-; AIX-NEXT: xxlor 0, 1, 0
-; AIX-NEXT: xxsel 0, 2, 3, 0
-; AIX-NEXT: xxsel 1, 0, 34, 37
+; AIX-NEXT: xxlnor 2, 2, 2
+; AIX-NEXT: vcmpequw 5, 2, 4
+; AIX-NEXT: xxlor 1, 2, 1
+; AIX-NEXT: lxvw4x 2, 0, 3
+; AIX-NEXT: xxsel 0, 0, 2, 1
; AIX-NEXT: xvcmpeqsp 2, 0, 36
-; AIX-NEXT: xxsel 1, 1, 35, 32
+; AIX-NEXT: xxsel 1, 0, 34, 37
+; AIX-NEXT: vcmpequw 2, 3, 4
+; AIX-NEXT: xxsel 1, 1, 35, 34
; AIX-NEXT: xxsel 34, 0, 1, 2
; AIX-NEXT: blr
entry:
@@ -611,14 +611,14 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
; NOVSX-NEXT: fmr 5, 6
; NOVSX-NEXT: .LBB6_4: # %entry
; NOVSX-NEXT: li 3, 1
+; NOVSX-NEXT: ld 5, -32(1)
; NOVSX-NEXT: rldic 3, 3, 63, 0
; NOVSX-NEXT: cmpd 4, 3
-; NOVSX-NEXT: ld 4, -32(1)
; NOVSX-NEXT: bc 12, 2, .LBB6_6
; NOVSX-NEXT: # %bb.5: # %entry
; NOVSX-NEXT: fmr 1, 5
; NOVSX-NEXT: .LBB6_6: # %entry
-; NOVSX-NEXT: cmpd 4, 3
+; NOVSX-NEXT: cmpd 5, 3
; NOVSX-NEXT: bc 12, 2, .LBB6_8
; NOVSX-NEXT: # %bb.7: # %entry
; NOVSX-NEXT: fmr 3, 1
@@ -636,12 +636,12 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
; NOVSX-NEXT: # %bb.11: # %entry
; NOVSX-NEXT: fmr 5, 4
; NOVSX-NEXT: .LBB6_12: # %entry
-; NOVSX-NEXT: ld 4, -8(1)
+; NOVSX-NEXT: ld 5, -8(1)
; NOVSX-NEXT: bc 12, 3, .LBB6_14
; NOVSX-NEXT: # %bb.13: # %entry
; NOVSX-NEXT: fmr 0, 5
; NOVSX-NEXT: .LBB6_14: # %entry
-; NOVSX-NEXT: cmpd 4, 3
+; NOVSX-NEXT: cmpd 5, 3
; NOVSX-NEXT: ld 4, -16(1)
; NOVSX-NEXT: bc 4, 2, .LBB6_19
; NOVSX-NEXT: # %bb.15: # %entry
@@ -668,47 +668,47 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
;
; VSX-LABEL: v2f64_minimum:
; VSX: # %bb.0: # %entry
+; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; VSX-NEXT: xvcmpeqdp 36, 35, 35
-; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha
; VSX-NEXT: xvcmpeqdp 37, 34, 34
-; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l
-; VSX-NEXT: xvmindp 0, 34, 35
-; VSX-NEXT: lxvd2x 32, 0, 3
-; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l
-; VSX-NEXT: lxvd2x 1, 0, 3
-; VSX-NEXT: vcmpequd 1, 2, 0
; VSX-NEXT: xxlnor 36, 36, 36
; VSX-NEXT: xxlnor 37, 37, 37
-; VSX-NEXT: xxlor 2, 37, 36
-; VSX-NEXT: vcmpequd 4, 3, 0
-; VSX-NEXT: xxsel 0, 0, 1, 2
-; VSX-NEXT: xxlxor 1, 1, 1
-; VSX-NEXT: xxsel 2, 0, 34, 33
-; VSX-NEXT: xvcmpeqdp 34, 0, 1
-; VSX-NEXT: xxsel 1, 2, 35, 36
+; VSX-NEXT: xvmindp 0, 34, 35
+; VSX-NEXT: lxvd2x 2, 0, 3
+; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha
+; VSX-NEXT: xxlor 1, 37, 36
+; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l
+; VSX-NEXT: lxvd2x 36, 0, 3
+; VSX-NEXT: vcmpequd 5, 2, 4
+; VSX-NEXT: xxsel 0, 0, 2, 1
+; VSX-NEXT: xxlxor 2, 2, 2
+; VSX-NEXT: xxsel 1, 0, 34, 37
+; VSX-NEXT: vcmpequd 2, 3, 4
+; VSX-NEXT: xxsel 1, 1, 35, 34
+; VSX-NEXT: xvcmpeqdp 34, 0, 2
; VSX-NEXT: xxsel 34, 0, 1, 34
; VSX-NEXT: blr
;
; AIX-LABEL: v2f64_minimum:
; AIX: # %bb.0: # %entry
+; AIX-NEXT: ld 3, L..C6(2) # %const.0
; AIX-NEXT: xvcmpeqdp 36, 35, 35
-; AIX-NEXT: ld 3, L..C6(2) # %const.1
; AIX-NEXT: xvcmpeqdp 37, 34, 34
-; AIX-NEXT: xvmindp 0, 34, 35
-; AIX-NEXT: lxvd2x 32, 0, 3
-; AIX-NEXT: ld 3, L..C7(2) # %const.0
+; AIX-NEXT: lxvd2x 2, 0, 3
+; AIX-NEXT: ld 3, L..C7(2) # %const.1
; AIX-NEXT: xxlnor 36, 36, 36
-; AIX-NEXT: lxvd2x 1, 0, 3
; AIX-NEXT: xxlnor 37, 37, 37
-; AIX-NEXT: vcmpequd 1, 2, 0
-; AIX-NEXT: xxlor 2, 37, 36
-; AIX-NEXT: vcmpequd 4, 3, 0
-; AIX-NEXT: xxsel 0, 0, 1, 2
-; AIX-NEXT: xxlxor 1, 1, 1
-; AIX-NEXT: xxsel 2, 0, 34, 33
-; AIX-NEXT: xvcmpeqdp 34, 0, 1
-; AIX-NEXT: xxsel 1, 2, 35, 36
+; AIX-NEXT: xvmindp 0, 34, 35
+; AIX-NEXT: xxlor 1, 37, 36
+; AIX-NEXT: lxvd2x 36, 0, 3
+; AIX-NEXT: vcmpequd 5, 2, 4
+; AIX-NEXT: xxsel 0, 0, 2, 1
+; AIX-NEXT: xxlxor 2, 2, 2
+; AIX-NEXT: xxsel 1, 0, 34, 37
+; AIX-NEXT: vcmpequd 2, 3, 4
+; AIX-NEXT: xxsel 1, 1, 35, 34
+; AIX-NEXT: xvcmpeqdp 34, 0, 2
; AIX-NEXT: xxsel 34, 0, 1, 34
; AIX-NEXT: blr
entry:
@@ -729,21 +729,21 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
; NOVSX-NEXT: # %bb.1: # %entry
; NOVSX-NEXT: fmr 6, 3
; NOVSX-NEXT: .LBB7_2: # %entry
-; NOVSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha
-; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(3)
+; NOVSX-NEXT: addis 4, 2, .LCPI7_0@toc@ha
; NOVSX-NEXT: ld 3, -24(1)
+; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(4)
; NOVSX-NEXT: fmr 5, 0
; NOVSX-NEXT: bc 12, 3, .LBB7_4
; NOVSX-NEXT: # %bb.3: # %entry
; NOVSX-NEXT: fmr 5, 6
; NOVSX-NEXT: .LBB7_4: # %entry
; NOVSX-NEXT: cmpdi 3, 0
-; NOVSX-NEXT: ld 3, -32(1)
+; NOVSX-NEXT: ld 4, -32(1)
; NOVSX-NEXT: bc 12, 2, .LBB7_6
; NOVSX-NEXT: # %bb.5: # %entry
; NOVSX-NEXT: fmr 1, 5
; NOVSX-NEXT: .LBB7_6: # %entry
-; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: cmpdi 4, 0
; NOVSX-NEXT: bc 12, 2, .LBB7_8
; NOVSX-NEXT: # %bb.7: # %entry
; NOVSX-NEXT: fmr 3, 1
@@ -761,12 +761,12 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
; NOVSX-NEXT: # %bb.11: # %entry
; NOVSX-NEXT: fmr 5, 4
; NOVSX-NEXT: .LBB7_12: # %entry
-; NOVSX-NEXT: ld 3, -8(1)
+; NOVSX-NEXT: ld 4, -8(1)
; NOVSX-NEXT: bc 12, 3, .LBB7_14
; NOVSX-NEXT: # %bb.13: # %entry
; NOVSX-NEXT: fmr 0, 5
; NOVSX-NEXT: .LBB7_14: # %entry
-; NOVSX-NEXT: cmpdi 3, 0
+; NOVSX-NEXT: cmpdi 4, 0
; NOVSX-NEXT: ld 3, -16(1)
; NOVSX-NEXT: bc 4, 2, .LBB7_19
; NOVSX-NEXT: # %bb.15: # %entry
@@ -793,42 +793,42 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
;
; VSX-LABEL: v2f64_maximum:
; VSX: # %bb.0: # %entry
-; VSX-NEXT: xvcmpeqdp 37, 35, 35
; VSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha
-; VSX-NEXT: xvcmpeqdp 32, 34, 34
+; VSX-NEXT: xvcmpeqdp 36, 35, 35
+; VSX-NEXT: xvcmpeqdp 37, 34, 34
; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l
+; VSX-NEXT: xxlnor 36, 36, 36
+; VSX-NEXT: xxlnor 37, 37, 37
; VSX-NEXT: xvmaxdp 0, 34, 35
-; VSX-NEXT: lxvd2x 1, 0, 3
+; VSX-NEXT: lxvd2x 2, 0, 3
+; VSX-NEXT: xxlor 1, 37, 36
; VSX-NEXT: xxlxor 36, 36, 36
-; VSX-NEXT: vcmpequd 1, 2, 4
-; VSX-NEXT: xxlnor 37, 37, 37
-; VSX-NEXT: xxlnor 32, 32, 32
-; VSX-NEXT: xxlor 2, 32, 37
-; VSX-NEXT: vcmpequd 5, 3, 4
-; VSX-NEXT: xxsel 0, 0, 1, 2
-; VSX-NEXT: xxsel 1, 0, 34, 33
+; VSX-NEXT: vcmpequd 5, 2, 4
+; VSX-NEXT: xxsel 0, 0, 2, 1
+; VSX-NEXT: xxsel 1, 0, 34, 37
+; VSX-NEXT: vcmpequd 2, 3, 4
+; VSX-NEXT: xxsel 1, 1, 35, 34
; VSX-NEXT: xvcmpeqdp 34, 0, 36
-; VSX-NEXT: xxsel 1, 1, 35, 37
; VSX-NEXT: xxsel 34, 0, 1, 34
; VSX-NEXT: blr
;
; AIX-LABEL: v2f64_maximum:
; AIX: # %bb.0: # %entry
-; AIX-NEXT: xvcmpeqdp 36, 35, 35
; AIX-NEXT: ld 3, L..C8(2) # %const.0
+; AIX-NEXT: xvcmpeqdp 36, 35, 35
; AIX-NEXT: xvcmpeqdp 37, 34, 34
-; AIX-NEXT: xvmaxdp 0, 34, 35
-; AIX-NEXT: xxlxor 32, 32, 32
-; AIX-NEXT: lxvd2x 1, 0, 3
-; AIX-NEXT: vcmpequd 1, 2, 0
+; AIX-NEXT: lxvd2x 2, 0, 3
; AIX-NEXT: xxlnor 36, 36, 36
; AIX-NEXT: xxlnor 37, 37, 37
-; AIX-NEXT: xxlor 2, 37, 36
-; AIX-NEXT: vcmpequd 4, 3, 0
-; AIX-NEXT: xxsel 0, 0, 1, 2
-; AIX-NEXT: xxsel 1, 0, 34, 33
-; AIX-NEXT: xvcmpeqdp 34, 0, 32
-; AIX-NEXT: xxsel 1, 1, 35, 36
+; AIX-NEXT: xvmaxdp 0, 34, 35
+; AIX-NEXT: xxlor 1, 37, 36
+; AIX-NEXT: xxlxor 36, 36, 36
+; AIX-NEXT: vcmpequd 5, 2, 4
+; AIX-NEXT: xxsel 0, 0, 2, 1
+; AIX-NEXT: xxsel 1, 0, 34, 37
+; AIX-NEXT: vcmpequd 2, 3, 4
+; AIX-NEXT: xxsel 1, 1, 35, 34
+; AIX-NEXT: xvcmpeqdp 34, 0, 36
; AIX-NEXT: xxsel 34, 0, 1, 34
; AIX-NEXT: blr
entry:
>From 1a5aa06efab33fc6f614729b727950d0d0b6f9ff Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Mon, 6 Nov 2023 17:22:41 +0800
Subject: [PATCH 3/3] Address comments
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 26 +++++++++----------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 327d80610308102..acba34f4ba15a69 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8270,11 +8270,6 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
unsigned Opc = N->getOpcode();
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- bool NoNaN = (N->getFlags().hasNoNaNs() ||
- (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)));
- bool NoZeroSign =
- (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
- DAG.isKnownNeverZeroFloat(RHS));
bool IsMax = Opc == ISD::FMAXIMUM;
if (VT.isVector() &&
@@ -8285,19 +8280,21 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
// available, use plain select with setcc instead.
SDValue MinMax;
if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE,
- VT))
+ VT)) {
MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, DL, VT,
LHS, RHS);
- else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, VT))
+ } else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM,
+ VT)) {
MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, DL, VT, LHS, RHS);
- else
- MinMax = DAG.getSelect(
- DL, VT,
- DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT), LHS,
- RHS);
+ } else {
+ SDValue Compare =
+ DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
+ MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS);
+ }
// Propagate any NaN of both operands
- if (!NoNaN) {
+ if (!N->getFlags().hasNoNaNs() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
ConstantFP *FPNaN = ConstantFP::get(
*DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
@@ -8305,7 +8302,8 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
}
// fminimum/fmaximum requires -0.0 less than +0.0
- if (!NoZeroSign) {
+ if (!N->getFlags().hasNoSignedZeros() && !DAG.isKnownNeverZeroFloat(LHS) &&
+ !DAG.isKnownNeverZeroFloat(RHS)) {
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
SDValue TestZero =
More information about the flang-commits
mailing list