[llvm] 94f6b6d - [SelectionDAG][RISCV] Promote VECREDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM} (#128800)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 07:13:34 PST 2025
Author: Jim Lin
Date: 2025-02-28T23:13:30+08:00
New Revision: 94f6b6d5389cc53a585e55ef3a7e4173c89ae05b
URL: https://github.com/llvm/llvm-project/commit/94f6b6d5389cc53a585e55ef3a7e4173c89ae05b
DIFF: https://github.com/llvm/llvm-project/commit/94f6b6d5389cc53a585e55ef3a7e4173c89ae05b.diff
LOG: [SelectionDAG][RISCV] Promote VECREDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM} (#128800)
This patch also adds the tests for VP_REDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}, which have been supported for a while.
Added:
llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f56097fdbb51a..2b8818482a333 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2913,7 +2913,9 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
}
SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
- MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ bool IsVPOpcode = ISD::isVPOpcode(Node->getOpcode());
+ MVT VecVT = IsVPOpcode ? Node->getOperand(1).getSimpleValueType()
+ : Node->getOperand(0).getSimpleValueType();
MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
MVT ScalarVT = Node->getSimpleValueType(0);
MVT NewScalarVT = NewVecVT.getVectorElementType();
@@ -2921,16 +2923,13 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
SDLoc DL(Node);
SmallVector<SDValue, 4> Operands(Node->getNumOperands());
- // promote the initial value.
// FIXME: Support integer.
assert(Node->getOperand(0).getValueType().isFloatingPoint() &&
"Only FP promotion is supported");
- Operands[0] =
- DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
- for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j)
if (Node->getOperand(j).getValueType().isVector() &&
- !(ISD::isVPOpcode(Node->getOpcode()) &&
+ !(IsVPOpcode &&
ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand.
// promote the vector operand.
// FIXME: Support integer.
@@ -2938,6 +2937,10 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
"Only FP promotion is supported");
Operands[j] =
DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ } else if (Node->getOperand(j).getValueType().isFloatingPoint()) {
+ // promote the initial value.
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j));
} else {
Operands[j] = Node->getOperand(j); // Skip VL operand.
}
@@ -5049,7 +5052,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::SINT_TO_FP ||
Node->getOpcode() == ISD::SETCC ||
Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ Node->getOpcode() == ISD::INSERT_VECTOR_ELT ||
+ Node->getOpcode() == ISD::VECREDUCE_FMAX ||
+ Node->getOpcode() == ISD::VECREDUCE_FMIN ||
+ Node->getOpcode() == ISD::VECREDUCE_FMAXIMUM ||
+ Node->getOpcode() == ISD::VECREDUCE_FMINIMUM) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::ATOMIC_STORE ||
@@ -5796,6 +5803,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
}
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
case ISD::VP_REDUCE_FMAXIMUM:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index de4447fb0cf1a..27bde7b96c857 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -503,13 +503,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
+ case ISD::VECTOR_FIND_LAST_ACTIVE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
- case ISD::VECTOR_FIND_LAST_ACTIVE:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
+ // Defer non-vector results to LegalizeDAG.
+ if (Action == TargetLowering::Promote)
+ Action = TargetLowering::Legal;
break;
case ISD::VECREDUCE_SEQ_FADD:
case ISD::VECREDUCE_SEQ_FMUL:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6076fe56416ad..759cf531b74b1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -959,13 +959,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminZvfbfminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
- ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
- ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
- ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
- ISD::STRICT_FMA};
+ ISD::FMINNUM,
+ ISD::FMAXNUM,
+ ISD::FADD,
+ ISD::FSUB,
+ ISD::FMUL,
+ ISD::FMA,
+ ISD::FDIV,
+ ISD::FSQRT,
+ ISD::FCEIL,
+ ISD::FTRUNC,
+ ISD::FFLOOR,
+ ISD::FROUND,
+ ISD::FROUNDEVEN,
+ ISD::FRINT,
+ ISD::FNEARBYINT,
+ ISD::IS_FPCLASS,
+ ISD::SETCC,
+ ISD::FMAXIMUM,
+ ISD::FMINIMUM,
+ ISD::STRICT_FADD,
+ ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT,
+ ISD::STRICT_FMA,
+ ISD::VECREDUCE_FMIN,
+ ISD::VECREDUCE_FMAX,
+ ISD::VECREDUCE_FMINIMUM,
+ ISD::VECREDUCE_FMAXIMUM};
// TODO: support more vp ops.
static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll
new file mode 100644
index 0000000000000..1c42cd29deca9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+define bfloat @vreduce_fmin_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmin_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v8, v10, v10
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call bfloat @llvm.vector.reduce.fmin.nxv4bf16(<vscale x 4 x bfloat> %val)
+ ret bfloat %s
+}
+
+define bfloat @vreduce_fmax_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmax_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v8, v10, v10
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call bfloat @llvm.vector.reduce.fmax.nxv4bf16(<vscale x 4 x bfloat> %val)
+ ret bfloat %s
+}
+
+define bfloat @vreduce_fmin_nnan_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmin_nnan_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v8, v10, v10
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call nnan bfloat @llvm.vector.reduce.fmin.nxv4bf16(<vscale x 4 x bfloat> %val)
+ ret bfloat %s
+}
+
+define bfloat @vreduce_fmax_nnan_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmax_nnan_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v8, v10, v10
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call nnan bfloat @llvm.vector.reduce.fmax.nxv4bf16(<vscale x 4 x bfloat> %val)
+ ret bfloat %s
+}
+
+define bfloat @vreduce_fminimum_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fminimum_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v10, v10
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: beqz a0, .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: lui a0, 523264
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: vfredmin.vs v8, v10, v10
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call bfloat @llvm.vector.reduce.fminimum.nxv4bf16(<vscale x 4 x bfloat> %val)
+ ret bfloat %s
+}
+
+define bfloat @vreduce_fmaximum_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmaximum_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v10, v10
+; CHECK-NEXT: vcpop.m a0, v8
+; CHECK-NEXT: beqz a0, .LBB5_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: lui a0, 523264
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: vfredmax.vs v8, v10, v10
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call bfloat @llvm.vector.reduce.fmaximum.nxv4bf16(<vscale x 4 x bfloat> %val)
+ ret bfloat %s
+}
+
+define bfloat @vreduce_fminimum_nnan_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fminimum_nnan_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v8, v10, v10
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call nnan bfloat @llvm.vector.reduce.fminimum.nxv4bf16(<vscale x 4 x bfloat> %val)
+ ret bfloat %s
+}
+
+define bfloat @vreduce_fmaximum_nnan_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v8, v10, v10
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call nnan bfloat @llvm.vector.reduce.fmaximum.nxv4bf16(<vscale x 4 x bfloat> %val)
+ ret bfloat %s
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
new file mode 100644
index 0000000000000..e269b13137d44
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+
+define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmin_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmin_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmax_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmax_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmin_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmin_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmin_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmax_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmax_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmax_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fminimum_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fminimum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8
+; ZVFH-NEXT: vcpop.m a0, v9
+; ZVFH-NEXT: beqz a0, .LBB4_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI4_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB4_2:
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fminimum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
+; ZVFHMIN-NEXT: vcpop.m a0, v8
+; ZVFHMIN-NEXT: beqz a0, .LBB4_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB4_2:
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmaximum_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmaximum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8
+; ZVFH-NEXT: vcpop.m a0, v9
+; ZVFH-NEXT: beqz a0, .LBB5_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI5_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB5_2:
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmaximum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
+; ZVFHMIN-NEXT: vcpop.m a0, v8
+; ZVFHMIN-NEXT: beqz a0, .LBB5_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB5_2:
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fminimum_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fminimum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fminimum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmaximum_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmaximum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmaximum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll
new file mode 100644
index 0000000000000..37bd0a0496dcf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+define bfloat @vpreduce_fmin_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmin_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %s
+}
+
+define bfloat @vpreduce_fmax_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmax_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %s
+}
+
+define bfloat @vpreduce_fmin_nnan_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmin_nnan_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call nnan bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %s
+}
+
+define bfloat @vpreduce_fmax_nnan_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmax_nnan_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call nnan bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %s
+}
+
+define bfloat @vpreduce_fminimum_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fminimum_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t
+; CHECK-NEXT: feq.s a1, fa5, fa5
+; CHECK-NEXT: vcpop.m a2, v8, v0.t
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: beqz a1, .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: lui a0, 523264
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %s
+}
+
+define bfloat @vpreduce_fmaximum_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmaximum_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t
+; CHECK-NEXT: feq.s a1, fa5, fa5
+; CHECK-NEXT: vcpop.m a2, v8, v0.t
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: beqz a1, .LBB5_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: lui a0, 523264
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %s
+}
+
+define bfloat @vpreduce_fminimum_nnan_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fminimum_nnan_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call nnan bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %s
+}
+
+define bfloat @vpreduce_fmaximum_nnan_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmaximum_nnan_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %s = call nnan bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
new file mode 100644
index 0000000000000..8993bf8a767d8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+
+; vp.reduce.fmin on f16: with Zvfh the reduction runs directly at e16
+; (vfredmin); with Zvfhmin there is no f16 vector arithmetic, so the source
+; and start value are widened to f32, reduced at e32, and the scalar result
+; is narrowed back to f16 (the type-promotion path added by this patch).
+define half @vpreduce_fmin_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmin_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmin_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fmin.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+; Same structure as vpreduce_fmin_nxv4f16 but with vfredmax for the fmax
+; reduction (Zvfh direct at e16; Zvfhmin widened to f32).
+define half @vpreduce_fmax_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmax_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmax_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fmax.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+; fmin with the nnan flag: codegen is identical to the non-nnan fmin case
+; above — fmin has no extra NaN handling for the flag to remove.
+define half @vpreduce_fmin_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmin_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmin_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fmin.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+; fmax with the nnan flag: codegen is identical to the non-nnan fmax case
+; above — fmax has no extra NaN handling for the flag to remove.
+define half @vpreduce_fmax_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmax_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmax_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fmax.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+; fminimum (unlike fmin) must propagate NaN: vmfne.vv + vcpop.m detect NaN
+; lanes under the mask, feq.s tests the start value; if either is NaN a
+; canonical quiet NaN is returned (constant pool for ZVFH; for ZVFHMIN
+; lui 523264 materializes f32 0x7fc00000 then narrows to f16).  Otherwise
+; control falls through to an ordinary masked vfredmin reduction.
+define half @vpreduce_fminimum_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fminimum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t
+; ZVFH-NEXT: fcvt.s.h fa5, fa0
+; ZVFH-NEXT: vcpop.m a1, v9, v0.t
+; ZVFH-NEXT: feq.s a2, fa5, fa5
+; ZVFH-NEXT: xori a2, a2, 1
+; ZVFH-NEXT: or a1, a1, a2
+; ZVFH-NEXT: beqz a1, .LBB4_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI4_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB4_2:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fminimum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT: feq.s a1, fa5, fa5
+; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t
+; ZVFHMIN-NEXT: xori a1, a1, 1
+; ZVFHMIN-NEXT: or a1, a2, a1
+; ZVFHMIN-NEXT: beqz a1, .LBB4_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB4_2:
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fminimum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+; fmaximum mirror of the fminimum case above: same masked NaN detection and
+; canonical-NaN early return, with vfredmax on the non-NaN fall-through path.
+define half @vpreduce_fmaximum_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmaximum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t
+; ZVFH-NEXT: fcvt.s.h fa5, fa0
+; ZVFH-NEXT: vcpop.m a1, v9, v0.t
+; ZVFH-NEXT: feq.s a2, fa5, fa5
+; ZVFH-NEXT: xori a2, a2, 1
+; ZVFH-NEXT: or a1, a1, a2
+; ZVFH-NEXT: beqz a1, .LBB5_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI5_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB5_2:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmaximum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT: feq.s a1, fa5, fa5
+; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t
+; ZVFHMIN-NEXT: xori a1, a1, 1
+; ZVFHMIN-NEXT: or a1, a2, a1
+; ZVFHMIN-NEXT: beqz a1, .LBB5_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB5_2:
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+; With nnan, fminimum's NaN-detection branch is elided entirely; codegen
+; collapses to the same sequence as plain fmin above.
+define half @vpreduce_fminimum_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fminimum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fminimum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fminimum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+; With nnan, fmaximum's NaN-detection branch is elided entirely; codegen
+; collapses to the same sequence as plain fmax above.
+define half @vpreduce_fmaximum_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmaximum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmaximum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
More information about the llvm-commits
mailing list