[llvm] [VP][RISCV] Add vp.reduce.fmaximum/fminimum and its RISC-V codegen (PR #91782)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Fri May 10 14:39:45 PDT 2024
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/91782
>From 89f89cfa38d0358841273bb54dc7db3e363bc429 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Thu, 9 May 2024 09:45:03 -0700
Subject: [PATCH 1/2] [VP][RISCV] Add vp.reduce.fmaximum/fminimum and its
RISC-V codegen
`vp.reduce.fmaximum/fminimum` are the VP version of
`vector.reduce.fmaximum/fminimum`.
---
llvm/docs/LangRef.rst | 140 ++++++++++++
llvm/include/llvm/IR/Intrinsics.td | 10 +
llvm/include/llvm/IR/VPIntrinsics.def | 8 +
llvm/lib/CodeGen/ExpandVectorPredication.cpp | 24 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 6 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 4 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 34 ++-
llvm/test/CodeGen/Generic/expand-vp.ll | 83 +++++--
llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll | 32 +++
llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll | 205 ++++++++++++++++++
llvm/test/Verifier/vp-intrinsics.ll | 8 +-
llvm/unittests/IR/VPIntrinsicTest.cpp | 3 +-
13 files changed, 530 insertions(+), 29 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 6f5a4644ffc2b..562841101b75a 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -22182,6 +22182,146 @@ Examples:
%also.r = call float @llvm.minnum.f32(float %reduction, float %start)
+.. _int_vp_reduce_fmaximum:
+
+'``llvm.vp.reduce.fmaximum.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare float @llvm.vp.reduce.fmaximum.v4f32(float <start_value>, <4 x float> <val>, <4 x i1> <mask>, i32 <vector_length>)
+ declare double @llvm.vp.reduce.fmaximum.nxv8f64(double <start_value>, <vscale x 8 x double> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated floating-point ``MAX`` reduction of a vector and a scalar starting
+value, returning the result as a scalar.
+
+
+Arguments:
+""""""""""
+
+The first operand is the start value of the reduction, which must be a scalar
+floating-point type equal to the result type. The second operand is the vector
+on which the reduction is performed and must be a vector of floating-point
+values whose element type is the result/start type. The third operand is the
+vector mask and is a vector of boolean values with the same number of elements
+as the vector operand. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.reduce.fmaximum``' intrinsic performs the floating-point ``MAX``
+reduction (:ref:`llvm.vector.reduce.fmaximum <int_vector_reduce_fmaximum>`) of
+the vector operand ``val`` on each enabled lane, taking the maximum of that and
+the scalar ``start_value``. Disabled lanes are treated as containing the
+neutral value (i.e. having no effect on the reduction operation). If the vector
+length is zero, the result is the start value.
+
+The neutral value is dependent on the :ref:`fast-math flags <fastmath>`. If no
+flags are set or only ``nnan`` is set, the neutral value is ``-Infinity``.
+If ``ninf`` is set, then the neutral value is the smallest (most negative)
+finite floating-point value for the result type.
+
+This instruction has the same comparison semantics as the
+:ref:`llvm.vector.reduce.fmaximum <int_vector_reduce_fmaximum>` intrinsic (and
+thus the '``llvm.maximum.*``' intrinsic). That is, the result will always be a
+number unless any of the elements in the vector or the starting value is
+``NaN``. Namely, this intrinsic propagates ``NaN``. Also, -0.0 is considered
+less than +0.0.
+
+To ignore the start value, the neutral value can be used.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %r = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %a, <4 x i1> %mask, i32 %evl)
+ ; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+ ; are treated as though %mask were false for those lanes.
+
+ %masked.a = select <4 x i1> %mask, <4 x float> %a, <4 x float> <float -infinity, float -infinity, float -infinity, float -infinity>
+ %reduction = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %masked.a)
+ %also.r = call float @llvm.maximum.f32(float %reduction, float %start)
+
+
+.. _int_vp_reduce_fminimum:
+
+'``llvm.vp.reduce.fminimum.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare float @llvm.vp.reduce.fminimum.v4f32(float <start_value>, <4 x float> <val>, <4 x i1> <mask>, i32 <vector_length>)
+ declare double @llvm.vp.reduce.fminimum.nxv8f64(double <start_value>, <vscale x 8 x double> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated floating-point ``MIN`` reduction of a vector and a scalar starting
+value, returning the result as a scalar.
+
+
+Arguments:
+""""""""""
+
+The first operand is the start value of the reduction, which must be a scalar
+floating-point type equal to the result type. The second operand is the vector
+on which the reduction is performed and must be a vector of floating-point
+values whose element type is the result/start type. The third operand is the
+vector mask and is a vector of boolean values with the same number of elements
+as the vector operand. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.reduce.fminimum``' intrinsic performs the floating-point ``MIN``
+reduction (:ref:`llvm.vector.reduce.fminimum <int_vector_reduce_fminimum>`) of
+the vector operand ``val`` on each enabled lane, taking the minimum of that and
+the scalar ``start_value``. Disabled lanes are treated as containing the neutral
+value (i.e. having no effect on the reduction operation). If the vector length
+is zero, the result is the start value.
+
+The neutral value is dependent on the :ref:`fast-math flags <fastmath>`. If no
+flags are set or only ``nnan`` is set, the neutral value is ``+Infinity``.
+If ``ninf`` is set, then the neutral value is the largest finite floating-point
+value for the result type.
+
+This instruction has the same comparison semantics as the
+:ref:`llvm.vector.reduce.fminimum <int_vector_reduce_fminimum>` intrinsic (and
+thus the '``llvm.minimum.*``' intrinsic). That is, the result will always be a
+number unless any of the elements in the vector or the starting value is
+``NaN``. Namely, this intrinsic propagates ``NaN``. Also, -0.0 is considered
+less than +0.0.
+
+To ignore the start value, the neutral value can be used.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %r = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %a, <4 x i1> %mask, i32 %evl)
+ ; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+ ; are treated as though %mask were false for those lanes.
+
+ %masked.a = select <4 x i1> %mask, <4 x float> %a, <4 x float> <float infinity, float infinity, float infinity, float infinity>
+ %reduction = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %masked.a)
+ %also.r = call float @llvm.minimum.f32(float %reduction, float %start)
+
+
.. _int_get_active_lane_mask:
'``llvm.get.active.lane.mask.*``' Intrinsics
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 29143123193b9..42192d472ba6e 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2243,6 +2243,16 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
+ def int_vp_reduce_fmaximum : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_fminimum : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
}
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg<ArgIndex<1>>] in {
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index f1cc8bcae467b..20f5bb2b531d3 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -701,6 +701,14 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmax, VP_REDUCE_FMAX,
HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN,
vector_reduce_fmin)
+// llvm.vp.reduce.fmaximum(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmaximum, VP_REDUCE_FMAXIMUM,
+ vector_reduce_fmaximum)
+
+// llvm.vp.reduce.fminimum(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_fminimum, VP_REDUCE_FMINIMUM,
+ vector_reduce_fminimum)
+
#undef HELPER_REGISTER_REDUCTION_VP
// Specialized helper macro for VP reductions as above but with two forms:
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 8e623c85b737b..dc35f33a3a059 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -367,7 +367,8 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
Type *EltTy) {
bool Negative = false;
unsigned EltBits = EltTy->getScalarSizeInBits();
- switch (VPI.getIntrinsicID()) {
+ Intrinsic::ID VID = VPI.getIntrinsicID();
+ switch (VID) {
default:
llvm_unreachable("Expecting a VP reduction intrinsic");
case Intrinsic::vp_reduce_add:
@@ -387,12 +388,17 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
return ConstantInt::get(EltTy->getContext(),
APInt::getSignedMinValue(EltBits));
case Intrinsic::vp_reduce_fmax:
+ case Intrinsic::vp_reduce_fmaximum:
Negative = true;
[[fallthrough]];
- case Intrinsic::vp_reduce_fmin: {
+ case Intrinsic::vp_reduce_fmin:
+ case Intrinsic::vp_reduce_fminimum: {
+ bool PropagatesNaN = VID == Intrinsic::vp_reduce_fminimum ||
+ VID == Intrinsic::vp_reduce_fmaximum;
FastMathFlags Flags = VPI.getFastMathFlags();
const fltSemantics &Semantics = EltTy->getFltSemantics();
- return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
+ return (!Flags.noNaNs() && !PropagatesNaN)
+ ? ConstantFP::getQNaN(EltTy, Negative)
: !Flags.noInfs()
? ConstantFP::getInfinity(EltTy, Negative)
: ConstantFP::get(EltTy,
@@ -480,6 +486,18 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
Reduction =
Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
break;
+ case Intrinsic::vp_reduce_fmaximum:
+ Reduction = Builder.CreateFPMaximumReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::maximum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fminimum:
+ Reduction = Builder.CreateFPMinimumReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::minimum, Reduction, Start);
+ break;
case Intrinsic::vp_reduce_fadd:
Reduction = Builder.CreateFAddReduce(Start, RedOp);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b3ae419b20fec..fd97a1283b65a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1222,6 +1222,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::VP_REDUCE_UMIN:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
case ISD::VP_REDUCE_SEQ_FADD:
case ISD::VP_REDUCE_SEQ_FMUL:
Action = TLI.getOperationAction(
@@ -5015,6 +5017,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::VP_REDUCE_FMUL ||
Node->getOpcode() == ISD::VP_REDUCE_FMAX ||
Node->getOpcode() == ISD::VP_REDUCE_FMIN ||
+ Node->getOpcode() == ISD::VP_REDUCE_FMAXIMUM ||
+ Node->getOpcode() == ISD::VP_REDUCE_FMINIMUM ||
Node->getOpcode() == ISD::VP_REDUCE_SEQ_FADD)
OVT = Node->getOperand(1).getSimpleValueType();
if (Node->getOpcode() == ISD::BR_CC ||
@@ -5687,6 +5691,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::VP_REDUCE_FMUL:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
case ISD::VP_REDUCE_SEQ_FADD:
Results.push_back(PromoteReduction(Node));
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 43db9b8e6be9e..cd858003cf03b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3148,6 +3148,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_REDUCE_UMIN:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
Res = SplitVecOp_VP_REDUCE(N, OpNo);
break;
case ISD::VP_CTTZ_ELTS:
@@ -6251,6 +6253,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_REDUCE_UMIN:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
Res = WidenVecOp_VP_REDUCE(N);
break;
case ISD::VP_CTTZ_ELTS:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9c1f3c1e34318..0a258350c68a5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -470,8 +470,10 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
case ISD::VP_REDUCE_FMIN:
return ISD::FMINNUM;
case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VP_REDUCE_FMAXIMUM:
return ISD::FMAXIMUM;
case ISD::VECREDUCE_FMINIMUM:
+ case ISD::VP_REDUCE_FMINIMUM:
return ISD::FMINIMUM;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e0937989a6a41..ead14c6945025 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -713,7 +713,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
- ISD::EXPERIMENTAL_VP_SPLICE};
+ ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
+ ISD::VP_REDUCE_FMAXIMUM};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -958,7 +959,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
- ISD::VP_FMAXIMUM};
+ ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
@@ -6661,6 +6662,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_REDUCE_SEQ_FADD:
case ISD::VP_REDUCE_FMIN:
case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMINIMUM:
+ case ISD::VP_REDUCE_FMAXIMUM:
if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16()))
@@ -9526,8 +9529,10 @@ static unsigned getRVVReductionOp(unsigned ISDOpcode) {
case ISD::VP_REDUCE_SEQ_FADD:
return RISCVISD::VECREDUCE_SEQ_FADD_VL;
case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMAXIMUM:
return RISCVISD::VECREDUCE_FMAX_VL;
case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMINIMUM:
return RISCVISD::VECREDUCE_FMIN_VL;
}
@@ -9786,8 +9791,10 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
+ unsigned Opc = Op.getOpcode();
SDValue Vec = Op.getOperand(1);
EVT VecEVT = Vec.getValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
// TODO: The type may need to be widened rather than split. Or widened before
// it can be split.
@@ -9795,7 +9802,7 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
return SDValue();
MVT VecVT = VecEVT.getSimpleVT();
- unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
+ unsigned RVVOpcode = getRVVReductionOp(Opc);
if (VecVT.isFixedLengthVector()) {
auto ContainerVT = getContainerForFixedLengthVector(VecVT);
@@ -9804,8 +9811,25 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
SDValue VL = Op.getOperand(3);
SDValue Mask = Op.getOperand(2);
- return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
- Vec, Mask, VL, DL, DAG, Subtarget);
+ SDValue Res =
+ lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
+ Vec, Mask, VL, DL, DAG, Subtarget);
+ if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
+ Op->getFlags().hasNoNaNs())
+ return Res;
+
+ // Propagate NaNs.
+ MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
+ SDValue IsNaN = DAG.getNode(
+ RISCVISD::SETCC_VL, DL, PredVT,
+ {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
+ SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
+ SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
+ DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
+ MVT ResVT = Res.getSimpleValueType();
+ return DAG.getSelect(
+ DL, ResVT, NoNaNs, Res,
+ DAG.getConstantFP(DAG.EVTToAPFloatSemantics(ResVT), DL, ResVT));
}
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
diff --git a/llvm/test/CodeGen/Generic/expand-vp.ll b/llvm/test/CodeGen/Generic/expand-vp.ll
index 40d183273b86d..4fee9a533b947 100644
--- a/llvm/test/CodeGen/Generic/expand-vp.ll
+++ b/llvm/test/CodeGen/Generic/expand-vp.ll
@@ -41,6 +41,8 @@ declare i32 @llvm.vp.reduce.umin.v4i32(i32, <4 x i32>, <4 x i1>, i32)
declare i32 @llvm.vp.reduce.umax.v4i32(i32, <4 x i32>, <4 x i1>, i32)
declare float @llvm.vp.reduce.fmin.v4f32(float, <4 x float>, <4 x i1>, i32)
declare float @llvm.vp.reduce.fmax.v4f32(float, <4 x float>, <4 x i1>, i32)
+declare float @llvm.vp.reduce.fminimum.v4f32(float, <4 x float>, <4 x i1>, i32)
+declare float @llvm.vp.reduce.fmaximum.v4f32(float, <4 x float>, <4 x i1>, i32)
declare float @llvm.vp.reduce.fadd.v4f32(float, <4 x float>, <4 x i1>, i32)
declare float @llvm.vp.reduce.fmul.v4f32(float, <4 x float>, <4 x i1>, i32)
; Comparisons
@@ -133,10 +135,16 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
%r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
%r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
%r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r6 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r7 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r8 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r6 = call float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r7 = call nnan float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r8 = call nnan ninf float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r9 = call float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r10 = call nnan float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r11 = call nnan ninf float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r12 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r13 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r14 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+ %r15 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
ret void
}
@@ -254,6 +262,27 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
; ALL-CONVERT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN_NINF]])
; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maxnum.f32(float [[RED]], float %f)
+
+; ALL-CONVERT: [[FMINIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
+; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM]])
+; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minimum.f32(float [[RED]], float %f)
+; ALL-CONVERT: [[FMINIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
+; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM_NNAN]])
+; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minimum.f32(float [[RED]], float %f)
+; ALL-CONVERT: [[FMINIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000>
+; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM_NNAN_NINF]])
+; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minimum.f32(float [[RED]], float %f)
+
+; ALL-CONVERT: [[FMAXIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
+; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM]])
+; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maximum.f32(float [[RED]], float %f)
+; ALL-CONVERT: [[FMAXIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
+; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM_NNAN]])
+; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maximum.f32(float [[RED]], float %f)
+; ALL-CONVERT: [[FMAXIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000>
+; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM_NNAN_NINF]])
+; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maximum.f32(float [[RED]], float %f)
+
; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]])
; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
@@ -328,16 +357,22 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
; LEGAL_LEGAL-NEXT: ret void
; LEGAL_LEGAL: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; LEGAL_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r6 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r7 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r8 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r6 = call float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r7 = call nnan float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r8 = call nnan ninf float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r9 = call float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r10 = call nnan float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r11 = call nnan ninf float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r12 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r13 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r14 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; LEGAL_LEGAL-NEXT: %r15 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: ret void
; LEGAL_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
@@ -425,10 +460,16 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
; DISCARD_LEGAL-NOT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
; DISCARD_LEGAL-NOT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
; DISCARD_LEGAL-NOT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r6 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r7 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r8 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r6 = call float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; DISCARD_LEGAL-NOT: %r7 = call nnan float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; DISCARD_LEGAL-NOT: %r8 = call nnan ninf float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; DISCARD_LEGAL-NOT: %r9 = call float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; DISCARD_LEGAL-NOT: %r10 = call nnan float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; DISCARD_LEGAL-NOT: %r11 = call nnan ninf float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; DISCARD_LEGAL-NOT: %r12 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; DISCARD_LEGAL-NOT: %r13 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; DISCARD_LEGAL-NOT: %r14 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
+; DISCARD_LEGAL-NOT: %r15 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
; DISCARD_LEGAL: ret void
; DISCARD_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
@@ -501,6 +542,12 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
; CONVERT_LEGAL-NOT: %{{.+}} = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
; CONVERT_LEGAL-NOT: %{{.+}} = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> [[NEWM]], i32 4)
+; CONVERT_LEGAL-NOT: %{{.+}} = call nnan float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; CONVERT_LEGAL-NOT: %{{.+}} = call nnan ninf float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; CONVERT_LEGAL-NOT: %{{.+}} = call nnan float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; CONVERT_LEGAL-NOT: %{{.+}} = call nnan ninf float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
; CONVERT_LEGAL-NOT: %{{.+}} = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll b/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll
index e9259b3a1d929..1bfc0f432eb55 100644
--- a/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll
@@ -631,3 +631,35 @@ define float @vreduce_fmax_v4f32_false_mask(float %start, <4 x float> %val, i32
%s = call float @llvm.vp.reduce.fmax.v4f32(float %start, <4 x float> %val, <4 x i1> zeroinitializer, i32 %evl)
ret float %s
}
+
+define float @vreduce_fminimum_v4f32_zero_evl(float %start, <4 x float> %val, <4 x i1> %m) {
+; CHECK-LABEL: vreduce_fminimum_v4f32_zero_evl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 0)
+ ret float %s
+}
+
+define float @vreduce_fminimum_v4f32_false_mask(float %start, <4 x float> %val, i32 %evl) {
+; CHECK-LABEL: vreduce_fminimum_v4f32_false_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> zeroinitializer, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fmaximum_v4f32_zero_evl(float %start, <4 x float> %val, <4 x i1> %m) {
+; CHECK-LABEL: vreduce_fmaximum_v4f32_zero_evl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 0)
+ ret float %s
+}
+
+define float @vreduce_fmaximum_v4f32_false_mask(float %start, <4 x float> %val, i32 %evl) {
+; CHECK-LABEL: vreduce_fmaximum_v4f32_false_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> zeroinitializer, i32 %evl)
+ ret float %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll
new file mode 100644
index 0000000000000..ac78759c47fce
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
+
+define float @vreduce_fminimum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) {
+; RV32-LABEL: vreduce_fminimum_nxv4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.s.f v10, fa0
+; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT: vfredmin.vs v10, v8, v10, v0.t
+; RV32-NEXT: vmfne.vv v11, v8, v8, v0.t
+; RV32-NEXT: vcpop.m a0, v11, v0.t
+; RV32-NEXT: beqz a0, .LBB0_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.w.x fa0, zero
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB0_2:
+; RV32-NEXT: vfmv.f.s fa0, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fminimum_nxv4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vfmv.s.f v10, fa0
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV64-NEXT: vfredmin.vs v10, v8, v10, v0.t
+; RV64-NEXT: vmfne.vv v11, v8, v8, v0.t
+; RV64-NEXT: vcpop.m a0, v11, v0.t
+; RV64-NEXT: beqz a0, .LBB0_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.w.x fa0, zero
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB0_2:
+; RV64-NEXT: vfmv.f.s fa0, v10
+; RV64-NEXT: ret
+ %s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fmaximum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) {
+; RV32-LABEL: vreduce_fmaximum_nxv4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.s.f v10, fa0
+; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT: vfredmax.vs v10, v8, v10, v0.t
+; RV32-NEXT: vmfne.vv v11, v8, v8, v0.t
+; RV32-NEXT: vcpop.m a0, v11, v0.t
+; RV32-NEXT: beqz a0, .LBB1_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.w.x fa0, zero
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB1_2:
+; RV32-NEXT: vfmv.f.s fa0, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmaximum_nxv4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vfmv.s.f v10, fa0
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV64-NEXT: vfredmax.vs v10, v8, v10, v0.t
+; RV64-NEXT: vmfne.vv v11, v8, v8, v0.t
+; RV64-NEXT: vcpop.m a0, v11, v0.t
+; RV64-NEXT: beqz a0, .LBB1_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.w.x fa0, zero
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB1_2:
+; RV64-NEXT: vfmv.f.s fa0, v10
+; RV64-NEXT: ret
+ %s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fminimum_nnan_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) {
+; RV32-LABEL: vreduce_fminimum_nnan_nxv4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.s.f v10, fa0
+; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT: vfredmin.vs v10, v8, v10, v0.t
+; RV32-NEXT: vfmv.f.s fa0, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fminimum_nnan_nxv4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vfmv.s.f v10, fa0
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV64-NEXT: vfredmin.vs v10, v8, v10, v0.t
+; RV64-NEXT: vfmv.f.s fa0, v10
+; RV64-NEXT: ret
+ %s = call nnan float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fmaximum_nnan_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) {
+; RV32-LABEL: vreduce_fmaximum_nnan_nxv4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.s.f v10, fa0
+; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV32-NEXT: vfredmax.vs v10, v8, v10, v0.t
+; RV32-NEXT: vfmv.f.s fa0, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmaximum_nnan_nxv4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vfmv.s.f v10, fa0
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; RV64-NEXT: vfredmax.vs v10, v8, v10, v0.t
+; RV64-NEXT: vfmv.f.s fa0, v10
+; RV64-NEXT: ret
+ %s = call nnan float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) {
+; RV32-LABEL: vreduce_fminimum_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.s.f v9, fa0
+; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; RV32-NEXT: vmfne.vv v8, v8, v8, v0.t
+; RV32-NEXT: vcpop.m a0, v8, v0.t
+; RV32-NEXT: beqz a0, .LBB4_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.w.x fa0, zero
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB4_2:
+; RV32-NEXT: vfmv.f.s fa0, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fminimum_v4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vfmv.s.f v9, fa0
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; RV64-NEXT: vmfne.vv v8, v8, v8, v0.t
+; RV64-NEXT: vcpop.m a0, v8, v0.t
+; RV64-NEXT: beqz a0, .LBB4_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.w.x fa0, zero
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB4_2:
+; RV64-NEXT: vfmv.f.s fa0, v9
+; RV64-NEXT: ret
+ %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) {
+; RV32-LABEL: vreduce_fmaximum_v4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vfmv.s.f v9, fa0
+; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; RV32-NEXT: vmfne.vv v8, v8, v8, v0.t
+; RV32-NEXT: vcpop.m a0, v8, v0.t
+; RV32-NEXT: beqz a0, .LBB5_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: fmv.w.x fa0, zero
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB5_2:
+; RV32-NEXT: vfmv.f.s fa0, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fmaximum_v4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vfmv.s.f v9, fa0
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; RV64-NEXT: vmfne.vv v8, v8, v8, v0.t
+; RV64-NEXT: vcpop.m a0, v8, v0.t
+; RV64-NEXT: beqz a0, .LBB5_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: fmv.w.x fa0, zero
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB5_2:
+; RV64-NEXT: vfmv.f.s fa0, v9
+; RV64-NEXT: ret
+ %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
+ ret float %s
+}
diff --git a/llvm/test/Verifier/vp-intrinsics.ll b/llvm/test/Verifier/vp-intrinsics.ll
index 765d67356c2b9..9ed8279f94d71 100644
--- a/llvm/test/Verifier/vp-intrinsics.ll
+++ b/llvm/test/Verifier/vp-intrinsics.ll
@@ -43,8 +43,10 @@ define void @test_vp_reduction(i32 %x, <8 x i32> %vi, <8 x float> %vf, float %f,
%r8 = call i32 @llvm.vp.reduce.umin.v8i32(i32 %x, <8 x i32> %vi, <8 x i1> %m, i32 %n)
%r9 = call float @llvm.vp.reduce.fmin.v8f32(float %f, <8 x float> %vf, <8 x i1> %m, i32 %n)
%rA = call float @llvm.vp.reduce.fmax.v8f32(float %f, <8 x float> %vf, <8 x i1> %m, i32 %n)
- %rB = call float @llvm.vp.reduce.fadd.v8f32(float %f, <8 x float> %vf, <8 x i1> %m, i32 %n)
- %rC = call float @llvm.vp.reduce.fmul.v8f32(float %f, <8 x float> %vf, <8 x i1> %m, i32 %n)
+ %rB = call float @llvm.vp.reduce.fminimum.v8f32(float %f, <8 x float> %vf, <8 x i1> %m, i32 %n)
+ %rC = call float @llvm.vp.reduce.fmaximum.v8f32(float %f, <8 x float> %vf, <8 x i1> %m, i32 %n)
+ %rD = call float @llvm.vp.reduce.fadd.v8f32(float %f, <8 x float> %vf, <8 x i1> %m, i32 %n)
+ %rE = call float @llvm.vp.reduce.fmul.v8f32(float %f, <8 x float> %vf, <8 x i1> %m, i32 %n)
ret void
}
@@ -113,6 +115,8 @@ declare i32 @llvm.vp.reduce.umax.v8i32(i32, <8 x i32>, <8 x i1>, i32)
declare i32 @llvm.vp.reduce.umin.v8i32(i32, <8 x i32>, <8 x i1>, i32)
declare float @llvm.vp.reduce.fmin.v8f32(float, <8 x float>, <8 x i1>, i32)
declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32)
+declare float @llvm.vp.reduce.fminimum.v8f32(float, <8 x float>, <8 x i1>, i32)
+declare float @llvm.vp.reduce.fmaximum.v8f32(float, <8 x float>, <8 x i1>, i32)
declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32)
declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32)
; casts
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index 626ab2e9a9c57..d6508abd5197e 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -27,7 +27,8 @@ namespace {
static const char *ReductionIntOpcodes[] = {
"add", "mul", "and", "or", "xor", "smin", "smax", "umin", "umax"};
-static const char *ReductionFPOpcodes[] = {"fadd", "fmul", "fmin", "fmax"};
+static const char *ReductionFPOpcodes[] = {"fadd", "fmul", "fmin",
+ "fmax", "fminimum", "fmaximum"};
class VPIntrinsicTest : public testing::Test {
protected:
>From 7cc65093bde3065ed6fc6459a6de2025ff27c8f7 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Fri, 10 May 2024 14:37:47 -0700
Subject: [PATCH 2/2] Address review comments
- Check the start value of vp.reduce for NaN as well.
- Consolidate the tests with the other vp.reduce.* tests.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +-
llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll | 205 ------------------
.../CodeGen/RISCV/rvv/vreductions-fp-vp.ll | 122 +++++++++++
3 files changed, 129 insertions(+), 206 deletions(-)
delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ead14c6945025..60985edd9420e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9792,6 +9792,7 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
unsigned Opc = Op.getOpcode();
+ SDValue Start = Op.getOperand(0);
SDValue Vec = Op.getOperand(1);
EVT VecEVT = Vec.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
@@ -9820,16 +9821,21 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
// Propagate NaNs.
MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
+ // Check if any of the elements in Vec is NaN.
SDValue IsNaN = DAG.getNode(
RISCVISD::SETCC_VL, DL, PredVT,
{Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
+ // Check if the start value is NaN.
+ SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
+ VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
MVT ResVT = Res.getSimpleValueType();
return DAG.getSelect(
DL, ResVT, NoNaNs, Res,
- DAG.getConstantFP(DAG.EVTToAPFloatSemantics(ResVT), DL, ResVT));
+ DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
+ ResVT));
}
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll
deleted file mode 100644
index ac78759c47fce..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reduce.ll
+++ /dev/null
@@ -1,205 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
-
-define float @vreduce_fminimum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) {
-; RV32-LABEL: vreduce_fminimum_nxv4f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.s.f v10, fa0
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vfredmin.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmfne.vv v11, v8, v8, v0.t
-; RV32-NEXT: vcpop.m a0, v11, v0.t
-; RV32-NEXT: beqz a0, .LBB0_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fmv.w.x fa0, zero
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB0_2:
-; RV32-NEXT: vfmv.f.s fa0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vreduce_fminimum_nxv4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.s.f v10, fa0
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vfredmin.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmfne.vv v11, v8, v8, v0.t
-; RV64-NEXT: vcpop.m a0, v11, v0.t
-; RV64-NEXT: beqz a0, .LBB0_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: fmv.w.x fa0, zero
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB0_2:
-; RV64-NEXT: vfmv.f.s fa0, v10
-; RV64-NEXT: ret
- %s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
- ret float %s
-}
-
-define float @vreduce_fmaximum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) {
-; RV32-LABEL: vreduce_fmaximum_nxv4f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.s.f v10, fa0
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vfredmax.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmfne.vv v11, v8, v8, v0.t
-; RV32-NEXT: vcpop.m a0, v11, v0.t
-; RV32-NEXT: beqz a0, .LBB1_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fmv.w.x fa0, zero
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB1_2:
-; RV32-NEXT: vfmv.f.s fa0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vreduce_fmaximum_nxv4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.s.f v10, fa0
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vfredmax.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmfne.vv v11, v8, v8, v0.t
-; RV64-NEXT: vcpop.m a0, v11, v0.t
-; RV64-NEXT: beqz a0, .LBB1_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: fmv.w.x fa0, zero
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB1_2:
-; RV64-NEXT: vfmv.f.s fa0, v10
-; RV64-NEXT: ret
- %s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
- ret float %s
-}
-
-define float @vreduce_fminimum_nnan_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) {
-; RV32-LABEL: vreduce_fminimum_nnan_nxv4f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.s.f v10, fa0
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vfredmin.vs v10, v8, v10, v0.t
-; RV32-NEXT: vfmv.f.s fa0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vreduce_fminimum_nnan_nxv4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.s.f v10, fa0
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vfredmin.vs v10, v8, v10, v0.t
-; RV64-NEXT: vfmv.f.s fa0, v10
-; RV64-NEXT: ret
- %s = call nnan float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
- ret float %s
-}
-
-define float @vreduce_fmaximum_nnan_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl) {
-; RV32-LABEL: vreduce_fmaximum_nnan_nxv4f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.s.f v10, fa0
-; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV32-NEXT: vfredmax.vs v10, v8, v10, v0.t
-; RV32-NEXT: vfmv.f.s fa0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vreduce_fmaximum_nnan_nxv4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.s.f v10, fa0
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; RV64-NEXT: vfredmax.vs v10, v8, v10, v0.t
-; RV64-NEXT: vfmv.f.s fa0, v10
-; RV64-NEXT: ret
- %s = call nnan float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
- ret float %s
-}
-
-define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) {
-; RV32-LABEL: vreduce_fminimum_v4f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.s.f v9, fa0
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vfredmin.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmfne.vv v8, v8, v8, v0.t
-; RV32-NEXT: vcpop.m a0, v8, v0.t
-; RV32-NEXT: beqz a0, .LBB4_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fmv.w.x fa0, zero
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB4_2:
-; RV32-NEXT: vfmv.f.s fa0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vreduce_fminimum_v4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.s.f v9, fa0
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vfredmin.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmfne.vv v8, v8, v8, v0.t
-; RV64-NEXT: vcpop.m a0, v8, v0.t
-; RV64-NEXT: beqz a0, .LBB4_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: fmv.w.x fa0, zero
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB4_2:
-; RV64-NEXT: vfmv.f.s fa0, v9
-; RV64-NEXT: ret
- %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
- ret float %s
-}
-
-define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) {
-; RV32-LABEL: vreduce_fmaximum_v4f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vfmv.s.f v9, fa0
-; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV32-NEXT: vfredmax.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmfne.vv v8, v8, v8, v0.t
-; RV32-NEXT: vcpop.m a0, v8, v0.t
-; RV32-NEXT: beqz a0, .LBB5_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fmv.w.x fa0, zero
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB5_2:
-; RV32-NEXT: vfmv.f.s fa0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vreduce_fmaximum_v4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vfmv.s.f v9, fa0
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; RV64-NEXT: vfredmax.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmfne.vv v8, v8, v8, v0.t
-; RV64-NEXT: vcpop.m a0, v8, v0.t
-; RV64-NEXT: beqz a0, .LBB5_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: fmv.w.x fa0, zero
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB5_2:
-; RV64-NEXT: vfmv.f.s fa0, v9
-; RV64-NEXT: ret
- %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
- ret float %s
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
index 4f7cb84c08644..46560fc501c6f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -579,3 +579,125 @@ define double @vpreduce_ord_fadd_nxv4f64(double %s, <vscale x 4 x double> %v, <v
%r = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 %evl)
ret double %r
}
+
+define float @vreduce_fminimum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vreduce_fminimum_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t
+; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t
+; CHECK-NEXT: vcpop.m a0, v11, v0.t
+; CHECK-NEXT: feq.s a1, fa0, fa0
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: beqz a0, .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: lui a0, 523264
+; CHECK-NEXT: fmv.w.x fa0, a0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: ret
+ %s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fmaximum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vreduce_fmaximum_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t
+; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t
+; CHECK-NEXT: vcpop.m a0, v11, v0.t
+; CHECK-NEXT: feq.s a1, fa0, fa0
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: beqz a0, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: lui a0, 523264
+; CHECK-NEXT: fmv.w.x fa0, a0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: ret
+ %s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fminimum_nnan_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vreduce_fminimum_nnan_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: ret
+ %s = call nnan float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fmaximum_nnan_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: ret
+ %s = call nnan float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vreduce_fminimum_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: feq.s a1, fa0, fa0
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: beqz a0, .LBB26_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: lui a0, 523264
+; CHECK-NEXT: fmv.w.x fa0, a0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: vfmv.f.s fa0, v9
+; CHECK-NEXT: ret
+ %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
+ ret float %s
+}
+
+define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vreduce_fmaximum_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t
+; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: feq.s a1, fa0, fa0
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: beqz a0, .LBB27_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: lui a0, 523264
+; CHECK-NEXT: fmv.w.x fa0, a0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vfmv.f.s fa0, v9
+; CHECK-NEXT: ret
+ %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
+ ret float %s
+}
More information about the llvm-commits
mailing list