[llvm] [SelectionDAG][RISCV] Promote VECREDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM} (PR #128800)
Jim Lin via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 25 23:31:56 PST 2025
https://github.com/tclin914 updated https://github.com/llvm/llvm-project/pull/128800
>From 50cc58c6078e722fe284f91eb29fcf80b3caf0e0 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Mon, 24 Feb 2025 16:00:39 +0800
Subject: [PATCH 1/3] [SelectionDAG][RISCV] Promote
VECREDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}
This patch also adds tests for VP_REDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}, which have been supported for a while.
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 25 +-
.../SelectionDAG/LegalizeVectorOps.cpp | 8 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 36 ++-
.../RISCV/rvv/vreductions-fp-sdnode-f16.ll | 212 ++++++++++++++
.../RISCV/rvv/vreductions-fp-vp-f16.ll | 269 ++++++++++++++++++
5 files changed, 535 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f56097fdbb51a..5f70588ab84e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2913,7 +2913,9 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
}
SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
- MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ bool IsVPOpcode = ISD::isVPOpcode(Node->getOpcode());
+ MVT VecVT = IsVPOpcode ? Node->getOperand(1).getSimpleValueType()
+ : Node->getOperand(0).getSimpleValueType();
MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
MVT ScalarVT = Node->getSimpleValueType(0);
MVT NewScalarVT = NewVecVT.getVectorElementType();
@@ -2921,16 +2923,13 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
SDLoc DL(Node);
SmallVector<SDValue, 4> Operands(Node->getNumOperands());
- // promote the initial value.
// FIXME: Support integer.
assert(Node->getOperand(0).getValueType().isFloatingPoint() &&
"Only FP promotion is supported");
- Operands[0] =
- DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
- for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j)
if (Node->getOperand(j).getValueType().isVector() &&
- !(ISD::isVPOpcode(Node->getOpcode()) &&
+ !(IsVPOpcode &&
ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand.
// promote the vector operand.
// FIXME: Support integer.
@@ -2938,6 +2937,10 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
"Only FP promotion is supported");
Operands[j] =
DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ } else if (Node->getOperand(j).getValueType().isFloatingPoint()) {
+ // prmote the initial value.
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j));
} else {
Operands[j] = Node->getOperand(j); // Skip VL operand.
}
@@ -5049,7 +5052,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::SINT_TO_FP ||
Node->getOpcode() == ISD::SETCC ||
Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ Node->getOpcode() == ISD::INSERT_VECTOR_ELT ||
+ Node->getOpcode() == ISD::VECREDUCE_FMAX ||
+ Node->getOpcode() == ISD::VECREDUCE_FMIN ||
+ Node->getOpcode() == ISD::VECREDUCE_FMAXIMUM ||
+ Node->getOpcode() == ISD::VECREDUCE_FMINIMUM) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::ATOMIC_STORE ||
@@ -5796,6 +5803,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
break;
}
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMIN:
case ISD::VP_REDUCE_FMAXIMUM:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index de4447fb0cf1a..13345b76e7e92 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -503,13 +503,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
+ case ISD::VECTOR_FIND_LAST_ACTIVE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
- case ISD::VECTOR_FIND_LAST_ACTIVE:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
+ // Defer non-vector results to LegalizeDAG.
+ if (Action == TargetLowering::Promote)
+ Action = TargetLowering::Legal;
break;
case ISD::VECREDUCE_SEQ_FADD:
case ISD::VECREDUCE_SEQ_FMUL:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6076fe56416ad..759cf531b74b1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -959,13 +959,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminZvfbfminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
- ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
- ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
- ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
- ISD::STRICT_FMA};
+ ISD::FMINNUM,
+ ISD::FMAXNUM,
+ ISD::FADD,
+ ISD::FSUB,
+ ISD::FMUL,
+ ISD::FMA,
+ ISD::FDIV,
+ ISD::FSQRT,
+ ISD::FCEIL,
+ ISD::FTRUNC,
+ ISD::FFLOOR,
+ ISD::FROUND,
+ ISD::FROUNDEVEN,
+ ISD::FRINT,
+ ISD::FNEARBYINT,
+ ISD::IS_FPCLASS,
+ ISD::SETCC,
+ ISD::FMAXIMUM,
+ ISD::FMINIMUM,
+ ISD::STRICT_FADD,
+ ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT,
+ ISD::STRICT_FMA,
+ ISD::VECREDUCE_FMIN,
+ ISD::VECREDUCE_FMAX,
+ ISD::VECREDUCE_FMINIMUM,
+ ISD::VECREDUCE_FMAXIMUM};
// TODO: support more vp ops.
static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
new file mode 100644
index 0000000000000..e269b13137d44
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+
+define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmin_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmin_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmax_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmax_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmin_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmin_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmin_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmax_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmax_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmax_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fminimum_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fminimum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8
+; ZVFH-NEXT: vcpop.m a0, v9
+; ZVFH-NEXT: beqz a0, .LBB4_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI4_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB4_2:
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fminimum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
+; ZVFHMIN-NEXT: vcpop.m a0, v8
+; ZVFHMIN-NEXT: beqz a0, .LBB4_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB4_2:
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmaximum_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmaximum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8
+; ZVFH-NEXT: vcpop.m a0, v9
+; ZVFH-NEXT: beqz a0, .LBB5_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI5_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB5_2:
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmaximum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
+; ZVFHMIN-NEXT: vcpop.m a0, v8
+; ZVFHMIN-NEXT: beqz a0, .LBB5_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB5_2:
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fminimum_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fminimum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fminimum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
+define half @vreduce_fmaximum_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmaximum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v8, v8, v8
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vreduce_fmaximum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
+ ret half %s
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
new file mode 100644
index 0000000000000..8993bf8a767d8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+
+define half @vpreduce_fmin_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmin_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmin_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fmin.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fmax_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmax_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmax_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fmax.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fmin_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmin_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmin_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fmin.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fmax_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmax_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmax_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fmax.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fminimum_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fminimum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t
+; ZVFH-NEXT: fcvt.s.h fa5, fa0
+; ZVFH-NEXT: vcpop.m a1, v9, v0.t
+; ZVFH-NEXT: feq.s a2, fa5, fa5
+; ZVFH-NEXT: xori a2, a2, 1
+; ZVFH-NEXT: or a1, a1, a2
+; ZVFH-NEXT: beqz a1, .LBB4_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI4_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB4_2:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fminimum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT: feq.s a1, fa5, fa5
+; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t
+; ZVFHMIN-NEXT: xori a1, a1, 1
+; ZVFHMIN-NEXT: or a1, a2, a1
+; ZVFHMIN-NEXT: beqz a1, .LBB4_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB4_2:
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fminimum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fmaximum_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmaximum_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t
+; ZVFH-NEXT: fcvt.s.h fa5, fa0
+; ZVFH-NEXT: vcpop.m a1, v9, v0.t
+; ZVFH-NEXT: feq.s a2, fa5, fa5
+; ZVFH-NEXT: xori a2, a2, 1
+; ZVFH-NEXT: or a1, a1, a2
+; ZVFH-NEXT: beqz a1, .LBB5_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: lui a0, %hi(.LCPI5_0)
+; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0)
+; ZVFH-NEXT: ret
+; ZVFH-NEXT: .LBB5_2:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmaximum_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT: feq.s a1, fa5, fa5
+; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t
+; ZVFHMIN-NEXT: xori a1, a1, 1
+; ZVFHMIN-NEXT: or a1, a2, a1
+; ZVFHMIN-NEXT: beqz a1, .LBB5_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: lui a0, 523264
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+; ZVFHMIN-NEXT: .LBB5_2:
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fminimum_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fminimum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fminimum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fminimum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
+
+define half @vpreduce_fmaximum_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmaximum_nnan_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmaximum_nnan_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
+ %s = call nnan half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret half %s
+}
>From 496b064ccb0324f7a457c6549df9682495222a61 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Wed, 26 Feb 2025 10:46:25 +0800
Subject: [PATCH 2/3] clang-format
---
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 13345b76e7e92..27bde7b96c857 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -515,7 +515,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Node->getOperand(0).getValueType());
// Defer non-vector results to LegalizeDAG.
if (Action == TargetLowering::Promote)
- Action = TargetLowering::Legal;
+ Action = TargetLowering::Legal;
break;
case ISD::VECREDUCE_SEQ_FADD:
case ISD::VECREDUCE_SEQ_FMUL:
>From e43532fae8da39f7307070fae4686c3b2ffcf90d Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Wed, 26 Feb 2025 14:38:53 +0800
Subject: [PATCH 3/3] prmote -> promote
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5f70588ab84e4..2b8818482a333 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2938,7 +2938,7 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
Operands[j] =
DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
} else if (Node->getOperand(j).getValueType().isFloatingPoint()) {
- // prmote the initial value.
+ // promote the initial value.
Operands[j] =
DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j));
} else {
More information about the llvm-commits
mailing list