[llvm] [RISCV] Promote SETCC and VP_SETCC of f16 vectors when only have zvfhmin (PR #66866)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 20 00:17:57 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-llvm-selectiondag
This patch implements the promotion of SETCC and VP_SETCC on f16 vectors when only Zvfhmin is available but not Zvfh.
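As a rough sketch of the kind of IR this covers (reduced examples, not taken from the patch's test files), compiling the functions below with `-mattr=+v,+zvfhmin` and without `+zvfh` exercises the new SETCC and VP_SETCC paths: both f16 operands are widened to f32 (`vfwcvt.f.f.v`) and the compare runs at the f32 element type, as the ZVFHMIN check lines in the diff show.

```llvm
; fcmp on an f16 vector lowers through the promoted ISD::SETCC path.
define <vscale x 4 x i1> @fcmp_oeq_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
  %c = fcmp oeq <vscale x 4 x half> %a, %b
  ret <vscale x 4 x i1> %c
}

; The vp.fcmp intrinsic lowers through the promoted ISD::VP_SETCC path; the
; mask and EVL operands are passed through unchanged by the promotion.
declare <vscale x 4 x i1> @llvm.vp.fcmp.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, metadata, <vscale x 4 x i1>, i32)

define <vscale x 4 x i1> @vp_fcmp_oeq_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
  %c = call <vscale x 4 x i1> @llvm.vp.fcmp.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, metadata !"oeq", <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i1> %c
}
```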
---
Patch is 332.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66866.diff
6 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (+36)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+20-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll (+767-310)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll (+859-261)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll (+1861-589)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll (+1294-402)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index dec81475f3a88fc..2e490b0b7f38991 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -173,6 +173,12 @@ class VectorLegalizer {
/// result is truncated back to the original scalar type.
void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ /// Implements vector setcc operation promotion.
+ ///
+ /// All vector operands are promoted to a vector type with larger element
+ /// type.
+ void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -603,6 +609,31 @@ void VectorLegalizer::PromoteReduction(SDNode *Node,
Results.push_back(Res);
}
+void VectorLegalizer::PromoteSETCC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VecVT = Node->getOperand(0).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+
+ unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+ Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
+ Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
+ Operands[2] = Node->getOperand(2);
+
+ if (Node->getOpcode() == ISD::VP_SETCC) {
+ Operands[3] = Node->getOperand(3); // mask
+ Operands[4] = Node->getOperand(4); // evl
+ }
+
+ SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
+ Operands, Node->getFlags());
+
+ Results.push_back(Res);
+}
+
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
@@ -638,6 +669,11 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// Promote the operation by extending the operand.
PromoteReduction(Node, Results);
return;
+ case ISD::VP_SETCC:
+ case ISD::SETCC:
+ // Promote the operation by extending the operand.
+ PromoteSETCC(Node, Results);
+ return;
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
// These operations are used to do promotion so they can't be promoted
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f1cea6c6756f4fc..a75a9cb4f4a1599 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -824,11 +824,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
- ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
- ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SPLAT_VECTOR};
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
+ ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
+ ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SPLAT_VECTOR, ISD::SETCC};
// TODO: support more vp ops.
static const unsigned ZvfhminPromoteVPOps[] = {
@@ -839,7 +839,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
- ISD::VP_FNEARBYINT};
+ ISD::VP_FNEARBYINT, ISD::VP_SETCC};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
@@ -5396,6 +5396,11 @@ static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
continue;
}
+ if (!Op.getOperand(j).getValueType().isVector()) {
+ LoOperands[j] = Op.getOperand(j);
+ HiOperands[j] = Op.getOperand(j);
+ continue;
+ }
std::tie(LoOperands[j], HiOperands[j]) =
DAG.SplitVector(Op.getOperand(j), DL);
}
@@ -6083,6 +6088,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
}
+ if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
+
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
}
case ISD::ADD:
@@ -6250,6 +6260,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_FP_TO_UINT:
return lowerVPFPIntConvOp(Op, DAG);
case ISD::VP_SETCC:
+ if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVPOp(Op, DAG);
if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
return lowerVPSetCCMaskOp(Op, DAG);
[[fallthrough]];
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
index a7b8d8dbe330bce..a566fab1596f60c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -1,16 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
define void @fcmp_oeq_vv_v8f16(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_oeq_vv_v8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vmfeq.vv v8, v8, v9
-; CHECK-NEXT: vsm.v v8, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_oeq_vv_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vmfeq.vv v8, v8, v9
+; ZVFH-NEXT: vsm.v v8, (a2)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fcmp_oeq_vv_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10
+; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
%c = fcmp oeq <8 x half> %a, %b
@@ -19,14 +33,26 @@ define void @fcmp_oeq_vv_v8f16(ptr %x, ptr %y, ptr %z) {
}
define void @fcmp_oeq_vv_v8f16_nonans(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_oeq_vv_v8f16_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vmfeq.vv v8, v8, v9
-; CHECK-NEXT: vsm.v v8, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_oeq_vv_v8f16_nonans:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vmfeq.vv v8, v8, v9
+; ZVFH-NEXT: vsm.v v8, (a2)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fcmp_oeq_vv_v8f16_nonans:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10
+; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
%c = fcmp nnan oeq <8 x half> %a, %b
@@ -135,14 +161,26 @@ define void @fcmp_ogt_vv_v2f64_nonans(ptr %x, ptr %y, ptr %z) {
}
define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_olt_vv_v16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v10, (a1)
-; CHECK-NEXT: vmflt.vv v12, v8, v10
-; CHECK-NEXT: vsm.v v12, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_olt_vv_v16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v10, (a1)
+; ZVFH-NEXT: vmflt.vv v12, v8, v10
+; ZVFH-NEXT: vsm.v v12, (a2)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12
+; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = load <16 x half>, ptr %y
%c = fcmp olt <16 x half> %a, %b
@@ -151,14 +189,26 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) {
}
define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_olt_vv_v16f16_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v10, (a1)
-; CHECK-NEXT: vmflt.vv v12, v8, v10
-; CHECK-NEXT: vsm.v v12, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_olt_vv_v16f16_nonans:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v10, (a1)
+; ZVFH-NEXT: vmflt.vv v12, v8, v10
+; ZVFH-NEXT: vsm.v v12, (a2)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16_nonans:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v10, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12
+; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = load <16 x half>, ptr %y
%c = fcmp nnan olt <16 x half> %a, %b
@@ -249,16 +299,30 @@ define void @fcmp_ole_vv_v4f64_nonans(ptr %x, ptr %y, ptr %z) {
}
define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_ule_vv_v32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a3, 32
-; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v12, (a1)
-; CHECK-NEXT: vmflt.vv v16, v12, v8
-; CHECK-NEXT: vmnot.m v8, v16
-; CHECK-NEXT: vsm.v v8, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_ule_vv_v32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: li a3, 32
+; ZVFH-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v12, (a1)
+; ZVFH-NEXT: vmflt.vv v16, v12, v8
+; ZVFH-NEXT: vmnot.m v8, v16
+; ZVFH-NEXT: vsm.v v8, (a2)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fcmp_ule_vv_v32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: li a3, 32
+; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v12, (a1)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmflt.vv v8, v24, v16
+; ZVFHMIN-NEXT: vmnot.m v8, v8
+; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: ret
%a = load <32 x half>, ptr %x
%b = load <32 x half>, ptr %y
%c = fcmp ule <32 x half> %a, %b
@@ -267,15 +331,28 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) {
}
define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_ule_vv_v32f16_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a3, 32
-; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v12, (a1)
-; CHECK-NEXT: vmfle.vv v16, v8, v12
-; CHECK-NEXT: vsm.v v16, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_ule_vv_v32f16_nonans:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: li a3, 32
+; ZVFH-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v12, (a1)
+; ZVFH-NEXT: vmfle.vv v16, v8, v12
+; ZVFH-NEXT: vsm.v v16, (a2)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fcmp_ule_vv_v32f16_nonans:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: li a3, 32
+; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v12, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmfle.vv v8, v24, v16
+; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: ret
%a = load <32 x half>, ptr %x
%b = load <32 x half>, ptr %y
%c = fcmp nnan ule <32 x half> %a, %b
@@ -350,16 +427,16 @@ define void @fcmp_ult_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) {
}
define void @fcmp_ugt_vv_v64f16(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_ugt_vv_v64f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a3, 64
-; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v16, (a1)
-; CHECK-NEXT: vmfle.vv v24, v8, v16
-; CHECK-NEXT: vmnot.m v8, v24
-; CHECK-NEXT: vsm.v v8, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_ugt_vv_v64f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: li a3, 64
+; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v16, (a1)
+; ZVFH-NEXT: vmfle.vv v24, v8, v16
+; ZVFH-NEXT: vmnot.m v8, v24
+; ZVFH-NEXT: vsm.v v8, (a2)
+; ZVFH-NEXT: ret
%a = load <64 x half>, ptr %x
%b = load <64 x half>, ptr %y
%c = fcmp ugt <64 x half> %a, %b
@@ -368,15 +445,15 @@ define void @fcmp_ugt_vv_v64f16(ptr %x, ptr %y, ptr %z) {
}
define void @fcmp_ugt_vv_v64f16_nonans(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_ugt_vv_v64f16_nonans:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a3, 64
-; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v16, (a1)
-; CHECK-NEXT: vmflt.vv v24, v16, v8
-; CHECK-NEXT: vsm.v v24, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_ugt_vv_v64f16_nonans:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: li a3, 64
+; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v16, (a1)
+; ZVFH-NEXT: vmflt.vv v24, v16, v8
+; ZVFH-NEXT: vsm.v v24, (a2)
+; ZVFH-NEXT: ret
%a = load <64 x half>, ptr %x
%b = load <64 x half>, ptr %y
%c = fcmp nnan ugt <64 x half> %a, %b
@@ -455,25 +532,50 @@ define void @fcmp_one_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) {
}
define void @fcmp_ord_vv_v4f16(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_ord_vv_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vmfeq.vv v8, v8, v8
-; CHECK-NEXT: vmfeq.vv v9, v9, v9
-; CHECK-NEXT: vmand.mm v0, v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmsne.vi v8, v9, 0
-; CHECK-NEXT: vsm.v v8, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_ord_vv_v4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a1)
+; ZVFH-NEXT: vle16.v v9, (a0)
+; ZVFH-NEXT: vmfeq.vv v8, v8, v8
+; ZVFH-NEXT: vmfeq.vv v9, v9, v9
+; ZVFH-NEXT: vmand.mm v0, v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; ZVFH-NEXT: vmv.v.i v8, 0
+; ZVFH-NEXT: vmerge.vim v8, v8, 1, v0
+; ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVFH-NEXT: vmv.v.i v9, 0
+; ZVFH-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; ZVFH-NEXT: vmv.v.v v9, v8
+; ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVFH-NEXT: vmsne.vi v8, v9, 0
+; ZVFH-NEXT: vsm.v v8, (a2)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fcmp_ord_vv_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10
+; ZVFHMIN-NEXT: vmand.mm v0, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.i v8, 0
+; ZVFHMIN-NEXT: vmerge.vim v8, v8, 1, v0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.i v9, 0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; ZVFHMIN-NEXT: vmv.v.v v9, v8
+; ZVFHMIN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vmsne.vi v8, v9, 0
+; ZVFHMIN-NEXT: vsm.v v8, (a2)
+; ZVFHMIN-NEXT: ret
%a = load <4 x half>, ptr %x
%b = load <4 x half>, ptr %y
%c = fcmp ord <4 x half> %a, %b
@@ -482,25 +584,50 @@ define void @fcmp_ord_vv_v4f16(ptr %x, ptr %y, ptr %z) {
}
define void @fcmp_uno_vv_v4f16(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fcmp_uno_vv_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vmfne.vv v8, v8, v8
-; CHECK-NEXT: vmfne.vv v9, v9, v9
-; CHECK-NEXT: vmor.mm v0, v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmsne.vi v8, v9, 0
-; CHECK-NEXT: vsm.v v8, (a2)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fcmp_uno_vv_v4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a1)
+; ZVFH-NEXT: vle16.v v9, (a0)
+; ZVFH-NEXT: vmfne.vv v8, v8, v8
+; ZVFH-NEXT: vmfne.vv v9, v9, v9
+; ZVFH-NEXT: vmor.mm v0, v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; ZVFH-NEXT: vmv.v.i v8, 0
+; ZVFH-NEXT: vmerge.vim v8, v8, 1, v0
+; ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVFH-NEXT: vmv.v.i v9, 0
+; ZVFH-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
+; ZVFH-NEXT: vmv.v.v v9, v8
+; ZVFH-NEX...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/66866