[llvm] [RISCV] Support fmaximum/fminimum for fp16 vector when only Zvfhmin enabled (PR #67393)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 25 22:30:57 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Changes:
This patch promotes fmaximum/fminimum on fp16 vectors to float operations for targets where only Zvfhmin (not Zvfh) is enabled. The one exception is nxv32f16, which is split into two nxv16f16 operations first, because the promoted type nxv32f32 is not a legal vector type.
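As a rough illustration, the lowering is conceptually equivalent to the following hand-written IR sketch (the actual transform happens on SelectionDAG nodes during legalization, and the function name here is made up for the example): each fp16 operand is widened to f32, the maximum is computed in f32, and the result is narrowed back to fp16, which is exactly the vfwcvt.f.f.v / vfmax.vv / vfncvt.f.f.w sequence in the ZVFHMIN check lines below.

```llvm
; Conceptual sketch only: the backend promotes the SelectionDAG node,
; it does not rewrite IR like this.
declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)

define <4 x half> @promoted_vfmax_v4f16(<4 x half> %a, <4 x half> %b) {
  %wa = fpext <4 x half> %a to <4 x float>     ; vfwcvt.f.f.v
  %wb = fpext <4 x half> %b to <4 x float>     ; vfwcvt.f.f.v
  %wv = call <4 x float> @llvm.maximum.v4f32(<4 x float> %wa, <4 x float> %wb)
  %v  = fptrunc <4 x float> %wv to <4 x half>  ; vfncvt.f.f.w
  ret <4 x half> %v
}
```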
---
Patch is 67.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67393.diff
5 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+10-5)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll (+194-69)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll (+194-69)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll (+321-94)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll (+321-94)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 686350de29883aa..060edfa3c9c6bbc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -822,11 +822,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
- ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
- ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SPLAT_VECTOR};
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
+ ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
+ ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SPLAT_VECTOR, ISD::FMAXIMUM,
+ ISD::FMINIMUM};
// TODO: support more vp ops.
static const unsigned ZvfhminPromoteVPOps[] = {
@@ -5654,6 +5655,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVectorOp(Op, DAG);
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
case ISD::FP_EXTEND: {
SDLoc DL(Op);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 0ec9055093ce99a..dee94ecedff68dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -1,22 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>)
define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: vfmax_v2f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v10, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: vfmax.vv v8, v8, v11
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_v2f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v10, v9, v9
+; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
+; ZVFH-NEXT: vmv1r.v v0, v10
+; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFH-NEXT: vfmax.vv v8, v8, v11
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
@@ -24,16 +44,32 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
-; CHECK-LABEL: vfmax_v4f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v10, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: vfmax.vv v8, v8, v11
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_v4f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v10, v9, v9
+; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
+; ZVFH-NEXT: vmv1r.v v0, v10
+; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFH-NEXT: vfmax.vv v8, v8, v11
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v4f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
+; ZVFHMIN-NEXT: vmv.v.v v0, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %v
}
@@ -41,16 +77,32 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
-; CHECK-LABEL: vfmax_v8f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v10, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: vfmax.vv v8, v8, v11
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_v8f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v10, v9, v9
+; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
+; ZVFH-NEXT: vmv.v.v v0, v10
+; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFH-NEXT: vfmax.vv v8, v8, v11
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v8f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
%v = call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %v
}
@@ -58,16 +110,32 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>)
define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
-; CHECK-LABEL: vfmax_v16f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v12, v10, v10
-; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT: vfmax.vv v8, v8, v14
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_v16f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v12, v10, v10
+; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0
+; ZVFH-NEXT: vmv1r.v v0, v12
+; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
+; ZVFH-NEXT: vfmax.vv v8, v8, v14
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v16f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12
+; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
%v = call <16 x half> @llvm.maximum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %v
}
@@ -220,28 +288,62 @@ define <16 x double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwi
}
define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: vfmax_v2f16_vv_nnan:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_v2f16_vv_nnan:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%v = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: vfmax_v2f16_vv_nnana:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfadd.vv v10, v8, v8
-; CHECK-NEXT: vmfeq.vv v0, v9, v9
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_v2f16_vv_nnana:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfadd.vv v10, v8, v8
+; ZVFH-NEXT: vmfeq.vv v0, v9, v9
+; ZVFH-NEXT: vmfeq.vv v8, v10, v10
+; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0
+; ZVFH-NEXT: vmv1r.v v0, v8
+; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0
+; ZVFH-NEXT: vfmax.vv v8, v11, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9
+; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%c = fadd nnan <2 x half> %a, %a
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b)
ret <2 x half> %v
@@ -249,17 +351,40 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; FIXME: The nnan from fadd isn't propagating.
define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: vfmax_v2f16_vv_nnanb:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfadd.vv v10, v9, v9
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v9, v10, v10
-; CHECK-NEXT: vmerge.vvm v11, v8, v10, v0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
-; CHECK-NEXT: vfmax.vv v8, v8, v11
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_v2f16_vv_nnanb:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfadd.vv v10, v9, v9
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v9, v10, v10
+; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0
+; ZVFH-NEXT: vmv1r.v v0, v9
+; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
+; ZVFH-NEXT: vfmax.vv v8, v8, v11
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnanb:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
+; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%c = fadd nnan <2 x half> %b, %b
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
ret <2 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index 8750b37b4a824b6..a201dcc47853b83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -1,22 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>)
define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: vfmin_v2f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v10, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: vfmin.vv v8, v8, v11
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_v2f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v10, v9, v9
+; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
+; ZVFH-NEXT: vmv1r.v v0, v10
+; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFH-NEXT: vfmin.vv v8, v8, v11
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v2f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
@@ -24,16 +44,32 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
-; CHECK-LABEL: vfmin_v4f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v10, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: vfmin.vv v8, v8, v11
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_v4f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v10, v9, v9
+; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
+; ZVFH-NEXT: vmv1r.v v0, v10
+; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFH-NEXT: vfmin.vv v8, v8, v11
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v4f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0
+; ZVFHMIN-NEXT: vmv.v.v v0, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %v
}
@@ -41,16 +77,32 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
-; CHECK-LABEL: vfmin_v8f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v10, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: vfmin.vv v8, v8, v11
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_v8f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v10, v9, v9
+; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0
+; ZVFH-NEXT: vmv.v.v v0, v10
+; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFH-NEXT: vfmin.vv v8, v8, v11
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v8f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10
+; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/67393