[llvm] [RISCV][llvm] Support fminimum, fmaximum, fminnum, fmaxnum, fminimumnum, fmaximumnum codegen for zvfbfa (PR #171794)

Brandon Wu via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 11 02:16:28 PST 2025


https://github.com/4vtomat created https://github.com/llvm/llvm-project/pull/171794

This patch adds support for both scalable vectors and fixed-length vectors.
It also enables fsetcc pattern matching for zvfbfa so that fminimum and
fmaximum lower correctly.
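
For reference, a minimal IR sketch (illustrative only, not copied from the
tests; function names are made up) of the kind of bf16 operations this patch
lowers when +experimental-zvfbfa is enabled:

; fixed-length fmaximum on bf16
define <4 x bfloat> @example_fmaximum(<4 x bfloat> %a, <4 x bfloat> %b) {
  %v = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
  ret <4 x bfloat> %v
}

; scalable-vector fminnum on bf16
define <vscale x 2 x bfloat> @example_fminnum(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
  %v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
  ret <vscale x 2 x bfloat> %v
}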


From 120e35994ad43c65143d64d1e9b3fa3504e2d76f Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Thu, 11 Dec 2025 01:08:11 -0800
Subject: [PATCH 1/2] pre-commit test

---
 .../RISCV/rvv/fixed-vectors-fmaximum.ll       |  378 +++-
 .../RISCV/rvv/fixed-vectors-fmaximumnum.ll    | 1537 +++++++++++++++++
 .../RISCV/rvv/fixed-vectors-fminimum.ll       |  378 +++-
 .../RISCV/rvv/fixed-vectors-fminimumnum.ll    | 1537 +++++++++++++++++
 .../CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll  |  660 ++++++-
 .../CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll  |  660 ++++++-
 .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll |  778 +++++++--
 .../CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll   |  927 ++++++++--
 .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll |  778 +++++++--
 .../CodeGen/RISCV/rvv/fminimumnum-sdnode.ll   |  927 ++++++++--
 llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll   |  874 ++++++++--
 llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll   |  874 ++++++++--
 12 files changed, 9314 insertions(+), 994 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 6ee2e204bcfe3..299361374f2f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -1,12 +1,236 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v2bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT:    vfmax.vv v9, v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v2bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.maximum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v4bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT:    vfmax.vv v9, v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v4bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v4bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v8bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFH-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFH-NEXT:    vfmax.vv v10, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v8bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v8bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.maximum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v16bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFH-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFH-NEXT:    vfmax.vv v12, v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v16bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v16bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT:    vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+  %v = call <16 x bfloat> @llvm.maximum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+  ret <16 x bfloat> %v
+}
 
 define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFH-LABEL: vfmax_v2f16_vv:
@@ -35,6 +259,23 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
 }
@@ -66,6 +307,23 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v4f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %v
 }
@@ -97,6 +355,23 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v8f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %v
 }
@@ -128,6 +403,23 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v16f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT:    vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <16 x half> @llvm.maximum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %v
 }
@@ -262,6 +554,17 @@ define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv_nnan:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
 }
@@ -295,6 +598,26 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv_nnana:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v9, v10, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %c = fadd nnan <2 x half> %a, %a
   %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b)
   ret <2 x half> %v
@@ -329,6 +652,26 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv_nnanb:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v8, v10, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v10, v9, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v9, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %c = fadd nnan <2 x half> %b, %b
   %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
   ret <2 x half> %v
@@ -373,6 +716,33 @@ define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v9, v11, v11
+; ZVFBFA-NEXT:    vfadd.vv v8, v8, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFBFA-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vslideup.vi v11, v9, 2
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %d = fadd nnan <2 x half> %a, %a
   %e = fadd nnan <2 x half> %b, %b
   %f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> poison, <2 x half> %d, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
index c8cea368f905e..1a5e5af61b8ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
@@ -1,6 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfhmin,+zfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
 
 define <2 x double> @max_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: max_v2f64:
@@ -107,6 +111,17 @@ define <2 x half> @max_v2f16(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v2f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %c
@@ -129,6 +144,17 @@ define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v4f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %c
@@ -151,6 +177,17 @@ define <8 x half> @max_v8f16(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v8f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %c
@@ -173,6 +210,17 @@ define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v9f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <9 x half> @llvm.maximumnum.v9f16(<9 x half> %a, <9 x half> %b)
   ret <9 x half> %c
@@ -195,7 +243,1496 @@ define <16 x half> @max_v16f16(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v16f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %c
 }
+
+define <2 x bfloat> @max_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: max_v2bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -32
+; ZVFH-NEXT:    .cfi_def_cfa_offset 32
+; ZVFH-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    fmv.x.w s0, fa0
+; ZVFH-NEXT:    fmv.x.w s1, fa2
+; ZVFH-NEXT:    fmv.x.w a0, fa1
+; ZVFH-NEXT:    fmv.x.w a1, fa3
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, a0
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    slli s0, s0, 16
+; ZVFH-NEXT:    slli s2, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s1
+; ZVFH-NEXT:    fmv.w.x fa4, s0
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli a0, a0, 48
+; ZVFH-NEXT:    srli a0, a0, 48
+; ZVFH-NEXT:    or a0, a0, s2
+; ZVFH-NEXT:    lui a1, 1048560
+; ZVFH-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; ZVFH-NEXT:    vmv.s.x v8, a0
+; ZVFH-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT:    vmv.x.s a0, v8
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH-NEXT:    or a0, a0, a1
+; ZVFH-NEXT:    fmv.w.x fa0, a0
+; ZVFH-NEXT:    vmv.x.s a0, v8
+; ZVFH-NEXT:    or a0, a0, a1
+; ZVFH-NEXT:    fmv.w.x fa1, a0
+; ZVFH-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    addi sp, sp, 32
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: max_v2bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -32
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    fmv.x.w s0, fa0
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa1
+; ZVFHMIN-NEXT:    fmv.x.w a1, fa3
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a0
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    slli s0, s0, 16
+; ZVFHMIN-NEXT:    slli s2, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s0
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli a0, a0, 48
+; ZVFHMIN-NEXT:    srli a0, a0, 48
+; ZVFHMIN-NEXT:    or a0, a0, s2
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa1, a0
+; ZVFHMIN-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    addi sp, sp, 32
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v2bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  ret <2 x bfloat> %c
+}
+
+define <4 x bfloat> @max_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: max_v4bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -80
+; ZVFH-NEXT:    .cfi_def_cfa_offset 80
+; ZVFH-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    .cfi_offset s3, -40
+; ZVFH-NEXT:    .cfi_offset s4, -48
+; ZVFH-NEXT:    .cfi_offset s5, -56
+; ZVFH-NEXT:    .cfi_offset s6, -64
+; ZVFH-NEXT:    .cfi_offset s7, -72
+; ZVFH-NEXT:    mv s0, a0
+; ZVFH-NEXT:    fmv.x.w s1, fa3
+; ZVFH-NEXT:    fmv.x.w s2, fa7
+; ZVFH-NEXT:    fmv.x.w s3, fa2
+; ZVFH-NEXT:    fmv.x.w s4, fa6
+; ZVFH-NEXT:    fmv.x.w s5, fa1
+; ZVFH-NEXT:    fmv.x.w s6, fa5
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    fmv.x.w a1, fa4
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, a0
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s7, fa0
+; ZVFH-NEXT:    slli s6, s6, 16
+; ZVFH-NEXT:    slli s5, s5, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s6
+; ZVFH-NEXT:    fmv.w.x fa4, s5
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s5, fa0
+; ZVFH-NEXT:    slli s4, s4, 16
+; ZVFH-NEXT:    slli s3, s3, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s4
+; ZVFH-NEXT:    fmv.w.x fa4, s3
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s3, fa0
+; ZVFH-NEXT:    slli s2, s2, 16
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s2
+; ZVFH-NEXT:    fmv.w.x fa4, s1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sh s7, 0(s0)
+; ZVFH-NEXT:    sh s5, 2(s0)
+; ZVFH-NEXT:    sh s3, 4(s0)
+; ZVFH-NEXT:    sh a0, 6(s0)
+; ZVFH-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    .cfi_restore s3
+; ZVFH-NEXT:    .cfi_restore s4
+; ZVFH-NEXT:    .cfi_restore s5
+; ZVFH-NEXT:    .cfi_restore s6
+; ZVFH-NEXT:    .cfi_restore s7
+; ZVFH-NEXT:    addi sp, sp, 80
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: max_v4bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -80
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 80
+; ZVFHMIN-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    .cfi_offset s3, -40
+; ZVFHMIN-NEXT:    .cfi_offset s4, -48
+; ZVFHMIN-NEXT:    .cfi_offset s5, -56
+; ZVFHMIN-NEXT:    .cfi_offset s6, -64
+; ZVFHMIN-NEXT:    .cfi_offset s7, -72
+; ZVFHMIN-NEXT:    mv s0, a0
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa3
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa7
+; ZVFHMIN-NEXT:    fmv.x.w s3, fa2
+; ZVFHMIN-NEXT:    fmv.x.w s4, fa6
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa1
+; ZVFHMIN-NEXT:    fmv.x.w s6, fa5
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    fmv.x.w a1, fa4
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a0
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s7, fa0
+; ZVFHMIN-NEXT:    slli s6, s6, 16
+; ZVFHMIN-NEXT:    slli s5, s5, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s6
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s5
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa0
+; ZVFHMIN-NEXT:    slli s4, s4, 16
+; ZVFHMIN-NEXT:    slli s3, s3, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s4
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s3
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s3, fa0
+; ZVFHMIN-NEXT:    slli s2, s2, 16
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s2
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sh s7, 0(s0)
+; ZVFHMIN-NEXT:    sh s5, 2(s0)
+; ZVFHMIN-NEXT:    sh s3, 4(s0)
+; ZVFHMIN-NEXT:    sh a0, 6(s0)
+; ZVFHMIN-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    .cfi_restore s3
+; ZVFHMIN-NEXT:    .cfi_restore s4
+; ZVFHMIN-NEXT:    .cfi_restore s5
+; ZVFHMIN-NEXT:    .cfi_restore s6
+; ZVFHMIN-NEXT:    .cfi_restore s7
+; ZVFHMIN-NEXT:    addi sp, sp, 80
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v4bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+  ret <4 x bfloat> %c
+}
+
+define <8 x bfloat> @max_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: max_v8bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -128
+; ZVFH-NEXT:    .cfi_def_cfa_offset 128
+; ZVFH-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    .cfi_offset s3, -40
+; ZVFH-NEXT:    .cfi_offset s4, -48
+; ZVFH-NEXT:    .cfi_offset s5, -56
+; ZVFH-NEXT:    .cfi_offset s6, -64
+; ZVFH-NEXT:    .cfi_offset s7, -72
+; ZVFH-NEXT:    .cfi_offset s8, -80
+; ZVFH-NEXT:    .cfi_offset s9, -88
+; ZVFH-NEXT:    .cfi_offset s10, -96
+; ZVFH-NEXT:    .cfi_offset s11, -104
+; ZVFH-NEXT:    sd a7, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    mv s2, a6
+; ZVFH-NEXT:    mv s3, a5
+; ZVFH-NEXT:    mv s4, a4
+; ZVFH-NEXT:    mv s5, a3
+; ZVFH-NEXT:    mv s6, a2
+; ZVFH-NEXT:    mv s0, a0
+; ZVFH-NEXT:    fmv.x.w a0, fa7
+; ZVFH-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    fmv.x.w s10, fa6
+; ZVFH-NEXT:    fmv.x.w s11, fa5
+; ZVFH-NEXT:    fmv.x.w s9, fa4
+; ZVFH-NEXT:    fmv.x.w s1, fa3
+; ZVFH-NEXT:    fmv.x.w s7, fa2
+; ZVFH-NEXT:    fmv.x.w s8, fa1
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa4, a0
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    slli s6, s6, 16
+; ZVFH-NEXT:    slli s8, s8, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s6
+; ZVFH-NEXT:    fmv.w.x fa4, s8
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s6, fa0
+; ZVFH-NEXT:    slli s5, s5, 16
+; ZVFH-NEXT:    slli s7, s7, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s5
+; ZVFH-NEXT:    fmv.w.x fa4, s7
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s5, fa0
+; ZVFH-NEXT:    slli s4, s4, 16
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s4
+; ZVFH-NEXT:    fmv.w.x fa4, s1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s4, fa0
+; ZVFH-NEXT:    slli s3, s3, 16
+; ZVFH-NEXT:    slli s9, s9, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s3
+; ZVFH-NEXT:    fmv.w.x fa4, s9
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s1, fa0
+; ZVFH-NEXT:    slli s2, s2, 16
+; ZVFH-NEXT:    slli s11, s11, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s2
+; ZVFH-NEXT:    fmv.w.x fa4, s11
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s2, fa0
+; ZVFH-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s10, s10, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s10
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s3, fa0
+; ZVFH-NEXT:    lhu a0, 128(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sh s1, 8(s0)
+; ZVFH-NEXT:    sh s2, 10(s0)
+; ZVFH-NEXT:    sh s3, 12(s0)
+; ZVFH-NEXT:    sh a0, 14(s0)
+; ZVFH-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 0(s0)
+; ZVFH-NEXT:    sh s6, 2(s0)
+; ZVFH-NEXT:    sh s5, 4(s0)
+; ZVFH-NEXT:    sh s4, 6(s0)
+; ZVFH-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    .cfi_restore s3
+; ZVFH-NEXT:    .cfi_restore s4
+; ZVFH-NEXT:    .cfi_restore s5
+; ZVFH-NEXT:    .cfi_restore s6
+; ZVFH-NEXT:    .cfi_restore s7
+; ZVFH-NEXT:    .cfi_restore s8
+; ZVFH-NEXT:    .cfi_restore s9
+; ZVFH-NEXT:    .cfi_restore s10
+; ZVFH-NEXT:    .cfi_restore s11
+; ZVFH-NEXT:    addi sp, sp, 128
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: max_v8bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -128
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 128
+; ZVFHMIN-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    .cfi_offset s3, -40
+; ZVFHMIN-NEXT:    .cfi_offset s4, -48
+; ZVFHMIN-NEXT:    .cfi_offset s5, -56
+; ZVFHMIN-NEXT:    .cfi_offset s6, -64
+; ZVFHMIN-NEXT:    .cfi_offset s7, -72
+; ZVFHMIN-NEXT:    .cfi_offset s8, -80
+; ZVFHMIN-NEXT:    .cfi_offset s9, -88
+; ZVFHMIN-NEXT:    .cfi_offset s10, -96
+; ZVFHMIN-NEXT:    .cfi_offset s11, -104
+; ZVFHMIN-NEXT:    sd a7, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    mv s2, a6
+; ZVFHMIN-NEXT:    mv s3, a5
+; ZVFHMIN-NEXT:    mv s4, a4
+; ZVFHMIN-NEXT:    mv s5, a3
+; ZVFHMIN-NEXT:    mv s6, a2
+; ZVFHMIN-NEXT:    mv s0, a0
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa7
+; ZVFHMIN-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    fmv.x.w s10, fa6
+; ZVFHMIN-NEXT:    fmv.x.w s11, fa5
+; ZVFHMIN-NEXT:    fmv.x.w s9, fa4
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa3
+; ZVFHMIN-NEXT:    fmv.x.w s7, fa2
+; ZVFHMIN-NEXT:    fmv.x.w s8, fa1
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a0
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    slli s6, s6, 16
+; ZVFHMIN-NEXT:    slli s8, s8, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s6
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s8
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s6, fa0
+; ZVFHMIN-NEXT:    slli s5, s5, 16
+; ZVFHMIN-NEXT:    slli s7, s7, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s5
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s7
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa0
+; ZVFHMIN-NEXT:    slli s4, s4, 16
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s4
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s4, fa0
+; ZVFHMIN-NEXT:    slli s3, s3, 16
+; ZVFHMIN-NEXT:    slli s9, s9, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s3
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s9
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa0
+; ZVFHMIN-NEXT:    slli s2, s2, 16
+; ZVFHMIN-NEXT:    slli s11, s11, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s2
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s11
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa0
+; ZVFHMIN-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s10, s10, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s10
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s3, fa0
+; ZVFHMIN-NEXT:    lhu a0, 128(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sh s1, 8(s0)
+; ZVFHMIN-NEXT:    sh s2, 10(s0)
+; ZVFHMIN-NEXT:    sh s3, 12(s0)
+; ZVFHMIN-NEXT:    sh a0, 14(s0)
+; ZVFHMIN-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 0(s0)
+; ZVFHMIN-NEXT:    sh s6, 2(s0)
+; ZVFHMIN-NEXT:    sh s5, 4(s0)
+; ZVFHMIN-NEXT:    sh s4, 6(s0)
+; ZVFHMIN-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    .cfi_restore s3
+; ZVFHMIN-NEXT:    .cfi_restore s4
+; ZVFHMIN-NEXT:    .cfi_restore s5
+; ZVFHMIN-NEXT:    .cfi_restore s6
+; ZVFHMIN-NEXT:    .cfi_restore s7
+; ZVFHMIN-NEXT:    .cfi_restore s8
+; ZVFHMIN-NEXT:    .cfi_restore s9
+; ZVFHMIN-NEXT:    .cfi_restore s10
+; ZVFHMIN-NEXT:    .cfi_restore s11
+; ZVFHMIN-NEXT:    addi sp, sp, 128
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v8bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %c
+}
+
+define <9 x bfloat> @max_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) {
+; ZVFH-LABEL: max_v9bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -128
+; ZVFH-NEXT:    .cfi_def_cfa_offset 128
+; ZVFH-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    .cfi_offset s3, -40
+; ZVFH-NEXT:    .cfi_offset s4, -48
+; ZVFH-NEXT:    .cfi_offset s5, -56
+; ZVFH-NEXT:    .cfi_offset s6, -64
+; ZVFH-NEXT:    .cfi_offset s7, -72
+; ZVFH-NEXT:    .cfi_offset s8, -80
+; ZVFH-NEXT:    .cfi_offset s9, -88
+; ZVFH-NEXT:    .cfi_offset s10, -96
+; ZVFH-NEXT:    .cfi_offset s11, -104
+; ZVFH-NEXT:    sd a7, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    mv s3, a6
+; ZVFH-NEXT:    mv s5, a4
+; ZVFH-NEXT:    mv s4, a3
+; ZVFH-NEXT:    mv s6, a2
+; ZVFH-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    mv s0, a0
+; ZVFH-NEXT:    fmv.x.w a0, fa5
+; ZVFH-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    fmv.x.w s9, fa4
+; ZVFH-NEXT:    fmv.x.w s10, fa6
+; ZVFH-NEXT:    fmv.x.w s7, fa7
+; ZVFH-NEXT:    fmv.x.w s11, fa1
+; ZVFH-NEXT:    fmv.x.w s1, fa0
+; ZVFH-NEXT:    fmv.x.w s2, fa2
+; ZVFH-NEXT:    fmv.x.w a0, fa3
+; ZVFH-NEXT:    slli a5, a5, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a5
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa4, a0
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli s5, s5, 16
+; ZVFH-NEXT:    slli s2, s2, 16
+; ZVFH-NEXT:    slli s8, a0, 48
+; ZVFH-NEXT:    fmv.w.x fa5, s5
+; ZVFH-NEXT:    fmv.w.x fa4, s2
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli s6, s6, 16
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    slli a0, a0, 48
+; ZVFH-NEXT:    fmv.w.x fa5, s6
+; ZVFH-NEXT:    fmv.w.x fa4, s1
+; ZVFH-NEXT:    srli a0, a0, 16
+; ZVFH-NEXT:    or s1, s8, a0
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lui s5, 16
+; ZVFH-NEXT:    slli s4, s4, 16
+; ZVFH-NEXT:    slli s11, s11, 16
+; ZVFH-NEXT:    addi s5, s5, -1
+; ZVFH-NEXT:    fmv.w.x fa5, s4
+; ZVFH-NEXT:    fmv.w.x fa4, s11
+; ZVFH-NEXT:    and s2, a0, s5
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lhu a1, 136(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    slli s7, s7, 16
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, s7
+; ZVFH-NEXT:    or a0, s2, a0
+; ZVFH-NEXT:    slli a0, a0, 32
+; ZVFH-NEXT:    srli a0, a0, 32
+; ZVFH-NEXT:    or s4, a0, s1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lhu a1, 128(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    slli s10, s10, 16
+; ZVFH-NEXT:    slli s1, a0, 48
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, s10
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli s3, s3, 16
+; ZVFH-NEXT:    slli s9, s9, 16
+; ZVFH-NEXT:    slli a0, a0, 48
+; ZVFH-NEXT:    fmv.w.x fa5, s3
+; ZVFH-NEXT:    fmv.w.x fa4, s9
+; ZVFH-NEXT:    srli s2, a0, 16
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    ld a2, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a2, a2, 16
+; ZVFH-NEXT:    and s3, a0, s5
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, a2
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lhu a1, 144(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    ld a2, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a2, a2, 16
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, a2
+; ZVFH-NEXT:    or a0, s3, a0
+; ZVFH-NEXT:    slli a0, a0, 32
+; ZVFH-NEXT:    srli a0, a0, 32
+; ZVFH-NEXT:    or a0, a0, s1
+; ZVFH-NEXT:    or s1, a0, s2
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v8, s1
+; ZVFH-NEXT:    sh a0, 16(s0)
+; ZVFH-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
+; ZVFH-NEXT:    vmv.s.x v8, s4
+; ZVFH-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFH-NEXT:    vse64.v v8, (s0)
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH-NEXT:    addi s0, s0, 8
+; ZVFH-NEXT:    vse64.v v8, (s0)
+; ZVFH-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    .cfi_restore s3
+; ZVFH-NEXT:    .cfi_restore s4
+; ZVFH-NEXT:    .cfi_restore s5
+; ZVFH-NEXT:    .cfi_restore s6
+; ZVFH-NEXT:    .cfi_restore s7
+; ZVFH-NEXT:    .cfi_restore s8
+; ZVFH-NEXT:    .cfi_restore s9
+; ZVFH-NEXT:    .cfi_restore s10
+; ZVFH-NEXT:    .cfi_restore s11
+; ZVFH-NEXT:    addi sp, sp, 128
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: max_v9bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -128
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 128
+; ZVFHMIN-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    .cfi_offset s3, -40
+; ZVFHMIN-NEXT:    .cfi_offset s4, -48
+; ZVFHMIN-NEXT:    .cfi_offset s5, -56
+; ZVFHMIN-NEXT:    .cfi_offset s6, -64
+; ZVFHMIN-NEXT:    .cfi_offset s7, -72
+; ZVFHMIN-NEXT:    .cfi_offset s8, -80
+; ZVFHMIN-NEXT:    .cfi_offset s9, -88
+; ZVFHMIN-NEXT:    .cfi_offset s10, -96
+; ZVFHMIN-NEXT:    .cfi_offset s11, -104
+; ZVFHMIN-NEXT:    sd a7, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    mv s3, a6
+; ZVFHMIN-NEXT:    mv s5, a4
+; ZVFHMIN-NEXT:    mv s4, a3
+; ZVFHMIN-NEXT:    mv s6, a2
+; ZVFHMIN-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    mv s0, a0
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa5
+; ZVFHMIN-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    fmv.x.w s9, fa4
+; ZVFHMIN-NEXT:    fmv.x.w s10, fa6
+; ZVFHMIN-NEXT:    fmv.x.w s7, fa7
+; ZVFHMIN-NEXT:    fmv.x.w s11, fa1
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa0
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa3
+; ZVFHMIN-NEXT:    slli a5, a5, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a5
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a0
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli s5, s5, 16
+; ZVFHMIN-NEXT:    slli s2, s2, 16
+; ZVFHMIN-NEXT:    slli s8, a0, 48
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s5
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s2
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli s6, s6, 16
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 48
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s6
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s1
+; ZVFHMIN-NEXT:    srli a0, a0, 16
+; ZVFHMIN-NEXT:    or s1, s8, a0
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lui s5, 16
+; ZVFHMIN-NEXT:    slli s4, s4, 16
+; ZVFHMIN-NEXT:    slli s11, s11, 16
+; ZVFHMIN-NEXT:    addi s5, s5, -1
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s4
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s11
+; ZVFHMIN-NEXT:    and s2, a0, s5
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lhu a1, 136(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    slli s7, s7, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s7
+; ZVFHMIN-NEXT:    or a0, s2, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 32
+; ZVFHMIN-NEXT:    srli a0, a0, 32
+; ZVFHMIN-NEXT:    or s4, a0, s1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lhu a1, 128(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    slli s10, s10, 16
+; ZVFHMIN-NEXT:    slli s1, a0, 48
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s10
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli s3, s3, 16
+; ZVFHMIN-NEXT:    slli s9, s9, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 48
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s3
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s9
+; ZVFHMIN-NEXT:    srli s2, a0, 16
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    ld a2, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a2, a2, 16
+; ZVFHMIN-NEXT:    and s3, a0, s5
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a2
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lhu a1, 144(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    ld a2, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a2, a2, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a2
+; ZVFHMIN-NEXT:    or a0, s3, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 32
+; ZVFHMIN-NEXT:    srli a0, a0, 32
+; ZVFHMIN-NEXT:    or a0, a0, s1
+; ZVFHMIN-NEXT:    or s1, a0, s2
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v8, s1
+; ZVFHMIN-NEXT:    sh a0, 16(s0)
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, s4
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-NEXT:    vse64.v v8, (s0)
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN-NEXT:    addi s0, s0, 8
+; ZVFHMIN-NEXT:    vse64.v v8, (s0)
+; ZVFHMIN-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    .cfi_restore s3
+; ZVFHMIN-NEXT:    .cfi_restore s4
+; ZVFHMIN-NEXT:    .cfi_restore s5
+; ZVFHMIN-NEXT:    .cfi_restore s6
+; ZVFHMIN-NEXT:    .cfi_restore s7
+; ZVFHMIN-NEXT:    .cfi_restore s8
+; ZVFHMIN-NEXT:    .cfi_restore s9
+; ZVFHMIN-NEXT:    .cfi_restore s10
+; ZVFHMIN-NEXT:    .cfi_restore s11
+; ZVFHMIN-NEXT:    addi sp, sp, 128
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v9bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <9 x bfloat> @llvm.maximumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b)
+  ret <9 x bfloat> %c
+}
+
+define <16 x bfloat> @max_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: max_v16bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -192
+; ZVFH-NEXT:    .cfi_def_cfa_offset 192
+; ZVFH-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    .cfi_offset s3, -40
+; ZVFH-NEXT:    .cfi_offset s4, -48
+; ZVFH-NEXT:    .cfi_offset s5, -56
+; ZVFH-NEXT:    .cfi_offset s6, -64
+; ZVFH-NEXT:    .cfi_offset s7, -72
+; ZVFH-NEXT:    .cfi_offset s8, -80
+; ZVFH-NEXT:    .cfi_offset s9, -88
+; ZVFH-NEXT:    .cfi_offset s10, -96
+; ZVFH-NEXT:    .cfi_offset s11, -104
+; ZVFH-NEXT:    sd a7, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a5, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a4, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a3, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    mv s7, a1
+; ZVFH-NEXT:    mv s0, a0
+; ZVFH-NEXT:    fmv.x.w s11, fa7
+; ZVFH-NEXT:    fmv.x.w s2, fa6
+; ZVFH-NEXT:    fmv.x.w s8, fa5
+; ZVFH-NEXT:    fmv.x.w s1, fa4
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lhu s9, 224(sp)
+; ZVFH-NEXT:    lhu a1, 200(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    lhu s4, 216(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    lhu s3, 208(sp)
+; ZVFH-NEXT:    fmax.s fa0, fa5, fa4
+; ZVFH-NEXT:    fmv.x.w s5, fa3
+; ZVFH-NEXT:    fmv.x.w s6, fa2
+; ZVFH-NEXT:    fmv.x.w s10, fa1
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    slli s3, s3, 16
+; ZVFH-NEXT:    slli s10, s10, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s3
+; ZVFH-NEXT:    fmv.w.x fa4, s10
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    slli s4, s4, 16
+; ZVFH-NEXT:    slli s6, s6, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s4
+; ZVFH-NEXT:    fmv.w.x fa4, s6
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    slli s9, s9, 16
+; ZVFH-NEXT:    slli s5, s5, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s9
+; ZVFH-NEXT:    fmv.w.x fa4, s5
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    lhu a0, 232(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s1, fa0
+; ZVFH-NEXT:    lhu a0, 240(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s8, s8, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s8
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s8, fa0
+; ZVFH-NEXT:    lhu a0, 248(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s2, s2, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s2
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s2, fa0
+; ZVFH-NEXT:    lhu a0, 256(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s11, s11, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s11
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s11, fa0
+; ZVFH-NEXT:    lhu a0, 264(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s7, s7, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s7
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s7, fa0
+; ZVFH-NEXT:    lhu a0, 272(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s6, fa0
+; ZVFH-NEXT:    lhu a0, 280(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s3, fa0
+; ZVFH-NEXT:    lhu a0, 288(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s4, fa0
+; ZVFH-NEXT:    lhu a0, 296(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s5, fa0
+; ZVFH-NEXT:    lhu a0, 304(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s10, fa0
+; ZVFH-NEXT:    lhu a0, 312(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s9, fa0
+; ZVFH-NEXT:    lhu a0, 320(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    lhu a1, 192(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sh s7, 16(s0)
+; ZVFH-NEXT:    sh s6, 18(s0)
+; ZVFH-NEXT:    sh s3, 20(s0)
+; ZVFH-NEXT:    sh s4, 22(s0)
+; ZVFH-NEXT:    sh s1, 8(s0)
+; ZVFH-NEXT:    sh s8, 10(s0)
+; ZVFH-NEXT:    sh s2, 12(s0)
+; ZVFH-NEXT:    sh s11, 14(s0)
+; ZVFH-NEXT:    sh s5, 24(s0)
+; ZVFH-NEXT:    sh s10, 26(s0)
+; ZVFH-NEXT:    sh s9, 28(s0)
+; ZVFH-NEXT:    sh a0, 30(s0)
+; ZVFH-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 0(s0)
+; ZVFH-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 2(s0)
+; ZVFH-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 4(s0)
+; ZVFH-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 6(s0)
+; ZVFH-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    .cfi_restore s3
+; ZVFH-NEXT:    .cfi_restore s4
+; ZVFH-NEXT:    .cfi_restore s5
+; ZVFH-NEXT:    .cfi_restore s6
+; ZVFH-NEXT:    .cfi_restore s7
+; ZVFH-NEXT:    .cfi_restore s8
+; ZVFH-NEXT:    .cfi_restore s9
+; ZVFH-NEXT:    .cfi_restore s10
+; ZVFH-NEXT:    .cfi_restore s11
+; ZVFH-NEXT:    addi sp, sp, 192
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: max_v16bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -192
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 192
+; ZVFHMIN-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    .cfi_offset s3, -40
+; ZVFHMIN-NEXT:    .cfi_offset s4, -48
+; ZVFHMIN-NEXT:    .cfi_offset s5, -56
+; ZVFHMIN-NEXT:    .cfi_offset s6, -64
+; ZVFHMIN-NEXT:    .cfi_offset s7, -72
+; ZVFHMIN-NEXT:    .cfi_offset s8, -80
+; ZVFHMIN-NEXT:    .cfi_offset s9, -88
+; ZVFHMIN-NEXT:    .cfi_offset s10, -96
+; ZVFHMIN-NEXT:    .cfi_offset s11, -104
+; ZVFHMIN-NEXT:    sd a7, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a5, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a4, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a3, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    mv s7, a1
+; ZVFHMIN-NEXT:    mv s0, a0
+; ZVFHMIN-NEXT:    fmv.x.w s11, fa7
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa6
+; ZVFHMIN-NEXT:    fmv.x.w s8, fa5
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa4
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lhu s9, 224(sp)
+; ZVFHMIN-NEXT:    lhu a1, 200(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    lhu s4, 216(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    lhu s3, 208(sp)
+; ZVFHMIN-NEXT:    fmax.s fa0, fa5, fa4
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa3
+; ZVFHMIN-NEXT:    fmv.x.w s6, fa2
+; ZVFHMIN-NEXT:    fmv.x.w s10, fa1
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    slli s3, s3, 16
+; ZVFHMIN-NEXT:    slli s10, s10, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s3
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s10
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    slli s4, s4, 16
+; ZVFHMIN-NEXT:    slli s6, s6, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s4
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s6
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    slli s9, s9, 16
+; ZVFHMIN-NEXT:    slli s5, s5, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s9
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s5
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    lhu a0, 232(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa0
+; ZVFHMIN-NEXT:    lhu a0, 240(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s8, s8, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s8
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s8, fa0
+; ZVFHMIN-NEXT:    lhu a0, 248(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s2, s2, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s2
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa0
+; ZVFHMIN-NEXT:    lhu a0, 256(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s11, s11, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s11
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s11, fa0
+; ZVFHMIN-NEXT:    lhu a0, 264(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s7, s7, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s7
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s7, fa0
+; ZVFHMIN-NEXT:    lhu a0, 272(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s6, fa0
+; ZVFHMIN-NEXT:    lhu a0, 280(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s3, fa0
+; ZVFHMIN-NEXT:    lhu a0, 288(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s4, fa0
+; ZVFHMIN-NEXT:    lhu a0, 296(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa0
+; ZVFHMIN-NEXT:    lhu a0, 304(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s10, fa0
+; ZVFHMIN-NEXT:    lhu a0, 312(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s9, fa0
+; ZVFHMIN-NEXT:    lhu a0, 320(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    lhu a1, 192(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sh s7, 16(s0)
+; ZVFHMIN-NEXT:    sh s6, 18(s0)
+; ZVFHMIN-NEXT:    sh s3, 20(s0)
+; ZVFHMIN-NEXT:    sh s4, 22(s0)
+; ZVFHMIN-NEXT:    sh s1, 8(s0)
+; ZVFHMIN-NEXT:    sh s8, 10(s0)
+; ZVFHMIN-NEXT:    sh s2, 12(s0)
+; ZVFHMIN-NEXT:    sh s11, 14(s0)
+; ZVFHMIN-NEXT:    sh s5, 24(s0)
+; ZVFHMIN-NEXT:    sh s10, 26(s0)
+; ZVFHMIN-NEXT:    sh s9, 28(s0)
+; ZVFHMIN-NEXT:    sh a0, 30(s0)
+; ZVFHMIN-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 0(s0)
+; ZVFHMIN-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 2(s0)
+; ZVFHMIN-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 4(s0)
+; ZVFHMIN-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 6(s0)
+; ZVFHMIN-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    .cfi_restore s3
+; ZVFHMIN-NEXT:    .cfi_restore s4
+; ZVFHMIN-NEXT:    .cfi_restore s5
+; ZVFHMIN-NEXT:    .cfi_restore s6
+; ZVFHMIN-NEXT:    .cfi_restore s7
+; ZVFHMIN-NEXT:    .cfi_restore s8
+; ZVFHMIN-NEXT:    .cfi_restore s9
+; ZVFHMIN-NEXT:    .cfi_restore s10
+; ZVFHMIN-NEXT:    .cfi_restore s11
+; ZVFHMIN-NEXT:    addi sp, sp, 192
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: max_v16bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+  ret <16 x bfloat> %c
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index a95177a1de9a6..bcd86e5237918 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -1,12 +1,236 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v2bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT:    vfmin.vv v9, v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v2bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.minimum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v4bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT:    vfmin.vv v9, v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v4bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v4bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.minimum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v8bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFH-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFH-NEXT:    vfmin.vv v10, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v8bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v8bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.minimum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v16bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFH-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFH-NEXT:    vfmin.vv v12, v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v16bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v16bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT:    vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+  %v = call <16 x bfloat> @llvm.minimum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+  ret <16 x bfloat> %v
+}
 
 define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFH-LABEL: vfmin_v2f16_vv:
@@ -35,6 +259,23 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
 }
@@ -66,6 +307,23 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v4f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %v
 }
@@ -97,6 +355,23 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v8f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %v
 }
@@ -128,6 +403,23 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v16f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT:    vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <16 x half> @llvm.minimum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %v
 }
@@ -262,6 +554,17 @@ define <2 x half> @vfmin_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv_nnan:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call nnan <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
 }
@@ -295,6 +598,26 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv_nnana:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v9, v10, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %c = fadd nnan <2 x half> %a, %a
   %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %b)
   ret <2 x half> %v
@@ -329,6 +652,26 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv_nnanb:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v8, v10, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v10, v9, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v9, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %c = fadd nnan <2 x half> %b, %b
   %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c)
   ret <2 x half> %v
@@ -373,6 +716,33 @@ define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v9, v11, v11
+; ZVFBFA-NEXT:    vfadd.vv v8, v8, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFBFA-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vslideup.vi v11, v9, 2
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %d = fadd nnan <2 x half> %a, %a
   %e = fadd nnan <2 x half> %b, %b
   %f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> poison, <2 x half> %d, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
index 36114d56aa0d6..eed806da19601 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
@@ -1,6 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfhmin,+zfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
 
 define <2 x double> @min_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: min_v2f64:
@@ -107,6 +111,17 @@ define <2 x half> @min_v2f16(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v2f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %c
@@ -129,6 +144,17 @@ define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v4f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %c
@@ -151,6 +177,17 @@ define <8 x half> @min_v8f16(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v8f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %c
@@ -173,6 +210,17 @@ define <9 x half> @min_v9f16(<9 x half> %a, <9 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v9f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <9 x half> @llvm.minimumnum.v9f16(<9 x half> %a, <9 x half> %b)
   ret <9 x half> %c
@@ -195,7 +243,1496 @@ define <16 x half> @min_v16f16(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v16f16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
 entry:
   %c = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %c
 }
+
+define <2 x bfloat> @min_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: min_v2bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -32
+; ZVFH-NEXT:    .cfi_def_cfa_offset 32
+; ZVFH-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    fmv.x.w s0, fa0
+; ZVFH-NEXT:    fmv.x.w s1, fa2
+; ZVFH-NEXT:    fmv.x.w a0, fa1
+; ZVFH-NEXT:    fmv.x.w a1, fa3
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, a0
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    slli s0, s0, 16
+; ZVFH-NEXT:    slli s2, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s1
+; ZVFH-NEXT:    fmv.w.x fa4, s0
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli a0, a0, 48
+; ZVFH-NEXT:    srli a0, a0, 48
+; ZVFH-NEXT:    or a0, a0, s2
+; ZVFH-NEXT:    lui a1, 1048560
+; ZVFH-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; ZVFH-NEXT:    vmv.s.x v8, a0
+; ZVFH-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT:    vmv.x.s a0, v8
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH-NEXT:    or a0, a0, a1
+; ZVFH-NEXT:    fmv.w.x fa0, a0
+; ZVFH-NEXT:    vmv.x.s a0, v8
+; ZVFH-NEXT:    or a0, a0, a1
+; ZVFH-NEXT:    fmv.w.x fa1, a0
+; ZVFH-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    addi sp, sp, 32
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: min_v2bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -32
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 32
+; ZVFHMIN-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    fmv.x.w s0, fa0
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa1
+; ZVFHMIN-NEXT:    fmv.x.w a1, fa3
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a0
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    slli s0, s0, 16
+; ZVFHMIN-NEXT:    slli s2, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s0
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli a0, a0, 48
+; ZVFHMIN-NEXT:    srli a0, a0, 48
+; ZVFHMIN-NEXT:    or a0, a0, s2
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa1, a0
+; ZVFHMIN-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    addi sp, sp, 32
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v2bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  ret <2 x bfloat> %c
+}
+
+define <4 x bfloat> @min_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: min_v4bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -80
+; ZVFH-NEXT:    .cfi_def_cfa_offset 80
+; ZVFH-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    .cfi_offset s3, -40
+; ZVFH-NEXT:    .cfi_offset s4, -48
+; ZVFH-NEXT:    .cfi_offset s5, -56
+; ZVFH-NEXT:    .cfi_offset s6, -64
+; ZVFH-NEXT:    .cfi_offset s7, -72
+; ZVFH-NEXT:    mv s0, a0
+; ZVFH-NEXT:    fmv.x.w s1, fa3
+; ZVFH-NEXT:    fmv.x.w s2, fa7
+; ZVFH-NEXT:    fmv.x.w s3, fa2
+; ZVFH-NEXT:    fmv.x.w s4, fa6
+; ZVFH-NEXT:    fmv.x.w s5, fa1
+; ZVFH-NEXT:    fmv.x.w s6, fa5
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    fmv.x.w a1, fa4
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, a0
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s7, fa0
+; ZVFH-NEXT:    slli s6, s6, 16
+; ZVFH-NEXT:    slli s5, s5, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s6
+; ZVFH-NEXT:    fmv.w.x fa4, s5
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s5, fa0
+; ZVFH-NEXT:    slli s4, s4, 16
+; ZVFH-NEXT:    slli s3, s3, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s4
+; ZVFH-NEXT:    fmv.w.x fa4, s3
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s3, fa0
+; ZVFH-NEXT:    slli s2, s2, 16
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s2
+; ZVFH-NEXT:    fmv.w.x fa4, s1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sh s7, 0(s0)
+; ZVFH-NEXT:    sh s5, 2(s0)
+; ZVFH-NEXT:    sh s3, 4(s0)
+; ZVFH-NEXT:    sh a0, 6(s0)
+; ZVFH-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    .cfi_restore s3
+; ZVFH-NEXT:    .cfi_restore s4
+; ZVFH-NEXT:    .cfi_restore s5
+; ZVFH-NEXT:    .cfi_restore s6
+; ZVFH-NEXT:    .cfi_restore s7
+; ZVFH-NEXT:    addi sp, sp, 80
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: min_v4bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -80
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 80
+; ZVFHMIN-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    .cfi_offset s3, -40
+; ZVFHMIN-NEXT:    .cfi_offset s4, -48
+; ZVFHMIN-NEXT:    .cfi_offset s5, -56
+; ZVFHMIN-NEXT:    .cfi_offset s6, -64
+; ZVFHMIN-NEXT:    .cfi_offset s7, -72
+; ZVFHMIN-NEXT:    mv s0, a0
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa3
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa7
+; ZVFHMIN-NEXT:    fmv.x.w s3, fa2
+; ZVFHMIN-NEXT:    fmv.x.w s4, fa6
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa1
+; ZVFHMIN-NEXT:    fmv.x.w s6, fa5
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    fmv.x.w a1, fa4
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a0
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s7, fa0
+; ZVFHMIN-NEXT:    slli s6, s6, 16
+; ZVFHMIN-NEXT:    slli s5, s5, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s6
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s5
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa0
+; ZVFHMIN-NEXT:    slli s4, s4, 16
+; ZVFHMIN-NEXT:    slli s3, s3, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s4
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s3
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s3, fa0
+; ZVFHMIN-NEXT:    slli s2, s2, 16
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s2
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sh s7, 0(s0)
+; ZVFHMIN-NEXT:    sh s5, 2(s0)
+; ZVFHMIN-NEXT:    sh s3, 4(s0)
+; ZVFHMIN-NEXT:    sh a0, 6(s0)
+; ZVFHMIN-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    .cfi_restore s3
+; ZVFHMIN-NEXT:    .cfi_restore s4
+; ZVFHMIN-NEXT:    .cfi_restore s5
+; ZVFHMIN-NEXT:    .cfi_restore s6
+; ZVFHMIN-NEXT:    .cfi_restore s7
+; ZVFHMIN-NEXT:    addi sp, sp, 80
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v4bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+  ret <4 x bfloat> %c
+}
+
+define <8 x bfloat> @min_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: min_v8bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -128
+; ZVFH-NEXT:    .cfi_def_cfa_offset 128
+; ZVFH-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    .cfi_offset s3, -40
+; ZVFH-NEXT:    .cfi_offset s4, -48
+; ZVFH-NEXT:    .cfi_offset s5, -56
+; ZVFH-NEXT:    .cfi_offset s6, -64
+; ZVFH-NEXT:    .cfi_offset s7, -72
+; ZVFH-NEXT:    .cfi_offset s8, -80
+; ZVFH-NEXT:    .cfi_offset s9, -88
+; ZVFH-NEXT:    .cfi_offset s10, -96
+; ZVFH-NEXT:    .cfi_offset s11, -104
+; ZVFH-NEXT:    sd a7, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    mv s2, a6
+; ZVFH-NEXT:    mv s3, a5
+; ZVFH-NEXT:    mv s4, a4
+; ZVFH-NEXT:    mv s5, a3
+; ZVFH-NEXT:    mv s6, a2
+; ZVFH-NEXT:    mv s0, a0
+; ZVFH-NEXT:    fmv.x.w a0, fa7
+; ZVFH-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    fmv.x.w s10, fa6
+; ZVFH-NEXT:    fmv.x.w s11, fa5
+; ZVFH-NEXT:    fmv.x.w s9, fa4
+; ZVFH-NEXT:    fmv.x.w s1, fa3
+; ZVFH-NEXT:    fmv.x.w s7, fa2
+; ZVFH-NEXT:    fmv.x.w s8, fa1
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa4, a0
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    slli s6, s6, 16
+; ZVFH-NEXT:    slli s8, s8, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s6
+; ZVFH-NEXT:    fmv.w.x fa4, s8
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s6, fa0
+; ZVFH-NEXT:    slli s5, s5, 16
+; ZVFH-NEXT:    slli s7, s7, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s5
+; ZVFH-NEXT:    fmv.w.x fa4, s7
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s5, fa0
+; ZVFH-NEXT:    slli s4, s4, 16
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s4
+; ZVFH-NEXT:    fmv.w.x fa4, s1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s4, fa0
+; ZVFH-NEXT:    slli s3, s3, 16
+; ZVFH-NEXT:    slli s9, s9, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s3
+; ZVFH-NEXT:    fmv.w.x fa4, s9
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s1, fa0
+; ZVFH-NEXT:    slli s2, s2, 16
+; ZVFH-NEXT:    slli s11, s11, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s2
+; ZVFH-NEXT:    fmv.w.x fa4, s11
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s2, fa0
+; ZVFH-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s10, s10, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s10
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s3, fa0
+; ZVFH-NEXT:    lhu a0, 128(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sh s1, 8(s0)
+; ZVFH-NEXT:    sh s2, 10(s0)
+; ZVFH-NEXT:    sh s3, 12(s0)
+; ZVFH-NEXT:    sh a0, 14(s0)
+; ZVFH-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 0(s0)
+; ZVFH-NEXT:    sh s6, 2(s0)
+; ZVFH-NEXT:    sh s5, 4(s0)
+; ZVFH-NEXT:    sh s4, 6(s0)
+; ZVFH-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    .cfi_restore s3
+; ZVFH-NEXT:    .cfi_restore s4
+; ZVFH-NEXT:    .cfi_restore s5
+; ZVFH-NEXT:    .cfi_restore s6
+; ZVFH-NEXT:    .cfi_restore s7
+; ZVFH-NEXT:    .cfi_restore s8
+; ZVFH-NEXT:    .cfi_restore s9
+; ZVFH-NEXT:    .cfi_restore s10
+; ZVFH-NEXT:    .cfi_restore s11
+; ZVFH-NEXT:    addi sp, sp, 128
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: min_v8bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -128
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 128
+; ZVFHMIN-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    .cfi_offset s3, -40
+; ZVFHMIN-NEXT:    .cfi_offset s4, -48
+; ZVFHMIN-NEXT:    .cfi_offset s5, -56
+; ZVFHMIN-NEXT:    .cfi_offset s6, -64
+; ZVFHMIN-NEXT:    .cfi_offset s7, -72
+; ZVFHMIN-NEXT:    .cfi_offset s8, -80
+; ZVFHMIN-NEXT:    .cfi_offset s9, -88
+; ZVFHMIN-NEXT:    .cfi_offset s10, -96
+; ZVFHMIN-NEXT:    .cfi_offset s11, -104
+; ZVFHMIN-NEXT:    sd a7, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    mv s2, a6
+; ZVFHMIN-NEXT:    mv s3, a5
+; ZVFHMIN-NEXT:    mv s4, a4
+; ZVFHMIN-NEXT:    mv s5, a3
+; ZVFHMIN-NEXT:    mv s6, a2
+; ZVFHMIN-NEXT:    mv s0, a0
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa7
+; ZVFHMIN-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    fmv.x.w s10, fa6
+; ZVFHMIN-NEXT:    fmv.x.w s11, fa5
+; ZVFHMIN-NEXT:    fmv.x.w s9, fa4
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa3
+; ZVFHMIN-NEXT:    fmv.x.w s7, fa2
+; ZVFHMIN-NEXT:    fmv.x.w s8, fa1
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a0
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    slli s6, s6, 16
+; ZVFHMIN-NEXT:    slli s8, s8, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s6
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s8
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s6, fa0
+; ZVFHMIN-NEXT:    slli s5, s5, 16
+; ZVFHMIN-NEXT:    slli s7, s7, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s5
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s7
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa0
+; ZVFHMIN-NEXT:    slli s4, s4, 16
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s4
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s4, fa0
+; ZVFHMIN-NEXT:    slli s3, s3, 16
+; ZVFHMIN-NEXT:    slli s9, s9, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s3
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s9
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa0
+; ZVFHMIN-NEXT:    slli s2, s2, 16
+; ZVFHMIN-NEXT:    slli s11, s11, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s2
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s11
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa0
+; ZVFHMIN-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s10, s10, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s10
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s3, fa0
+; ZVFHMIN-NEXT:    lhu a0, 128(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sh s1, 8(s0)
+; ZVFHMIN-NEXT:    sh s2, 10(s0)
+; ZVFHMIN-NEXT:    sh s3, 12(s0)
+; ZVFHMIN-NEXT:    sh a0, 14(s0)
+; ZVFHMIN-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 0(s0)
+; ZVFHMIN-NEXT:    sh s6, 2(s0)
+; ZVFHMIN-NEXT:    sh s5, 4(s0)
+; ZVFHMIN-NEXT:    sh s4, 6(s0)
+; ZVFHMIN-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    .cfi_restore s3
+; ZVFHMIN-NEXT:    .cfi_restore s4
+; ZVFHMIN-NEXT:    .cfi_restore s5
+; ZVFHMIN-NEXT:    .cfi_restore s6
+; ZVFHMIN-NEXT:    .cfi_restore s7
+; ZVFHMIN-NEXT:    .cfi_restore s8
+; ZVFHMIN-NEXT:    .cfi_restore s9
+; ZVFHMIN-NEXT:    .cfi_restore s10
+; ZVFHMIN-NEXT:    .cfi_restore s11
+; ZVFHMIN-NEXT:    addi sp, sp, 128
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v8bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %c
+}
+
+define <9 x bfloat> @min_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) {
+; ZVFH-LABEL: min_v9bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -128
+; ZVFH-NEXT:    .cfi_def_cfa_offset 128
+; ZVFH-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    .cfi_offset s3, -40
+; ZVFH-NEXT:    .cfi_offset s4, -48
+; ZVFH-NEXT:    .cfi_offset s5, -56
+; ZVFH-NEXT:    .cfi_offset s6, -64
+; ZVFH-NEXT:    .cfi_offset s7, -72
+; ZVFH-NEXT:    .cfi_offset s8, -80
+; ZVFH-NEXT:    .cfi_offset s9, -88
+; ZVFH-NEXT:    .cfi_offset s10, -96
+; ZVFH-NEXT:    .cfi_offset s11, -104
+; ZVFH-NEXT:    sd a7, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    mv s3, a6
+; ZVFH-NEXT:    mv s5, a4
+; ZVFH-NEXT:    mv s4, a3
+; ZVFH-NEXT:    mv s6, a2
+; ZVFH-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    mv s0, a0
+; ZVFH-NEXT:    fmv.x.w a0, fa5
+; ZVFH-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    fmv.x.w s9, fa4
+; ZVFH-NEXT:    fmv.x.w s10, fa6
+; ZVFH-NEXT:    fmv.x.w s7, fa7
+; ZVFH-NEXT:    fmv.x.w s11, fa1
+; ZVFH-NEXT:    fmv.x.w s1, fa0
+; ZVFH-NEXT:    fmv.x.w s2, fa2
+; ZVFH-NEXT:    fmv.x.w a0, fa3
+; ZVFH-NEXT:    slli a5, a5, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a5
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa4, a0
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli s5, s5, 16
+; ZVFH-NEXT:    slli s2, s2, 16
+; ZVFH-NEXT:    slli s8, a0, 48
+; ZVFH-NEXT:    fmv.w.x fa5, s5
+; ZVFH-NEXT:    fmv.w.x fa4, s2
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli s6, s6, 16
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    slli a0, a0, 48
+; ZVFH-NEXT:    fmv.w.x fa5, s6
+; ZVFH-NEXT:    fmv.w.x fa4, s1
+; ZVFH-NEXT:    srli a0, a0, 16
+; ZVFH-NEXT:    or s1, s8, a0
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lui s5, 16
+; ZVFH-NEXT:    slli s4, s4, 16
+; ZVFH-NEXT:    slli s11, s11, 16
+; ZVFH-NEXT:    addi s5, s5, -1
+; ZVFH-NEXT:    fmv.w.x fa5, s4
+; ZVFH-NEXT:    fmv.w.x fa4, s11
+; ZVFH-NEXT:    and s2, a0, s5
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lhu a1, 136(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    slli s7, s7, 16
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, s7
+; ZVFH-NEXT:    or a0, s2, a0
+; ZVFH-NEXT:    slli a0, a0, 32
+; ZVFH-NEXT:    srli a0, a0, 32
+; ZVFH-NEXT:    or s4, a0, s1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lhu a1, 128(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    slli s10, s10, 16
+; ZVFH-NEXT:    slli s1, a0, 48
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, s10
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    slli s3, s3, 16
+; ZVFH-NEXT:    slli s9, s9, 16
+; ZVFH-NEXT:    slli a0, a0, 48
+; ZVFH-NEXT:    fmv.w.x fa5, s3
+; ZVFH-NEXT:    fmv.w.x fa4, s9
+; ZVFH-NEXT:    srli s2, a0, 16
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    ld a2, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a2, a2, 16
+; ZVFH-NEXT:    and s3, a0, s5
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, a2
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lhu a1, 144(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    ld a2, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a2, a2, 16
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a1
+; ZVFH-NEXT:    fmv.w.x fa4, a2
+; ZVFH-NEXT:    or a0, s3, a0
+; ZVFH-NEXT:    slli a0, a0, 32
+; ZVFH-NEXT:    srli a0, a0, 32
+; ZVFH-NEXT:    or a0, a0, s1
+; ZVFH-NEXT:    or s1, a0, s2
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v8, s1
+; ZVFH-NEXT:    sh a0, 16(s0)
+; ZVFH-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
+; ZVFH-NEXT:    vmv.s.x v8, s4
+; ZVFH-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFH-NEXT:    vse64.v v8, (s0)
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFH-NEXT:    addi s0, s0, 8
+; ZVFH-NEXT:    vse64.v v8, (s0)
+; ZVFH-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    .cfi_restore s3
+; ZVFH-NEXT:    .cfi_restore s4
+; ZVFH-NEXT:    .cfi_restore s5
+; ZVFH-NEXT:    .cfi_restore s6
+; ZVFH-NEXT:    .cfi_restore s7
+; ZVFH-NEXT:    .cfi_restore s8
+; ZVFH-NEXT:    .cfi_restore s9
+; ZVFH-NEXT:    .cfi_restore s10
+; ZVFH-NEXT:    .cfi_restore s11
+; ZVFH-NEXT:    addi sp, sp, 128
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: min_v9bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -128
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 128
+; ZVFHMIN-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    .cfi_offset s3, -40
+; ZVFHMIN-NEXT:    .cfi_offset s4, -48
+; ZVFHMIN-NEXT:    .cfi_offset s5, -56
+; ZVFHMIN-NEXT:    .cfi_offset s6, -64
+; ZVFHMIN-NEXT:    .cfi_offset s7, -72
+; ZVFHMIN-NEXT:    .cfi_offset s8, -80
+; ZVFHMIN-NEXT:    .cfi_offset s9, -88
+; ZVFHMIN-NEXT:    .cfi_offset s10, -96
+; ZVFHMIN-NEXT:    .cfi_offset s11, -104
+; ZVFHMIN-NEXT:    sd a7, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    mv s3, a6
+; ZVFHMIN-NEXT:    mv s5, a4
+; ZVFHMIN-NEXT:    mv s4, a3
+; ZVFHMIN-NEXT:    mv s6, a2
+; ZVFHMIN-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    mv s0, a0
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa5
+; ZVFHMIN-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    fmv.x.w s9, fa4
+; ZVFHMIN-NEXT:    fmv.x.w s10, fa6
+; ZVFHMIN-NEXT:    fmv.x.w s7, fa7
+; ZVFHMIN-NEXT:    fmv.x.w s11, fa1
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa0
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa3
+; ZVFHMIN-NEXT:    slli a5, a5, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a5
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a0
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli s5, s5, 16
+; ZVFHMIN-NEXT:    slli s2, s2, 16
+; ZVFHMIN-NEXT:    slli s8, a0, 48
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s5
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s2
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli s6, s6, 16
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 48
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s6
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s1
+; ZVFHMIN-NEXT:    srli a0, a0, 16
+; ZVFHMIN-NEXT:    or s1, s8, a0
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lui s5, 16
+; ZVFHMIN-NEXT:    slli s4, s4, 16
+; ZVFHMIN-NEXT:    slli s11, s11, 16
+; ZVFHMIN-NEXT:    addi s5, s5, -1
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s4
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s11
+; ZVFHMIN-NEXT:    and s2, a0, s5
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lhu a1, 136(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    slli s7, s7, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s7
+; ZVFHMIN-NEXT:    or a0, s2, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 32
+; ZVFHMIN-NEXT:    srli a0, a0, 32
+; ZVFHMIN-NEXT:    or s4, a0, s1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lhu a1, 128(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    slli s10, s10, 16
+; ZVFHMIN-NEXT:    slli s1, a0, 48
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s10
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    slli s3, s3, 16
+; ZVFHMIN-NEXT:    slli s9, s9, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 48
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s3
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s9
+; ZVFHMIN-NEXT:    srli s2, a0, 16
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    ld a2, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a2, a2, 16
+; ZVFHMIN-NEXT:    and s3, a0, s5
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a2
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lhu a1, 144(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    ld a2, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a2, a2, 16
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a2
+; ZVFHMIN-NEXT:    or a0, s3, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 32
+; ZVFHMIN-NEXT:    srli a0, a0, 32
+; ZVFHMIN-NEXT:    or a0, a0, s1
+; ZVFHMIN-NEXT:    or s1, a0, s2
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v8, s1
+; ZVFHMIN-NEXT:    sh a0, 16(s0)
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, s4
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-NEXT:    vse64.v v8, (s0)
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN-NEXT:    addi s0, s0, 8
+; ZVFHMIN-NEXT:    vse64.v v8, (s0)
+; ZVFHMIN-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    .cfi_restore s3
+; ZVFHMIN-NEXT:    .cfi_restore s4
+; ZVFHMIN-NEXT:    .cfi_restore s5
+; ZVFHMIN-NEXT:    .cfi_restore s6
+; ZVFHMIN-NEXT:    .cfi_restore s7
+; ZVFHMIN-NEXT:    .cfi_restore s8
+; ZVFHMIN-NEXT:    .cfi_restore s9
+; ZVFHMIN-NEXT:    .cfi_restore s10
+; ZVFHMIN-NEXT:    .cfi_restore s11
+; ZVFHMIN-NEXT:    addi sp, sp, 128
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v9bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <9 x bfloat> @llvm.minimumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b)
+  ret <9 x bfloat> %c
+}
+
+define <16 x bfloat> @min_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: min_v16bf16:
+; ZVFH:       # %bb.0: # %entry
+; ZVFH-NEXT:    addi sp, sp, -192
+; ZVFH-NEXT:    .cfi_def_cfa_offset 192
+; ZVFH-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    .cfi_offset s1, -24
+; ZVFH-NEXT:    .cfi_offset s2, -32
+; ZVFH-NEXT:    .cfi_offset s3, -40
+; ZVFH-NEXT:    .cfi_offset s4, -48
+; ZVFH-NEXT:    .cfi_offset s5, -56
+; ZVFH-NEXT:    .cfi_offset s6, -64
+; ZVFH-NEXT:    .cfi_offset s7, -72
+; ZVFH-NEXT:    .cfi_offset s8, -80
+; ZVFH-NEXT:    .cfi_offset s9, -88
+; ZVFH-NEXT:    .cfi_offset s10, -96
+; ZVFH-NEXT:    .cfi_offset s11, -104
+; ZVFH-NEXT:    sd a7, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a5, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a4, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a3, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    mv s7, a1
+; ZVFH-NEXT:    mv s0, a0
+; ZVFH-NEXT:    fmv.x.w s11, fa7
+; ZVFH-NEXT:    fmv.x.w s2, fa6
+; ZVFH-NEXT:    fmv.x.w s8, fa5
+; ZVFH-NEXT:    fmv.x.w s1, fa4
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    lhu s9, 224(sp)
+; ZVFH-NEXT:    lhu a1, 200(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    lhu s4, 216(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    lhu s3, 208(sp)
+; ZVFH-NEXT:    fmin.s fa0, fa5, fa4
+; ZVFH-NEXT:    fmv.x.w s5, fa3
+; ZVFH-NEXT:    fmv.x.w s6, fa2
+; ZVFH-NEXT:    fmv.x.w s10, fa1
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    slli s3, s3, 16
+; ZVFH-NEXT:    slli s10, s10, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s3
+; ZVFH-NEXT:    fmv.w.x fa4, s10
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    slli s4, s4, 16
+; ZVFH-NEXT:    slli s6, s6, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s4
+; ZVFH-NEXT:    fmv.w.x fa4, s6
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    slli s9, s9, 16
+; ZVFH-NEXT:    slli s5, s5, 16
+; ZVFH-NEXT:    fmv.w.x fa5, s9
+; ZVFH-NEXT:    fmv.w.x fa4, s5
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    lhu a0, 232(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s1, s1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s1, fa0
+; ZVFH-NEXT:    lhu a0, 240(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s8, s8, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s8
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s8, fa0
+; ZVFH-NEXT:    lhu a0, 248(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s2, s2, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s2
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s2, fa0
+; ZVFH-NEXT:    lhu a0, 256(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s11, s11, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s11
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s11, fa0
+; ZVFH-NEXT:    lhu a0, 264(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    slli s7, s7, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, s7
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s7, fa0
+; ZVFH-NEXT:    lhu a0, 272(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s6, fa0
+; ZVFH-NEXT:    lhu a0, 280(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s3, fa0
+; ZVFH-NEXT:    lhu a0, 288(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s4, fa0
+; ZVFH-NEXT:    lhu a0, 296(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s5, fa0
+; ZVFH-NEXT:    lhu a0, 304(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s10, fa0
+; ZVFH-NEXT:    lhu a0, 312(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w s9, fa0
+; ZVFH-NEXT:    lhu a0, 320(sp)
+; ZVFH-NEXT:    slli a0, a0, 16
+; ZVFH-NEXT:    lhu a1, 192(sp)
+; ZVFH-NEXT:    slli a1, a1, 16
+; ZVFH-NEXT:    fmv.w.x fa5, a0
+; ZVFH-NEXT:    fmv.w.x fa4, a1
+; ZVFH-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFH-NEXT:    call __truncsfbf2
+; ZVFH-NEXT:    fmv.x.w a0, fa0
+; ZVFH-NEXT:    sh s7, 16(s0)
+; ZVFH-NEXT:    sh s6, 18(s0)
+; ZVFH-NEXT:    sh s3, 20(s0)
+; ZVFH-NEXT:    sh s4, 22(s0)
+; ZVFH-NEXT:    sh s1, 8(s0)
+; ZVFH-NEXT:    sh s8, 10(s0)
+; ZVFH-NEXT:    sh s2, 12(s0)
+; ZVFH-NEXT:    sh s11, 14(s0)
+; ZVFH-NEXT:    sh s5, 24(s0)
+; ZVFH-NEXT:    sh s10, 26(s0)
+; ZVFH-NEXT:    sh s9, 28(s0)
+; ZVFH-NEXT:    sh a0, 30(s0)
+; ZVFH-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 0(s0)
+; ZVFH-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 2(s0)
+; ZVFH-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 4(s0)
+; ZVFH-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    sh a0, 6(s0)
+; ZVFH-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    .cfi_restore s1
+; ZVFH-NEXT:    .cfi_restore s2
+; ZVFH-NEXT:    .cfi_restore s3
+; ZVFH-NEXT:    .cfi_restore s4
+; ZVFH-NEXT:    .cfi_restore s5
+; ZVFH-NEXT:    .cfi_restore s6
+; ZVFH-NEXT:    .cfi_restore s7
+; ZVFH-NEXT:    .cfi_restore s8
+; ZVFH-NEXT:    .cfi_restore s9
+; ZVFH-NEXT:    .cfi_restore s10
+; ZVFH-NEXT:    .cfi_restore s11
+; ZVFH-NEXT:    addi sp, sp, 192
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: min_v16bf16:
+; ZVFHMIN:       # %bb.0: # %entry
+; ZVFHMIN-NEXT:    addi sp, sp, -192
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 192
+; ZVFHMIN-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-NEXT:    .cfi_offset s1, -24
+; ZVFHMIN-NEXT:    .cfi_offset s2, -32
+; ZVFHMIN-NEXT:    .cfi_offset s3, -40
+; ZVFHMIN-NEXT:    .cfi_offset s4, -48
+; ZVFHMIN-NEXT:    .cfi_offset s5, -56
+; ZVFHMIN-NEXT:    .cfi_offset s6, -64
+; ZVFHMIN-NEXT:    .cfi_offset s7, -72
+; ZVFHMIN-NEXT:    .cfi_offset s8, -80
+; ZVFHMIN-NEXT:    .cfi_offset s9, -88
+; ZVFHMIN-NEXT:    .cfi_offset s10, -96
+; ZVFHMIN-NEXT:    .cfi_offset s11, -104
+; ZVFHMIN-NEXT:    sd a7, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a5, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a4, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a3, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    mv s7, a1
+; ZVFHMIN-NEXT:    mv s0, a0
+; ZVFHMIN-NEXT:    fmv.x.w s11, fa7
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa6
+; ZVFHMIN-NEXT:    fmv.x.w s8, fa5
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa4
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    lhu s9, 224(sp)
+; ZVFHMIN-NEXT:    lhu a1, 200(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    lhu s4, 216(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    lhu s3, 208(sp)
+; ZVFHMIN-NEXT:    fmin.s fa0, fa5, fa4
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa3
+; ZVFHMIN-NEXT:    fmv.x.w s6, fa2
+; ZVFHMIN-NEXT:    fmv.x.w s10, fa1
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    slli s3, s3, 16
+; ZVFHMIN-NEXT:    slli s10, s10, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s3
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s10
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    slli s4, s4, 16
+; ZVFHMIN-NEXT:    slli s6, s6, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s4
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s6
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    slli s9, s9, 16
+; ZVFHMIN-NEXT:    slli s5, s5, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, s9
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s5
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT:    lhu a0, 232(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s1, s1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s1, fa0
+; ZVFHMIN-NEXT:    lhu a0, 240(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s8, s8, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s8
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s8, fa0
+; ZVFHMIN-NEXT:    lhu a0, 248(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s2, s2, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s2
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s2, fa0
+; ZVFHMIN-NEXT:    lhu a0, 256(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s11, s11, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s11
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s11, fa0
+; ZVFHMIN-NEXT:    lhu a0, 264(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    slli s7, s7, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, s7
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s7, fa0
+; ZVFHMIN-NEXT:    lhu a0, 272(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s6, fa0
+; ZVFHMIN-NEXT:    lhu a0, 280(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s3, fa0
+; ZVFHMIN-NEXT:    lhu a0, 288(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s4, fa0
+; ZVFHMIN-NEXT:    lhu a0, 296(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s5, fa0
+; ZVFHMIN-NEXT:    lhu a0, 304(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s10, fa0
+; ZVFHMIN-NEXT:    lhu a0, 312(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w s9, fa0
+; ZVFHMIN-NEXT:    lhu a0, 320(sp)
+; ZVFHMIN-NEXT:    slli a0, a0, 16
+; ZVFHMIN-NEXT:    lhu a1, 192(sp)
+; ZVFHMIN-NEXT:    slli a1, a1, 16
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fmv.w.x fa4, a1
+; ZVFHMIN-NEXT:    fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT:    call __truncsfbf2
+; ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; ZVFHMIN-NEXT:    sh s7, 16(s0)
+; ZVFHMIN-NEXT:    sh s6, 18(s0)
+; ZVFHMIN-NEXT:    sh s3, 20(s0)
+; ZVFHMIN-NEXT:    sh s4, 22(s0)
+; ZVFHMIN-NEXT:    sh s1, 8(s0)
+; ZVFHMIN-NEXT:    sh s8, 10(s0)
+; ZVFHMIN-NEXT:    sh s2, 12(s0)
+; ZVFHMIN-NEXT:    sh s11, 14(s0)
+; ZVFHMIN-NEXT:    sh s5, 24(s0)
+; ZVFHMIN-NEXT:    sh s10, 26(s0)
+; ZVFHMIN-NEXT:    sh s9, 28(s0)
+; ZVFHMIN-NEXT:    sh a0, 30(s0)
+; ZVFHMIN-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 0(s0)
+; ZVFHMIN-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 2(s0)
+; ZVFHMIN-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 4(s0)
+; ZVFHMIN-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    sh a0, 6(s0)
+; ZVFHMIN-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT:    .cfi_restore ra
+; ZVFHMIN-NEXT:    .cfi_restore s0
+; ZVFHMIN-NEXT:    .cfi_restore s1
+; ZVFHMIN-NEXT:    .cfi_restore s2
+; ZVFHMIN-NEXT:    .cfi_restore s3
+; ZVFHMIN-NEXT:    .cfi_restore s4
+; ZVFHMIN-NEXT:    .cfi_restore s5
+; ZVFHMIN-NEXT:    .cfi_restore s6
+; ZVFHMIN-NEXT:    .cfi_restore s7
+; ZVFHMIN-NEXT:    .cfi_restore s8
+; ZVFHMIN-NEXT:    .cfi_restore s9
+; ZVFHMIN-NEXT:    .cfi_restore s10
+; ZVFHMIN-NEXT:    .cfi_restore s11
+; ZVFHMIN-NEXT:    addi sp, sp, 192
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: min_v16bf16:
+; ZVFBFA:       # %bb.0: # %entry
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+entry:
+  %c = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+  ret <16 x bfloat> %c
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
index 44362efa1fe83..179d88cecc7f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
@@ -1,12 +1,516 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v2bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v2bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfmax_v2bf16_vf(<2 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v2bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vmv.v.x v9, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v2bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+  %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %splat)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfmax_v2bf16_fv(<2 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v2bf16_fv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vmv.v.x v9, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v8, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v2bf16_fv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2bf16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+  %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %splat, <2 x bfloat> %a)
+  ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v4bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v4bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v4bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+  ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmax_v4bf16_vf(<4 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v4bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v9, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v4bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v4bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+  %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %splat)
+  ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmax_v4bf16_fv(<4 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v4bf16_fv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v9, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v8, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v4bf16_fv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v4bf16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+  %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %splat, <4 x bfloat> %a)
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v8bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v10, v12, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v8bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v8bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmax_v8bf16_vf(<8 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v8bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.v.x v12, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v10, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v8bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v8bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v12, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+  %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %splat)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmax_v8bf16_fv(<8 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v8bf16_fv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.v.x v12, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v10, v8, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v8bf16_fv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v10, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v8bf16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v12, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+  %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %splat, <8 x bfloat> %a)
+  ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v16bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmax.vv v12, v16, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v16bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v16bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+  %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+  ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmax_v16bf16_vf(<16 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v16bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v16, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmax.vv v12, v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v16bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v16bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+  %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %splat)
+  ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmax_v16bf16_fv(<16 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v16bf16_fv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v16, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmax.vv v12, v8, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v16bf16_fv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v12, v8, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v16bf16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+  %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %splat, <16 x bfloat> %a)
+  ret <16 x bfloat> %v
+}
 
 define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFH-LABEL: vfmax_v2f16_vv:
@@ -25,6 +529,17 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
 }
@@ -48,6 +563,19 @@ define <2 x half> @vfmax_v2f16_vf(<2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v9, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <2 x half> poison, half %b, i32 0
   %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
   %v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %splat)
@@ -73,6 +601,19 @@ define <2 x half> @vfmax_v2f16_fv(<2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v9, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <2 x half> poison, half %b, i32 0
   %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
   %v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %splat, <2 x half> %a)
@@ -96,6 +637,17 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v4f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %v
 }
@@ -119,6 +671,19 @@ define <4 x half> @vfmax_v4f16_vf(<4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v4f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v9, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <4 x half> poison, half %b, i32 0
   %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
   %v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %splat)
@@ -144,6 +709,19 @@ define <4 x half> @vfmax_v4f16_fv(<4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v4f16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v9, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <4 x half> poison, half %b, i32 0
   %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
   %v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %splat, <4 x half> %a)
@@ -167,6 +745,17 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v8f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %v
 }
@@ -190,6 +779,19 @@ define <8 x half> @vfmax_v8f16_vf(<8 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v8f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v12, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <8 x half> poison, half %b, i32 0
   %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
   %v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %splat)
@@ -215,6 +817,19 @@ define <8 x half> @vfmax_v8f16_fv(<8 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v8f16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v12, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <8 x half> poison, half %b, i32 0
   %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
   %v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %splat, <8 x half> %a)
@@ -238,6 +853,17 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v16f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %v
 }
@@ -261,6 +887,19 @@ define <16 x half> @vfmax_v16f16_vf(<16 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v16f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v16, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
   %v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %a, <16 x half> %splat)
@@ -286,6 +925,19 @@ define <16 x half> @vfmax_v16f16_fv(<16 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_v16f16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v16, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
   %v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %splat, <16 x half> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
index c9bb99d6cb3d6..2228292d9e66b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
@@ -1,12 +1,516 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v2bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v2bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfmin_v2bf16_vf(<2 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v2bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vmv.v.x v9, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v2bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+  %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %splat)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfmin_v2bf16_fv(<2 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v2bf16_fv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vmv.v.x v9, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v8, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v2bf16_fv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2bf16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+  %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %splat, <2 x bfloat> %a)
+  ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v4bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v4bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v4bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+  ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmin_v4bf16_vf(<4 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v4bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v9, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v4bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v4bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+  %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %splat)
+  ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmin_v4bf16_fv(<4 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v4bf16_fv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v9, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v8, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v4bf16_fv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v4bf16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+  %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %splat, <4 x bfloat> %a)
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v8bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v10, v12, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v8bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v8bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmin_v8bf16_vf(<8 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v8bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.v.x v12, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v10, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v8bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v8bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v12, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+  %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %splat)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmin_v8bf16_fv(<8 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v8bf16_fv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vmv.v.x v12, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v10, v8, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v8bf16_fv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v10, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v8bf16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v12, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+  %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %splat, <8 x bfloat> %a)
+  ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v16bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmin.vv v12, v16, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v16bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v16bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+  %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+  ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmin_v16bf16_vf(<16 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v16bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v16, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmin.vv v12, v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v16bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v16bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+  %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %splat)
+  ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmin_v16bf16_fv(<16 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v16bf16_fv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vmv.v.x v16, a0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmin.vv v12, v8, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v16bf16_fv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v16, a0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v12, v8, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v16bf16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
+  %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+  %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+  %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %splat, <16 x bfloat> %a)
+  ret <16 x bfloat> %v
+}
 
 define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFH-LABEL: vfmin_v2f16_vv:
@@ -25,6 +529,17 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %v
 }
@@ -48,6 +563,19 @@ define <2 x half> @vfmin_v2f16_vf(<2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v9, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <2 x half> poison, half %b, i32 0
   %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
   %v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %splat)
@@ -73,6 +601,19 @@ define <2 x half> @vfmin_v2f16_fv(<2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v9, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <2 x half> poison, half %b, i32 0
   %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
   %v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %splat, <2 x half> %a)
@@ -96,6 +637,17 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v4f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %v
 }
@@ -119,6 +671,19 @@ define <4 x half> @vfmin_v4f16_vf(<4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v4f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v9, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <4 x half> poison, half %b, i32 0
   %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
   %v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %splat)
@@ -144,6 +709,19 @@ define <4 x half> @vfmin_v4f16_fv(<4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v4f16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v9, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <4 x half> poison, half %b, i32 0
   %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
   %v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %splat, <4 x half> %a)
@@ -167,6 +745,17 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v8f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %v
 }
@@ -190,6 +779,19 @@ define <8 x half> @vfmin_v8f16_vf(<8 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v8f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v12, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <8 x half> poison, half %b, i32 0
   %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
   %v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %splat)
@@ -215,6 +817,19 @@ define <8 x half> @vfmin_v8f16_fv(<8 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v8f16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v12, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v8, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <8 x half> poison, half %b, i32 0
   %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
   %v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %splat, <8 x half> %a)
@@ -238,6 +853,17 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v16f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %a, <16 x half> %b)
   ret <16 x half> %v
 }
@@ -261,6 +887,19 @@ define <16 x half> @vfmin_v16f16_vf(<16 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v16f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v16, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
   %v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %a, <16 x half> %splat)
@@ -286,6 +925,19 @@ define <16 x half> @vfmin_v16f16_fv(<16 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_v16f16_fv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fmv.x.w a0, fa0
+; ZVFBFA-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v16, a0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v8, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <16 x half> poison, half %b, i32 0
   %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
   %v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %splat, <16 x half> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index fd70f95ed53c6..9a3d27becd644 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -11,178 +11,472 @@
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
-; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
 
 define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv1bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v8, v9
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv1bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT:    vfmax.vv v9, v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv1bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.maximum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
 }
 
 define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv2bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmax.vv v9, v8, v9
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv2bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT:    vfmax.vv v9, v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv2bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.maximum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
 }
 
 define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv4bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v12, v10, v0
-; CHECK-NEXT:    vfmax.vv v10, v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv4bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFH-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFH-NEXT:    vfmax.vv v10, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv4bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.maximum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
 }
 
 define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv8bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v16, v12, v0
-; CHECK-NEXT:    vfmax.vv v12, v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv8bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFH-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFH-NEXT:    vfmax.vv v12, v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv8bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT:    vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.maximum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
 }
 
 define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv16bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
-; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT:    vfmax.vv v16, v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv16bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v24, v24
+; ZVFH-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFH-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFH-NEXT:    vmv1r.v v0, v7
+; ZVFH-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFH-NEXT:    vfmax.vv v16, v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv16bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24
+; ZVFHMIN-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v24, v24
+; ZVFBFA-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFBFA-NEXT:    vmv1r.v v0, v7
+; ZVFBFA-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFBFA-NEXT:    vfmax.vv v16, v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.maximum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
 }
 
 define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind {
-; CHECK-LABEL: vfmax_nxv32bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv8r.v v0, v16
-; CHECK-NEXT:    vmv8r.v v24, v8
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v3, v16, v16
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v3
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v4
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
-; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
-; CHECK-NEXT:    vfmax.vv v16, v8, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv32bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 4
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vmv8r.v v0, v16
+; ZVFH-NEXT:    vmv8r.v v24, v8
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add a0, sp, a0
+; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vmv1r.v v0, v3
+; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v4
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add a0, sp, a0
+; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v8, v8, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add a0, sp, a0
+; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v20
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmfeq.vv v7, v24, v24
+; ZVFH-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFH-NEXT:    vmv1r.v v0, v7
+; ZVFH-NEXT:    vmerge.vvm v8, v24, v8, v0
+; ZVFH-NEXT:    vfmax.vv v16, v8, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add a0, sp, a0
+; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v24
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 4
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv32bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv8r.v v0, v16
+; ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmv1r.v v0, v3
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v4
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v8, v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v7, v24, v24
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
+; ZVFHMIN-NEXT:    vfmax.vv v16, v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 4
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vmv8r.v v0, v16
+; ZVFBFA-NEXT:    vmv8r.v v24, v8
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vmv1r.v v0, v3
+; ZVFBFA-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmfeq.vv v7, v24, v24
+; ZVFBFA-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFBFA-NEXT:    vmv1r.v v0, v7
+; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v16, v8, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 4
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
 }
@@ -214,6 +508,23 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
 }
@@ -245,6 +556,23 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
 }
@@ -276,6 +604,23 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT:    vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
 }
@@ -307,6 +652,23 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT:    vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
 }
@@ -337,6 +699,22 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v24, v24
+; ZVFBFA-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFBFA-NEXT:    vmv1r.v v0, v7
+; ZVFBFA-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFBFA-NEXT:    vfmax.vv v16, v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
 }
@@ -415,6 +793,69 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 4
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vmv8r.v v0, v16
+; ZVFBFA-NEXT:    vmv8r.v v24, v8
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vmv1r.v v0, v3
+; ZVFBFA-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmfeq.vv v7, v24, v24
+; ZVFBFA-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFBFA-NEXT:    vmv1r.v v0, v7
+; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v16, v8, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 4
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
   ret <vscale x 32 x half> %v
 }
@@ -564,6 +1005,17 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscal
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv_nnan:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call nnan <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
 }
@@ -595,6 +1047,24 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv_nnana:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v9, v10, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFBFA-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT:    vmv1r.v v9, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v10, v0.t
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %c = fadd nnan <vscale x 1 x half> %a, %a
   %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
@@ -627,6 +1097,24 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv_nnanb:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v8, v10, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFBFA-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFBFA-NEXT:    vmv1r.v v8, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %c = fadd nnan <vscale x 1 x half> %b, %b
   %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
   ret <vscale x 1 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
index dce5004d03e16..20f4a4d939ce1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
@@ -11,33 +11,83 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
 ; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
 ; RUN:     --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     --check-prefixes=CHECK,ZVFBFA
 
 define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv1bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vv_nxv1bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv1bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv1bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 1 x bfloat> @llvm.maximumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
   ret <vscale x 1 x bfloat> %vc
 }
 
 define <vscale x 1 x bfloat> @vfmax_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv1bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vf_nxv1bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv1bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv1bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
   %vc = call <vscale x 1 x bfloat> @llvm.maximumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat)
@@ -45,31 +95,75 @@ define <vscale x 1 x bfloat> @vfmax_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 }
 
 define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv2bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vv_nxv2bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv2bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv2bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
   ret <vscale x 2 x bfloat> %vc
 }
 
 define <vscale x 2 x bfloat> @vfmax_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv2bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vf_nxv2bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv2bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv2bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
   %vc = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat)
@@ -77,31 +171,75 @@ define <vscale x 2 x bfloat> @vfmax_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 }
 
 define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv4bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vv v10, v12, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vv_nxv4bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v10, v12, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv4bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv4bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
   ret <vscale x 4 x bfloat> %vc
 }
 
 define <vscale x 4 x bfloat> @vfmax_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv4bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vf v10, v10, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vf_nxv4bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv4bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv4bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
   %vc = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat)
@@ -109,31 +247,75 @@ define <vscale x 4 x bfloat> @vfmax_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 }
 
 define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv8bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vv v12, v16, v12
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vv_nxv8bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmax.vv v12, v16, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv8bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv8bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
   ret <vscale x 8 x bfloat> %vc
 }
 
 define <vscale x 8 x bfloat> @vfmax_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv8bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vf v12, v12, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vf_nxv8bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv8bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv8bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
   %vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat)
@@ -141,16 +323,38 @@ define <vscale x 8 x bfloat> @vfmax_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 }
 
 define <vscale x 8 x bfloat> @vfmax_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_fv_nxv8bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vf v12, v12, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_fv_nxv8bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_fv_nxv8bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_fv_nxv8bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
   %vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va)
@@ -158,31 +362,75 @@ define <vscale x 8 x bfloat> @vfmax_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 }
 
 define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv16bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v24, v16
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vv_nxv16bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v16, v24, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv16bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv16bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
   ret <vscale x 16 x bfloat> %vc
 }
 
 define <vscale x 16 x bfloat> @vfmax_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv16bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vf v16, v16, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vf_nxv16bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv16bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv16bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
   %vc = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat)
@@ -190,78 +438,213 @@ define <vscale x 16 x bfloat> @vfmax_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 }
 
 define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv32bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v16, v24
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vv_nxv32bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v0, v0, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v16, v16, v24
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    .cfi_def_cfa sp, 16
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv32bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv32bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v0, v0, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v16, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 32 x bfloat> @llvm.maximumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb)
   ret <vscale x 32 x bfloat> %vc
 }
 
 define <vscale x 32 x bfloat> @vfmax_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv32bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v24, v16
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_vf_nxv32bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    addi a1, sp, 16
+; ZVFH-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT:    vmv.v.x v8, a0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v0, v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v16, v24, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    .cfi_def_cfa sp, 16
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv32bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv32bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v0, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
   %vc = call <vscale x 32 x bfloat> @llvm.maximumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat)
@@ -285,6 +668,17 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv1f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 1 x half> @llvm.maximumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb)
   ret <vscale x 1 x half> %vc
 }
@@ -306,6 +700,17 @@ define <vscale x 1 x half> @vfmax_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv1f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
   %vc = call <vscale x 1 x half> @llvm.maximumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat)
@@ -329,6 +734,17 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv2f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 2 x half> @llvm.maximumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb)
   ret <vscale x 2 x half> %vc
 }
@@ -350,6 +766,17 @@ define <vscale x 2 x half> @vfmax_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv2f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
   %vc = call <vscale x 2 x half> @llvm.maximumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %splat)
@@ -373,6 +800,17 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv4f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 4 x half> @llvm.maximumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb)
   ret <vscale x 4 x half> %vc
 }
@@ -394,6 +832,17 @@ define <vscale x 4 x half> @vfmax_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv4f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
   %vc = call <vscale x 4 x half> @llvm.maximumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat)
@@ -417,6 +866,17 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv8f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb)
   ret <vscale x 8 x half> %vc
 }
@@ -438,6 +898,17 @@ define <vscale x 8 x half> @vfmax_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv8f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %vc = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %splat)
@@ -461,6 +932,17 @@ define <vscale x 8 x half> @vfmax_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_fv_nxv8f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %vc = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> %splat, <vscale x 8 x half> %va)
@@ -484,6 +966,17 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv16f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 16 x half> @llvm.maximumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb)
   ret <vscale x 16 x half> %vc
 }
@@ -505,6 +998,17 @@ define <vscale x 16 x half> @vfmax_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv16f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
   %vc = call <vscale x 16 x half> @llvm.maximumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat)
@@ -549,6 +1053,38 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv32f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v0, v0, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v16, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 32 x half> @llvm.maximumnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb)
   ret <vscale x 32 x half> %vc
 }
@@ -596,6 +1132,43 @@ define <vscale x 32 x half> @vfmax_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv32f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    addi a1, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v8, a0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v0, v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
   %vc = call <vscale x 32 x half> @llvm.maximumnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %splat)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index 339f97a73ee52..bb0025d85ab1d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -11,178 +11,472 @@
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
-; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
 
 define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv1bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v8, v9
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv1bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT:    vfmin.vv v9, v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv1bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.minimum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
 }
 
 define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv2bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmerge.vvm v9, v10, v8, v0
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    vfmin.vv v9, v8, v9
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv2bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT:    vfmin.vv v9, v8, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv2bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
 }
 
 define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv4bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vmerge.vvm v10, v12, v10, v0
-; CHECK-NEXT:    vfmin.vv v10, v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv4bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFH-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFH-NEXT:    vfmin.vv v10, v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv4bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.minimum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
 }
 
 define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv8bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v12
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT:    vmfeq.vv v0, v16, v16
-; CHECK-NEXT:    vmerge.vvm v12, v16, v12, v0
-; CHECK-NEXT:    vfmin.vv v12, v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv8bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFH-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFH-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFH-NEXT:    vfmin.vv v12, v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv8bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT:    vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.minimum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
 }
 
 define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv16bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v24, v24
-; CHECK-NEXT:    vmfeq.vv v7, v16, v16
-; CHECK-NEXT:    vmerge.vvm v8, v24, v16, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT:    vfmin.vv v16, v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv16bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v24, v24
+; ZVFH-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFH-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFH-NEXT:    vmv1r.v v0, v7
+; ZVFH-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFH-NEXT:    vfmin.vv v16, v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv16bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v24, v24
+; ZVFHMIN-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v24, v24
+; ZVFBFA-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFBFA-NEXT:    vmv1r.v v0, v7
+; ZVFBFA-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFBFA-NEXT:    vfmin.vv v16, v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.minimum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
 }
 
 define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind {
-; CHECK-LABEL: vfmin_nxv32bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv8r.v v0, v16
-; CHECK-NEXT:    vmv8r.v v24, v8
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v24
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v3, v16, v16
-; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vmv1r.v v0, v3
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v4
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v8, v8
-; CHECK-NEXT:    vmfeq.vv v7, v24, v24
-; CHECK-NEXT:    vmerge.vvm v16, v8, v24, v0
-; CHECK-NEXT:    vmv1r.v v0, v7
-; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
-; CHECK-NEXT:    vfmin.vv v16, v8, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv32bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 4
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vmv8r.v v0, v16
+; ZVFH-NEXT:    vmv8r.v v24, v8
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v0
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFH-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add a0, sp, a0
+; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vmv1r.v v0, v3
+; ZVFH-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v4
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add a0, sp, a0
+; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v8, v8, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add a0, sp, a0
+; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v8, v20
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFH-NEXT:    vmfeq.vv v7, v24, v24
+; ZVFH-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFH-NEXT:    vmv1r.v v0, v7
+; ZVFH-NEXT:    vmerge.vvm v8, v24, v8, v0
+; ZVFH-NEXT:    vfmin.vv v16, v8, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add a0, sp, a0
+; ZVFH-NEXT:    addi a0, a0, 16
+; ZVFH-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v24
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 4
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv32bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vmv8r.v v0, v16
+; ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v0
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v24
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFHMIN-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vmv1r.v v0, v3
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v4
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v8, v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v8, v20
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vmfeq.vv v7, v24, v24
+; ZVFHMIN-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFHMIN-NEXT:    vmv1r.v v0, v7
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v24, v8, v0
+; ZVFHMIN-NEXT:    vfmin.vv v16, v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v24
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 4
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vmv8r.v v0, v16
+; ZVFBFA-NEXT:    vmv8r.v v24, v8
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vmv1r.v v0, v3
+; ZVFBFA-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmfeq.vv v7, v24, v24
+; ZVFBFA-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFBFA-NEXT:    vmv1r.v v0, v7
+; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v16, v8, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 4
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
 }
@@ -214,6 +508,23 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
 }
@@ -245,6 +556,23 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
 }
@@ -276,6 +604,23 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT:    vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
 }
@@ -307,6 +652,23 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT:    vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
 }
@@ -337,6 +699,22 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v24, v24
+; ZVFBFA-NEXT:    vmfeq.vv v7, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v16, v0
+; ZVFBFA-NEXT:    vmv1r.v v0, v7
+; ZVFBFA-NEXT:    vmerge.vvm v16, v16, v24, v0
+; ZVFBFA-NEXT:    vfmin.vv v16, v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
 }
@@ -415,6 +793,69 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 4
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vmv8r.v v0, v16
+; ZVFBFA-NEXT:    vmv8r.v v24, v8
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v0
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT:    vmerge.vvm v24, v8, v16, v0
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vmv1r.v v0, v3
+; ZVFBFA-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v4
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v20
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmfeq.vv v7, v24, v24
+; ZVFBFA-NEXT:    vmerge.vvm v16, v8, v24, v0
+; ZVFBFA-NEXT:    vmv1r.v v0, v7
+; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v16, v8, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add a0, sp, a0
+; ZVFBFA-NEXT:    addi a0, a0, 16
+; ZVFBFA-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v24
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 4
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.minimum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
   ret <vscale x 32 x half> %v
 }
@@ -564,6 +1005,17 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscal
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv_nnan:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call nnan <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
 }
@@ -595,6 +1047,24 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv_nnana:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v9, v10, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFBFA-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT:    vmv1r.v v9, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v10, v0.t
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %c = fadd nnan <vscale x 1 x half> %a, %a
   %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
@@ -627,6 +1097,24 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv_nnanb:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfadd.vv v8, v10, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFBFA-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFBFA-NEXT:    vmv1r.v v8, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %c = fadd nnan <vscale x 1 x half> %b, %b
   %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
   ret <vscale x 1 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
index a52625d9e8ef4..f2106256c7937 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
@@ -11,33 +11,83 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
 ; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
 ; RUN:     --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     --check-prefixes=CHECK,ZVFBFA
 
 define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv1bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vv_nxv1bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv1bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv1bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 1 x bfloat> @llvm.minimumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
   ret <vscale x 1 x bfloat> %vc
 }
 
 define <vscale x 1 x bfloat> @vfmin_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv1bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmin.vf v9, v9, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vf_nxv1bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv1bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv1bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
   %vc = call <vscale x 1 x bfloat> @llvm.minimumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat)
@@ -45,31 +95,75 @@ define <vscale x 1 x bfloat> @vfmin_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 }
 
 define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv2bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vv_nxv2bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv2bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv2bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
   ret <vscale x 2 x bfloat> %vc
 }
 
 define <vscale x 2 x bfloat> @vfmin_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv2bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmin.vf v9, v9, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vf_nxv2bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv2bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv2bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
   %vc = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat)
@@ -77,31 +171,75 @@ define <vscale x 2 x bfloat> @vfmin_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 }
 
 define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv4bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmin.vv v10, v12, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vv_nxv4bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v10, v12, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv4bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv4bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
   ret <vscale x 4 x bfloat> %vc
 }
 
 define <vscale x 4 x bfloat> @vfmin_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv4bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmin.vf v10, v10, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vf_nxv4bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmin.vf v10, v10, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv4bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv4bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v10, v10, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
   %vc = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat)
@@ -109,31 +247,75 @@ define <vscale x 4 x bfloat> @vfmin_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 }
 
 define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv8bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vv v12, v16, v12
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vv_nxv8bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmin.vv v12, v16, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv8bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv8bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
   ret <vscale x 8 x bfloat> %vc
 }
 
 define <vscale x 8 x bfloat> @vfmin_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv8bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vf v12, v12, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vf_nxv8bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv8bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv8bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
   %vc = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat)
@@ -141,16 +323,38 @@ define <vscale x 8 x bfloat> @vfmin_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 }
 
 define <vscale x 8 x bfloat> @vfmin_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_fv_nxv8bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vf v12, v12, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_fv_nxv8bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_fv_nxv8bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_fv_nxv8bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
   %vc = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va)
@@ -158,31 +362,75 @@ define <vscale x 8 x bfloat> @vfmin_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 }
 
 define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv16bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v24, v16
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vv_nxv16bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v16, v24, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv16bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv16bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
   ret <vscale x 16 x bfloat> %vc
 }
 
 define <vscale x 16 x bfloat> @vfmin_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv16bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vf v16, v16, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vf_nxv16bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vf v16, v16, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv16bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv16bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v16, v16, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
   %vc = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat)
@@ -190,78 +438,213 @@ define <vscale x 16 x bfloat> @vfmin_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 }
 
 define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv32bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v0, v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v16, v24
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vv_nxv32bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v0, v0, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v16, v16, v24
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    .cfi_def_cfa sp, 16
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv32bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv32bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v0, v0, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v16, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 32 x bfloat> @llvm.minimumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb)
   ret <vscale x 32 x bfloat> %vc
 }
 
 define <vscale x 32 x bfloat> @vfmin_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv32bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v0, v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v24, v16
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_vf_nxv32bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    addi a1, sp, 16
+; ZVFH-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT:    vmv.v.x v8, a0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v0, v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v16, v24, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    .cfi_def_cfa sp, 16
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv32bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv32bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v0, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
   %vc = call <vscale x 32 x bfloat> @llvm.minimumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat)
@@ -285,6 +668,17 @@ define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv1f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 1 x half> @llvm.minimumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb)
   ret <vscale x 1 x half> %vc
 }
@@ -306,6 +700,17 @@ define <vscale x 1 x half> @vfmin_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv1f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
   %vc = call <vscale x 1 x half> @llvm.minimumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat)
@@ -329,6 +734,17 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv2f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb)
   ret <vscale x 2 x half> %vc
 }
@@ -350,6 +766,17 @@ define <vscale x 2 x half> @vfmin_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv2f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
   %vc = call <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %splat)
@@ -373,6 +800,17 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv4f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb)
   ret <vscale x 4 x half> %vc
 }
@@ -394,6 +832,17 @@ define <vscale x 4 x half> @vfmin_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv4f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v10, v10, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
   %vc = call <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat)
@@ -417,6 +866,17 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv8f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb)
   ret <vscale x 8 x half> %vc
 }
@@ -438,6 +898,17 @@ define <vscale x 8 x half> @vfmin_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv8f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %vc = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %splat)
@@ -461,6 +932,17 @@ define <vscale x 8 x half> @vfmin_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_fv_nxv8f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %vc = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> %splat, <vscale x 8 x half> %va)
@@ -484,6 +966,17 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv16f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb)
   ret <vscale x 16 x half> %vc
 }
@@ -505,6 +998,17 @@ define <vscale x 16 x half> @vfmin_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv16f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v16, v16, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
   %vc = call <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat)
@@ -549,6 +1053,38 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv32f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v0, v0, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v16, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 32 x half> @llvm.minimumnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb)
   ret <vscale x 32 x half> %vc
 }
@@ -596,6 +1132,43 @@ define <vscale x 32 x half> @vfmin_vf_nxv32f16(<vscale x 32 x half> %va, half %b
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv32f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    addi a1, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v8, a0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v0, v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
   %vc = call <vscale x 32 x half> @llvm.minimumnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %splat)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
index 63bfe1dfad5fc..e228f8459f108 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
@@ -11,33 +11,83 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
 ; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
 ; RUN:     --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     --check-prefixes=CHECK,ZVFBFA
 
 define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv1bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv1bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv1bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
 }
 
 define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv1bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv1bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv1bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %splat)
@@ -45,31 +95,75 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
 }
 
 define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv2bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv2bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmax.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv2bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
 }
 
 define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv2bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vf v9, v9, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv2bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv2bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %splat)
@@ -77,31 +171,75 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
 }
 
 define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv4bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vv v10, v12, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv4bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmax.vv v10, v12, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv4bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
 }
 
 define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv4bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vf v10, v10, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv4bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv4bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %splat)
@@ -109,31 +247,75 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
 }
 
 define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv8bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vv v12, v16, v12
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv8bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmax.vv v12, v16, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv8bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
 }
 
 define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv8bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vf v12, v12, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv8bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv8bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %splat)
@@ -141,31 +323,75 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
 }
 
 define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv16bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v24, v16
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv16bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v16, v24, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv16bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
 }
 
 define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv16bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vf v16, v16, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv16bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv16bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %splat)
@@ -173,78 +399,213 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
 }
 
 define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv32bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v16, v24
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv32bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v0, v0, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v16, v16, v24
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    .cfi_def_cfa sp, 16
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv32bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v0, v0, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v16, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
 }
 
 define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv32bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v0, v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v24, v16
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmax_nxv32bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    addi a1, sp, 16
+; ZVFH-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT:    vmv.v.x v8, a0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v0, v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmax.vv v16, v24, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    .cfi_def_cfa sp, 16
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv32bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v0, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %splat)
@@ -268,6 +629,17 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
 }
@@ -289,6 +661,17 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %splat)
@@ -312,6 +695,17 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
 }
@@ -333,6 +727,17 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
@@ -356,6 +761,17 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
 }
@@ -377,6 +793,17 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v10, v10, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
@@ -400,6 +827,17 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
 }
@@ -421,6 +859,17 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
@@ -444,6 +893,17 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
 }
@@ -465,6 +925,17 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vf(<vscale x 16 x half> %a, half %b)
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vf v16, v16, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %splat)
@@ -509,6 +980,38 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v0, v0, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v16, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.maxnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
   ret <vscale x 32 x half> %v
 }
@@ -556,6 +1059,43 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vf(<vscale x 32 x half> %a, half %b)
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    addi a1, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v8, a0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v0, v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x half> @llvm.maxnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %splat)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
index bb435c9d0114f..b284d87ab97d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
@@ -11,33 +11,83 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
 ; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
 ; RUN:     --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     --check-prefixes=CHECK,ZVFBFA
 
 define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv1bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv1bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv1bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
 }
 
 define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv1bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmin.vf v9, v9, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv1bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv1bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %splat)
@@ -45,31 +95,75 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
 }
 
 define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv2bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv2bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmin.vv v9, v9, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv2bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
 }
 
 define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv2bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmin.vf v9, v9, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv2bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv2bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %splat)
@@ -77,31 +171,75 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
 }
 
 define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv4bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmin.vv v10, v12, v10
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv4bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmin.vv v10, v12, v10
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv4bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
 }
 
 define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv4bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmin.vf v10, v10, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv4bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT:    vfmin.vf v10, v10, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv4bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v10, v10, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v10, v10, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %splat)
@@ -109,31 +247,75 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
 }
 
 define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv8bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vv v12, v16, v12
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv8bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmin.vv v12, v16, v12
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv8bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
 }
 
 define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv8bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vf v12, v12, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv8bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv8bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %splat)
@@ -141,31 +323,75 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
 }
 
 define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv16bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v24, v16
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv16bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v16, v24, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv16bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
 }
 
 define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv16bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vf v16, v16, fa5
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv16bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vf v16, v16, fa5
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv16bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vf v16, v16, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v16, v16, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %splat)
@@ -173,78 +399,213 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
 }
 
 define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv32bf16_vv:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v0, v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v16, v24
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv32bf16_vv:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v0, v0, v8
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v16, v16, v24
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    .cfi_def_cfa sp, 16
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv32bf16_vv:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v16
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v0, v0, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32bf16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v0, v0, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v16, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
 }
 
 define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv32bf16_vf:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v0, v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v24, v16
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: vfmin_nxv32bf16_vf:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    sub sp, sp, a0
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT:    fmv.x.h a0, fa0
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT:    addi a1, sp, 16
+; ZVFH-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT:    vmv.v.x v8, a0
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v0, v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT:    vfmin.vv v16, v24, v16
+; ZVFH-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    slli a0, a0, 3
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    .cfi_def_cfa sp, 16
+; ZVFH-NEXT:    addi sp, sp, 16
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv32bf16_vf:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    sub sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT:    addi a1, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT:    vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v0, v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 3
+; ZVFHMIN-NEXT:    add sp, sp, a0
+; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32bf16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v0, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %splat)
@@ -268,6 +629,17 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
 }
@@ -289,6 +661,17 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %splat)
@@ -312,6 +695,17 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
 }
@@ -333,6 +727,17 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
@@ -356,6 +761,17 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
 }
@@ -377,6 +793,17 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v10, v10, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
@@ -400,6 +827,17 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
 }
@@ -421,6 +859,17 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
@@ -444,6 +893,17 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
 }
@@ -465,6 +925,17 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vf(<vscale x 16 x half> %a, half %b)
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vf v16, v16, fa5
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %splat)
@@ -509,6 +980,38 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32f16_vv:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v0, v0, v8
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v16, v24
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x half> @llvm.minnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
   ret <vscale x 32 x half> %v
 }
@@ -556,6 +1059,43 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vf(<vscale x 32 x half> %a, half %b)
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32f16_vf:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    addi sp, sp, -16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    sub sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT:    fmv.x.h a0, fa0
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT:    addi a1, sp, 16
+; ZVFBFA-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v12
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT:    vmv.v.x v8, a0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT:    addi a0, sp, 16
+; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v0, v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT:    csrr a0, vlenb
+; ZVFBFA-NEXT:    slli a0, a0, 3
+; ZVFBFA-NEXT:    add sp, sp, a0
+; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x half> @llvm.minnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %splat)

From ec23fa25be43a6bbb73afa6be8c77961600a7264 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Thu, 11 Dec 2025 01:59:10 -0800
Subject: [PATCH 2/2] [RISCV][llvm] Support fminimum, fmaximum, fminnum,
 fmaxnum, fminimumnum, fmaximumnum codegen for zvfbfa

This patch adds support for both scalable vectors and fixed-length vectors.
It also enables fsetcc pattern matching for zvfbfa so that fminimum and
fmaximum work correctly.
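
As a rough illustration (the function name below is made up, not taken from
the test diff), an IR pattern like the following, compiled with
+experimental-zvfbfa, is expected to select vfmin.vv directly under an e16alt
vtype instead of widening to f32 and narrowing back:

define <vscale x 4 x bfloat> @vfmin_example(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
  %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
  ret <vscale x 4 x bfloat> %v
}

fminimum/fmaximum still need the vmfeq/vmerge NaN-handling sequence, but with
the fsetcc patterns enabled that sequence now also stays at bf16 element width.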
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  18 +--
 .../Target/RISCV/RISCVInstrInfoVSDPatterns.td |   9 +-
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td |  23 ++-
 .../RISCV/rvv/fixed-vectors-fmaximum.ll       |  64 +++-----
 .../RISCV/rvv/fixed-vectors-fmaximumnum.ll    |  35 +----
 .../RISCV/rvv/fixed-vectors-fminimum.ll       |  64 +++-----
 .../RISCV/rvv/fixed-vectors-fminimumnum.ll    |  35 +----
 .../CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll  |  92 ++----------
 .../CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll  |  92 ++----------
 .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 140 ++++--------------
 .../CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll   | 137 ++---------------
 .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 140 ++++--------------
 .../CodeGen/RISCV/rvv/fminimumnum-sdnode.ll   | 137 ++---------------
 llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll   | 130 ++--------------
 llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll   | 130 ++--------------
 15 files changed, 209 insertions(+), 1037 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2c0a02ae396c7..9cd6817c1b7a7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -91,8 +91,10 @@ static cl::opt<bool>
 static const unsigned ZvfbfaVPOps[] = {
     ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
 static const unsigned ZvfbfaOps[] = {
-    ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR,
-    ISD::FADD, ISD::FSUB, ISD::FMUL};
+    ISD::FNEG,    ISD::FABS,        ISD::FCOPYSIGN,   ISD::SPLAT_VECTOR,
+    ISD::FADD,    ISD::FSUB,        ISD::FMUL,        ISD::FMINNUM,
+    ISD::FMAXNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUM,
+    ISD::FMAXIMUM};
 
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
@@ -1087,11 +1089,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         ISD::VECREDUCE_FMAXIMUM};
 
     // TODO: Make more of these ops legal.
-    static const unsigned ZvfbfaPromoteOps[] = {ISD::FMINNUM,
-                                                ISD::FMAXNUM,
-                                                ISD::FMINIMUMNUM,
-                                                ISD::FMAXIMUMNUM,
-                                                ISD::FDIV,
+    static const unsigned ZvfbfaPromoteOps[] = {ISD::FDIV,
                                                 ISD::FMA,
                                                 ISD::FSQRT,
                                                 ISD::FCEIL,
@@ -1103,8 +1101,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                                 ISD::FNEARBYINT,
                                                 ISD::IS_FPCLASS,
                                                 ISD::SETCC,
-                                                ISD::FMAXIMUM,
-                                                ISD::FMINIMUM,
                                                 ISD::STRICT_FADD,
                                                 ISD::STRICT_FSUB,
                                                 ISD::STRICT_FMUL,
@@ -1297,6 +1293,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                           ISD::VECTOR_INTERLEAVE, ISD::VECTOR_REVERSE,
                           ISD::VECTOR_SPLICE, ISD::VECTOR_COMPRESS},
                          VT, Custom);
+      setOperationAction(
+          {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
+          Legal);
+      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
       setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
       setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 14ad7ca0eb35a..b3cc33d31761d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -201,12 +201,15 @@ class VPatBinarySDNode_VF_RM<SDPatternOperator vop,
 
 multiclass VPatBinaryFPSDNode_VV_VF<SDPatternOperator vop, string instruction_name,
                                     bit isSEWAware = 0> {
-  foreach vti = AllFloatVectors in {
+  foreach vti = AllFloatAndBF16Vectors in {
     let Predicates = GetVTypePredicates<vti>.Predicates in {
-      def : VPatBinarySDNode_VV<vop, instruction_name,
+      def : VPatBinarySDNode_VV<vop, instruction_name #
+                                     !if(!eq(vti.Scalar, bf16), "_ALT", ""),
                                 vti.Vector, vti.Vector, vti.Log2SEW,
                                 vti.LMul, vti.AVL, vti.RegClass, isSEWAware>;
-      def : VPatBinarySDNode_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+      def : VPatBinarySDNode_VF<vop, instruction_name#
+                                     !if(!eq(vti.Scalar, bf16), "_ALT", "")#
+                                     "_V"#vti.ScalarSuffix,
                                 vti.Vector, vti.Vector, vti.Scalar,
                                 vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass,
                                 vti.ScalarRegClass, isSEWAware>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 9273ce094eb0a..4c41667560a98 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1043,13 +1043,16 @@ class VPatBinaryVL_VF_RM<SDPatternOperator vop,
 
 multiclass VPatBinaryFPVL_VV_VF<SDPatternOperator vop, string instruction_name,
                                 bit isSEWAware = 0> {
-  foreach vti = AllFloatVectors in {
+  foreach vti = AllFloatAndBF16Vectors in {
     let Predicates = GetVTypePredicates<vti>.Predicates in {
-      def : VPatBinaryVL_V<vop, instruction_name, "VV",
+      def : VPatBinaryVL_V<vop, instruction_name#
+                                !if(!eq(vti.Scalar, bf16), "_ALT", ""), "VV",
                            vti.Vector, vti.Vector, vti.Vector, vti.Mask,
                            vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
                            vti.RegClass, isSEWAware>;
-      def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+      def : VPatBinaryVL_VF<vop, instruction_name#
+                                 !if(!eq(vti.Scalar, bf16), "_ALT", "")#"_V"#
+                                 vti.ScalarSuffix,
                             vti.Vector, vti.Vector, vti.Vector, vti.Mask,
                             vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
                             vti.ScalarRegClass, isSEWAware>;
@@ -1199,7 +1202,7 @@ multiclass VPatIntegerSetCCVL_VI_Swappable<VTypeInfo vti, string instruction_nam
 multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
                                   string inst_name,
                                   string swapped_op_inst_name> {
-  foreach fvti = AllFloatVectors in {
+  foreach fvti = AllFloatAndBF16Vectors in {
     let Predicates = GetVTypePredicates<fvti>.Predicates in {
       def : Pat<(fvti.Mask (vop (fvti.Vector fvti.RegClass:$rs1),
                                  fvti.RegClass:$rs2,
@@ -1207,7 +1210,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
                                  VR:$passthru,
                                  (fvti.Mask VMV0:$vm),
                                  VLOpFrag)),
-                (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX#"_MASK")
+                (!cast<Instruction>(inst_name#
+                                    !if(!eq(fvti.Scalar, bf16), "_ALT", "")#
+                                    "_VV_"#fvti.LMul.MX#"_MASK")
                     VR:$passthru, fvti.RegClass:$rs1,
                     fvti.RegClass:$rs2, (fvti.Mask VMV0:$vm),
                     GPR:$vl, fvti.Log2SEW, TA_MU)>;
@@ -1217,7 +1222,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
                                 VR:$passthru,
                                 (fvti.Mask VMV0:$vm),
                                 VLOpFrag)),
-                (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+                (!cast<Instruction>(inst_name#
+                                    !if(!eq(fvti.Scalar, bf16), "_ALT", "")#
+                                    "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
                     VR:$passthru, fvti.RegClass:$rs1,
                     fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm),
                     GPR:$vl, fvti.Log2SEW, TA_MU)>;
@@ -1227,7 +1234,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
                                 VR:$passthru,
                                 (fvti.Mask VMV0:$vm),
                                 VLOpFrag)),
-                (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+                (!cast<Instruction>(swapped_op_inst_name#
+                                    !if(!eq(fvti.Scalar, bf16), "_ALT", "")#
+                                    "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
                     VR:$passthru, fvti.RegClass:$rs1,
                     fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm),
                     GPR:$vl, fvti.Log2SEW, TA_MU)>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 299361374f2f4..c3d6b1fac50dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -50,18 +50,11 @@ define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmax_v2bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <2 x bfloat> @llvm.maximum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
   ret <2 x bfloat> %v
@@ -105,18 +98,11 @@ define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmax_v4bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
   ret <4 x bfloat> %v
@@ -160,18 +146,11 @@ define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmax_v8bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFBFA-NEXT:    vfmax.vv v10, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <8 x bfloat> @llvm.maximum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
   ret <8 x bfloat> %v
@@ -215,18 +194,11 @@ define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmax_v16bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFBFA-NEXT:    vfmax.vv v12, v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFBFA-NEXT:    ret
   %v = call <16 x bfloat> @llvm.maximum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
   ret <16 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
index 1a5e5af61b8ef..52c26900de5e5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
@@ -377,12 +377,7 @@ define <2 x bfloat> @max_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; ZVFBFA-LABEL: max_v2bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
@@ -563,12 +558,7 @@ define <4 x bfloat> @max_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; ZVFBFA-LABEL: max_v4bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
@@ -869,12 +859,7 @@ define <8 x bfloat> @max_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
 ; ZVFBFA-LABEL: max_v8bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
@@ -1239,12 +1224,7 @@ define <9 x bfloat> @max_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) {
 ; ZVFBFA-LABEL: max_v9bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <9 x bfloat> @llvm.maximumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b)
@@ -1725,12 +1705,7 @@ define <16 x bfloat> @max_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
 ; ZVFBFA-LABEL: max_v16bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index bcd86e5237918..9a25beeec8c72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -50,18 +50,11 @@ define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmin_v2bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <2 x bfloat> @llvm.minimum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
   ret <2 x bfloat> %v
@@ -105,18 +98,11 @@ define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmin_v4bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <4 x bfloat> @llvm.minimum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
   ret <4 x bfloat> %v
@@ -160,18 +146,11 @@ define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmin_v8bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFBFA-NEXT:    vfmin.vv v10, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <8 x bfloat> @llvm.minimum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
   ret <8 x bfloat> %v
@@ -215,18 +194,11 @@ define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmin_v16bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFBFA-NEXT:    vfmin.vv v12, v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFBFA-NEXT:    ret
   %v = call <16 x bfloat> @llvm.minimum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
   ret <16 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
index eed806da19601..39f557b767410 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
@@ -377,12 +377,7 @@ define <2 x bfloat> @min_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; ZVFBFA-LABEL: min_v2bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
@@ -563,12 +558,7 @@ define <4 x bfloat> @min_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; ZVFBFA-LABEL: min_v4bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
@@ -869,12 +859,7 @@ define <8 x bfloat> @min_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
 ; ZVFBFA-LABEL: min_v8bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
@@ -1239,12 +1224,7 @@ define <9 x bfloat> @min_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) {
 ; ZVFBFA-LABEL: min_v9bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <9 x bfloat> @llvm.minimumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b)
@@ -1725,12 +1705,7 @@ define <16 x bfloat> @min_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
 ; ZVFBFA-LABEL: min_v16bf16:
 ; ZVFBFA:       # %bb.0: # %entry
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
 entry:
   %c = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
index 179d88cecc7f8..bdf78211988b6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
@@ -38,12 +38,7 @@ define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmax_v2bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
   ret <2 x bfloat> %v
@@ -79,13 +74,7 @@ define <2 x bfloat> @vfmax_v2bf16_vf(<2 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmax_v2bf16_vf:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
@@ -123,13 +112,7 @@ define <2 x bfloat> @vfmax_v2bf16_fv(<2 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmax_v2bf16_fv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v8, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
@@ -163,12 +146,7 @@ define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmax_v4bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
   ret <4 x bfloat> %v
@@ -204,13 +182,7 @@ define <4 x bfloat> @vfmax_v4bf16_vf(<4 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmax_v4bf16_vf:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
@@ -248,13 +220,7 @@ define <4 x bfloat> @vfmax_v4bf16_fv(<4 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmax_v4bf16_fv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v8, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
@@ -288,12 +254,7 @@ define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmax_v8bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
   ret <8 x bfloat> %v
@@ -329,13 +290,7 @@ define <8 x bfloat> @vfmax_v8bf16_vf(<8 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmax_v8bf16_vf:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v12, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v10, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
@@ -373,13 +328,7 @@ define <8 x bfloat> @vfmax_v8bf16_fv(<8 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmax_v8bf16_fv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v12, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v10, v8, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
@@ -413,12 +362,7 @@ define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmax_v16bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
   ret <16 x bfloat> %v
@@ -454,13 +398,7 @@ define <16 x bfloat> @vfmax_v16bf16_vf(<16 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmax_v16bf16_vf:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v12, v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
@@ -498,13 +436,7 @@ define <16 x bfloat> @vfmax_v16bf16_fv(<16 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmax_v16bf16_fv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v12, v8, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
index 2228292d9e66b..3283dd35abf82 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
@@ -38,12 +38,7 @@ define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmin_v2bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
   ret <2 x bfloat> %v
@@ -79,13 +74,7 @@ define <2 x bfloat> @vfmin_v2bf16_vf(<2 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmin_v2bf16_vf:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
@@ -123,13 +112,7 @@ define <2 x bfloat> @vfmin_v2bf16_fv(<2 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmin_v2bf16_fv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v8, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
@@ -163,12 +146,7 @@ define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmin_v4bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
   ret <4 x bfloat> %v
@@ -204,13 +182,7 @@ define <4 x bfloat> @vfmin_v4bf16_vf(<4 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmin_v4bf16_vf:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
@@ -248,13 +220,7 @@ define <4 x bfloat> @vfmin_v4bf16_fv(<4 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmin_v4bf16_fv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v9, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v8, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
@@ -288,12 +254,7 @@ define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmin_v8bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
   ret <8 x bfloat> %v
@@ -329,13 +290,7 @@ define <8 x bfloat> @vfmin_v8bf16_vf(<8 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmin_v8bf16_vf:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v12, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v10, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
@@ -373,13 +328,7 @@ define <8 x bfloat> @vfmin_v8bf16_fv(<8 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmin_v8bf16_fv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v12, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v10, v8, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
@@ -413,12 +362,7 @@ define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
 ; ZVFBFA-LABEL: vfmin_v16bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
   ret <16 x bfloat> %v
@@ -454,13 +398,7 @@ define <16 x bfloat> @vfmin_v16bf16_vf(<16 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmin_v16bf16_vf:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v12, v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
@@ -498,13 +436,7 @@ define <16 x bfloat> @vfmin_v16bf16_fv(<16 x bfloat> %a, bfloat %b) {
 ; ZVFBFA-LABEL: vfmin_v16bf16_fv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v12, v8, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index 9a3d27becd644..dcf6dee8f736c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -54,18 +54,11 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmax_nxv1bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.maximum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
@@ -109,18 +102,11 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmax_nxv2bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT:    vfmax.vv v9, v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.maximum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
@@ -164,18 +150,11 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmax_nxv4bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFBFA-NEXT:    vfmax.vv v10, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.maximum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
@@ -219,18 +198,11 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmax_nxv8bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFBFA-NEXT:    vfmax.vv v12, v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.maximum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
@@ -272,17 +244,11 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 ; ZVFBFA-LABEL: vfmax_nxv16bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v24, v24
-; ZVFBFA-NEXT:    vmfeq.vv v7, v16, v16
-; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v16, v0
-; ZVFBFA-NEXT:    vmv1r.v v0, v7
-; ZVFBFA-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFBFA-NEXT:    vfmax.vv v16, v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v16, v8, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.maximum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
@@ -417,65 +383,13 @@ define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
 ;
 ; ZVFBFA-LABEL: vfmax_nxv32bf16_vv:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 4
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vmv8r.v v0, v16
-; ZVFBFA-NEXT:    vmv8r.v v24, v8
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT:    vmfeq.vv v7, v16, v16
 ; ZVFBFA-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add a0, sp, a0
-; ZVFBFA-NEXT:    addi a0, a0, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vmv1r.v v0, v3
-; ZVFBFA-NEXT:    vmerge.vvm v8, v16, v8, v0
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v4
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add a0, sp, a0
-; ZVFBFA-NEXT:    addi a0, a0, 16
-; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v8, v8, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add a0, sp, a0
-; ZVFBFA-NEXT:    addi a0, a0, 16
-; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v20
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFBFA-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; ZVFBFA-NEXT:    vmv1r.v v0, v7
-; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFBFA-NEXT:    vfmax.vv v16, v8, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add a0, sp, a0
-; ZVFBFA-NEXT:    addi a0, a0, 16
-; ZVFBFA-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v24
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 4
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v24
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
index 20f4a4d939ce1..ed9ce8cefd538 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
@@ -44,12 +44,7 @@ define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; ZVFBFA-LABEL: vfmax_vv_nxv1bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 1 x bfloat> @llvm.maximumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
   ret <vscale x 1 x bfloat> %vc
@@ -80,13 +75,8 @@ define <vscale x 1 x bfloat> @vfmax_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmax_vf_nxv1bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -120,12 +110,7 @@ define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; ZVFBFA-LABEL: vfmax_vv_nxv2bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
   ret <vscale x 2 x bfloat> %vc
@@ -156,13 +141,8 @@ define <vscale x 2 x bfloat> @vfmax_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmax_vf_nxv2bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -196,12 +176,7 @@ define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; ZVFBFA-LABEL: vfmax_vv_nxv4bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
   ret <vscale x 4 x bfloat> %vc
@@ -232,13 +207,8 @@ define <vscale x 4 x bfloat> @vfmax_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmax_vf_nxv4bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v10, v10, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -272,12 +242,7 @@ define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; ZVFBFA-LABEL: vfmax_vv_nxv8bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
   ret <vscale x 8 x bfloat> %vc
@@ -308,13 +273,8 @@ define <vscale x 8 x bfloat> @vfmax_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmax_vf_nxv8bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v12, v12, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -347,13 +307,8 @@ define <vscale x 8 x bfloat> @vfmax_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmax_fv_nxv8bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v12, v12, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -387,12 +342,7 @@ define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; ZVFBFA-LABEL: vfmax_vv_nxv16bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v16, v24, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
   ret <vscale x 16 x bfloat> %vc
@@ -423,13 +373,8 @@ define <vscale x 16 x bfloat> @vfmax_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ;
 ; ZVFBFA-LABEL: vfmax_vf_nxv16bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v16, v16, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -504,34 +449,8 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ;
 ; ZVFBFA-LABEL: vfmax_vv_nxv32bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v0, v0, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v16, v16, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT:    addi sp, sp, 16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 32 x bfloat> @llvm.maximumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb)
   ret <vscale x 32 x bfloat> %vc
@@ -614,36 +533,8 @@ define <vscale x 32 x bfloat> @vfmax_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ;
 ; ZVFBFA-LABEL: vfmax_vf_nxv32bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v16, v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v16, v0, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT:    addi sp, sp, 16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index bb0025d85ab1d..8473edc0ce003 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -54,18 +54,11 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmin_nxv1bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.minimum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
@@ -109,18 +102,11 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmin_nxv2bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v9, v10, v8, v0
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT:    vfmin.vv v9, v8, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
@@ -164,18 +150,11 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmin_nxv4bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v12, v0
-; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT:    vmerge.vvm v10, v12, v10, v0
-; ZVFBFA-NEXT:    vfmin.vv v10, v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.minimum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
@@ -219,18 +198,11 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmin_nxv8bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v16, v0
-; ZVFBFA-NEXT:    vmfeq.vv v0, v16, v16
-; ZVFBFA-NEXT:    vmerge.vvm v12, v16, v12, v0
-; ZVFBFA-NEXT:    vfmin.vv v12, v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v12, v8, v10, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.minimum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
@@ -272,17 +244,11 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 ; ZVFBFA-LABEL: vfmin_nxv16bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v24, v24
-; ZVFBFA-NEXT:    vmfeq.vv v7, v16, v16
-; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v16, v0
-; ZVFBFA-NEXT:    vmv1r.v v0, v7
-; ZVFBFA-NEXT:    vmerge.vvm v16, v16, v24, v0
-; ZVFBFA-NEXT:    vfmin.vv v16, v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT:    vmerge.vvm v16, v8, v12, v0
+; ZVFBFA-NEXT:    vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT:    vmerge.vvm v8, v12, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.minimum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
@@ -417,65 +383,13 @@ define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
 ;
 ; ZVFBFA-LABEL: vfmin_nxv32bf16_vv:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 4
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vmv8r.v v0, v16
-; ZVFBFA-NEXT:    vmv8r.v v24, v8
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v0
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
 ; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT:    vmfeq.vv v7, v16, v16
 ; ZVFBFA-NEXT:    vmerge.vvm v24, v8, v16, v0
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add a0, sp, a0
-; ZVFBFA-NEXT:    addi a0, a0, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vmv1r.v v0, v3
-; ZVFBFA-NEXT:    vmerge.vvm v8, v16, v8, v0
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v4
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add a0, sp, a0
-; ZVFBFA-NEXT:    addi a0, a0, 16
-; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v8, v8, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add a0, sp, a0
-; ZVFBFA-NEXT:    addi a0, a0, 16
-; ZVFBFA-NEXT:    vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v20
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT:    vmfeq.vv v7, v24, v24
-; ZVFBFA-NEXT:    vmerge.vvm v16, v8, v24, v0
 ; ZVFBFA-NEXT:    vmv1r.v v0, v7
-; ZVFBFA-NEXT:    vmerge.vvm v8, v24, v8, v0
-; ZVFBFA-NEXT:    vfmin.vv v16, v8, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add a0, sp, a0
-; ZVFBFA-NEXT:    addi a0, a0, 16
-; ZVFBFA-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v24
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 4
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    addi sp, sp, 16
+; ZVFBFA-NEXT:    vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v24
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
index f2106256c7937..8360d5787d9b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
@@ -44,12 +44,7 @@ define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 ; ZVFBFA-LABEL: vfmin_vv_nxv1bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 1 x bfloat> @llvm.minimumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
   ret <vscale x 1 x bfloat> %vc
@@ -80,13 +75,8 @@ define <vscale x 1 x bfloat> @vfmin_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmin_vf_nxv1bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -120,12 +110,7 @@ define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 ; ZVFBFA-LABEL: vfmin_vv_nxv2bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
   ret <vscale x 2 x bfloat> %vc
@@ -156,13 +141,8 @@ define <vscale x 2 x bfloat> @vfmin_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmin_vf_nxv2bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -196,12 +176,7 @@ define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 ; ZVFBFA-LABEL: vfmin_vv_nxv4bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
   ret <vscale x 4 x bfloat> %vc
@@ -232,13 +207,8 @@ define <vscale x 4 x bfloat> @vfmin_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmin_vf_nxv4bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v10, v10, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -272,12 +242,7 @@ define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 ; ZVFBFA-LABEL: vfmin_vv_nxv8bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
   ret <vscale x 8 x bfloat> %vc
@@ -308,13 +273,8 @@ define <vscale x 8 x bfloat> @vfmin_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmin_vf_nxv8bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v12, v12, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -347,13 +307,8 @@ define <vscale x 8 x bfloat> @vfmin_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 ;
 ; ZVFBFA-LABEL: vfmin_fv_nxv8bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v12, v12, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -387,12 +342,7 @@ define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 ; ZVFBFA-LABEL: vfmin_vv_nxv16bf16:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v16, v24, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
   ret <vscale x 16 x bfloat> %vc
@@ -423,13 +373,8 @@ define <vscale x 16 x bfloat> @vfmin_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
 ;
 ; ZVFBFA-LABEL: vfmin_vf_nxv16bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v16, v16, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -504,34 +449,8 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
 ;
 ; ZVFBFA-LABEL: vfmin_vv_nxv32bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v0, v0, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v16, v16, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT:    addi sp, sp, 16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFBFA-NEXT:    ret
   %vc = call <vscale x 32 x bfloat> @llvm.minimumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb)
   ret <vscale x 32 x bfloat> %vc
@@ -614,36 +533,8 @@ define <vscale x 32 x bfloat> @vfmin_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
 ;
 ; ZVFBFA-LABEL: vfmin_vf_nxv32bf16:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v16, v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v16, v0, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT:    addi sp, sp, 16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
index e228f8459f108..f0ca715f925c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
@@ -44,12 +44,7 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmax_nxv1bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
@@ -80,13 +75,8 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
 ;
 ; ZVFBFA-LABEL: vfmax_nxv1bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -120,12 +110,7 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmax_nxv2bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
@@ -156,13 +141,8 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
 ;
 ; ZVFBFA-LABEL: vfmax_nxv2bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v9, v9, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -196,12 +176,7 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmax_nxv4bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v10, v12, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
@@ -232,13 +207,8 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
 ;
 ; ZVFBFA-LABEL: vfmax_nxv4bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v10, v10, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -272,12 +242,7 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmax_nxv8bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
@@ -308,13 +273,8 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
 ;
 ; ZVFBFA-LABEL: vfmax_nxv8bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v12, v12, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -348,12 +308,7 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 ; ZVFBFA-LABEL: vfmax_nxv16bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v16, v24, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v12
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
@@ -384,13 +339,8 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
 ;
 ; ZVFBFA-LABEL: vfmax_nxv16bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vf v16, v16, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -465,34 +415,8 @@ define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
 ;
 ; ZVFBFA-LABEL: vfmax_nxv32bf16_vv:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v0, v0, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v16, v16, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT:    addi sp, sp, 16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmax.vv v8, v8, v16
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
@@ -575,36 +499,8 @@ define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfl
 ;
 ; ZVFBFA-LABEL: vfmax_nxv32bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v16, v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmax.vv v16, v0, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT:    addi sp, sp, 16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    vfmax.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
index b284d87ab97d4..89d67f710c7a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
@@ -44,12 +44,7 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmin_nxv1bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
   ret <vscale x 1 x bfloat> %v
@@ -80,13 +75,8 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
 ;
 ; ZVFBFA-LABEL: vfmin_nxv1bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -120,12 +110,7 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmin_nxv2bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
   ret <vscale x 2 x bfloat> %v
@@ -156,13 +141,8 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
 ;
 ; ZVFBFA-LABEL: vfmin_nxv2bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v9, v9, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -196,12 +176,7 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmin_nxv4bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v10, v12, v10
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v9
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
   ret <vscale x 4 x bfloat> %v
@@ -232,13 +207,8 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
 ;
 ; ZVFBFA-LABEL: vfmin_nxv4bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v10, v10, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -272,12 +242,7 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 ; ZVFBFA-LABEL: vfmin_nxv8bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v10
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
   ret <vscale x 8 x bfloat> %v
@@ -308,13 +273,8 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
 ;
 ; ZVFBFA-LABEL: vfmin_nxv8bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v12, v12, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -348,12 +308,7 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 ; ZVFBFA-LABEL: vfmin_nxv16bf16_vv:
 ; ZVFBFA:       # %bb.0:
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v16, v24, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v12
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
   ret <vscale x 16 x bfloat> %v
@@ -384,13 +339,8 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
 ;
 ; ZVFBFA-LABEL: vfmin_nxv16bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    fcvt.s.bf16 fa5, fa0
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vf v16, v16, fa5
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -465,34 +415,8 @@ define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
 ;
 ; ZVFBFA-LABEL: vfmin_nxv32bf16_vv:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v16
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v8
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT:    vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v0, v0, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v0
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v16, v16, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT:    addi sp, sp, 16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfmin.vv v8, v8, v16
 ; ZVFBFA-NEXT:    ret
   %v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
   ret <vscale x 32 x bfloat> %v
@@ -575,36 +499,8 @@ define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfl
 ;
 ; ZVFBFA-LABEL: vfmin_nxv32bf16_vf:
 ; ZVFBFA:       # %bb.0:
-; ZVFBFA-NEXT:    addi sp, sp, -16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    sub sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
-; ZVFBFA-NEXT:    vfmv.v.f v16, fa0
-; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT:    addi a0, sp, 16
-; ZVFBFA-NEXT:    vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v0, v12
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT:    vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT:    vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v16, v16, v8
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v8, v16
-; ZVFBFA-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT:    vfmin.vv v16, v0, v24
-; ZVFBFA-NEXT:    vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT:    vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT:    csrr a0, vlenb
-; ZVFBFA-NEXT:    slli a0, a0, 3
-; ZVFBFA-NEXT:    add sp, sp, a0
-; ZVFBFA-NEXT:    .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT:    addi sp, sp, 16
-; ZVFBFA-NEXT:    .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT:    vfmin.vf v8, v8, fa0
 ; ZVFBFA-NEXT:    ret
   %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
